aboutsummaryrefslogtreecommitdiff
path: root/src/zenutil/chunking.h
blob: 09c56454f0c8e7f50f4152915198a8ab42b6907b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once
#include <zencore/zencore.h>

namespace zen {

/** Content-defined chunking helper
 *
 * Holds the configuration (minimum / maximum / average chunk size) and the
 * scanning state (byte window, current hash, current chunk size, total bytes
 * scanned) used to locate chunk boundaries in a stream of bytes.
 *
 * Only the declaration lives in this header; the scanning logic is
 * implemented elsewhere.
 */
class ZenChunkHelper
{
public:
	/** Configure the chunk size limits.
	 *
	 * According to the member comments below, this is also where the
	 * discriminator and threshold values are computed.
	 * NOTE(review): any clamping against kChunkSizeLimitMin/kChunkSizeLimitMax
	 * happens in the implementation, which is not visible here - confirm.
	 */
	void SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize);

	/** Scan a buffer of ByteCount bytes starting at DataBytes.
	 *
	 * NOTE(review): presumably returns the boundary position within the
	 * buffer, or kNoBoundaryFound when the buffer contains no boundary -
	 * confirm against the implementation.
	 */
	size_t ScanChunk(const void* DataBytes, size_t ByteCount);

	/** Reset the helper (see implementation for exactly which state is cleared). */
	void Reset();

	// This controls which chunking approach is used - threshold or
	// modulo based. Threshold is faster and generates similarly sized
	// chunks
	void SetUseThreshold(bool NewState) { m_UseThreshold = NewState; }

	// Read-only accessors for the configured limits and the scan counter.
	// Member functions defined in the class body are implicitly inline.
	size_t	 ChunkSizeMin() const { return m_ChunkSizeMin; }
	size_t	 ChunkSizeMax() const { return m_ChunkSizeMax; }
	size_t	 ChunkSizeAvg() const { return m_ChunkSizeAvg; }
	uint64_t BytesScanned() const { return m_BytesScanned; }

	// Sentinel with all bits set (the largest size_t value)
	static constexpr size_t kNoBoundaryFound = size_t(~0ull);

private:
	size_t m_ChunkSizeMin = 0;
	size_t m_ChunkSizeMax = 0;
	size_t m_ChunkSizeAvg = 0;

	uint32_t m_Discriminator = 0;  // Computed in SetChunkSize()
	uint32_t m_Threshold	 = 0;  // Computed in SetChunkSize()

	bool m_UseThreshold = true;

	static constexpr size_t kChunkSizeLimitMax		 = 64 * 1024 * 1024;
	static constexpr size_t kChunkSizeLimitMin		 = 1024;
	static constexpr size_t kDefaultAverageChunkSize = 64 * 1024;

	static constexpr int kWindowSize = 48;
	// Zero-initialized so that a freshly constructed helper never carries
	// indeterminate bytes, matching every other member of this class which
	// already has a default initializer.
	uint8_t				 m_Window[kWindowSize] = {};
	uint32_t			 m_WindowSize		   = 0;

	uint32_t m_CurrentHash		= 0;
	uint32_t m_CurrentChunkSize = 0;

	uint64_t m_BytesScanned = 0;

	size_t InternalScanChunk(const void* DataBytes, size_t ByteCount);
	void   InternalReset();
};

}  // namespace zen