// Source path: root/src/zenstore/compactcas.h
// Source blob: 2eb4c233a83d9b161e36299427bddbb43059f79d
// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include <zencore/logbase.h>
#include <zencore/zencore.h>
#include <zenstore/blockstore.h>
#include <zenstore/caslog.h>
#include <zenstore/gc.h>

#include "cas.h"

#include <atomic>
#include <limits>

ZEN_THIRD_PARTY_INCLUDES_START
#include <tsl/robin_map.h>
ZEN_THIRD_PARTY_INCLUDES_END

namespace zen {

//////////////////////////////////////////////////////////////////////////

#pragma pack(push)
#pragma pack(1)

struct CasDiskIndexEntry
{
	static const uint8_t kTombstone = 0x01;

	IoHash				   Key;
	BlockStoreDiskLocation Location;
	ZenContentType		   ContentType = ZenContentType::kUnknownContentType;
	uint8_t				   Flags	   = 0;
};

#pragma pack(pop)

static_assert(sizeof(CasDiskIndexEntry) == 32);

/** This implements a storage strategy for small CAS values

   New chunks are simply appended to a small object file, and an index is
   maintained to allow chunks to be looked up within the active small object
   files

 */

struct CasContainerStrategy final : public GcStorage, public GcReferenceStore
{
	CasContainerStrategy(GcManager& Gc);
	~CasContainerStrategy();

	CasStore::InsertResult				InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash);
	std::vector<CasStore::InsertResult> InsertChunks(std::span<const IoBuffer> Chunks, std::span<const IoHash> ChunkHashes);
	IoBuffer							FindChunk(const IoHash& ChunkHash);
	bool								HaveChunk(const IoHash& ChunkHash);
	void								FilterChunks(HashKeySet& InOutChunks);
	bool								IterateChunks(std::span<const IoHash>											ChunkHashes,
													  const std::function<bool(size_t Index, const IoBuffer& Payload)>& AsyncCallback,
													  WorkerThreadPool*													OptionalWorkerPool,
													  uint64_t															LargeSizeLimit);
	void								Initialize(const std::filesystem::path& RootDirectory,
												   const std::string_view		ContainerBaseName,
												   uint32_t						MaxBlockSize,
												   uint32_t						Alignment,
												   bool							IsNewStore);
	void								Flush();

	// GcStorage

	virtual void		  ScrubStorage(ScrubContext& ScrubCtx) override;
	virtual GcStorageSize StorageSize() const override;

	virtual std::string		   GetGcName(GcCtx& Ctx) override;
	virtual GcReferencePruner* CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) override;

private:
	CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash);
	void				   MakeIndexSnapshot();
	uint64_t			   ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& OutVersion);
	uint64_t			   ReadLog(const std::filesystem::path& LogPath, uint64_t SkipEntryCount);
	void				   OpenContainer(bool IsNewStore);
	void				   CompactIndex(RwLock::ExclusiveLockScope&);

	LoggerRef Log() { return m_Log; }

	LoggerRef					   m_Log;
	GcManager&					   m_Gc;
	std::filesystem::path		   m_RootDirectory;
	uint32_t					   m_PayloadAlignment = 1u << 4;
	uint64_t					   m_MaxBlockSize	  = 1u << 28;
	bool						   m_IsInitialized	  = false;
	TCasLogFile<CasDiskIndexEntry> m_CasLog;
	uint64_t					   m_LogFlushPosition = 0;
	std::string					   m_ContainerBaseName;
	std::filesystem::path		   m_BlocksBasePath;
	BlockStore					   m_BlockStore;

	RwLock												   m_LocationMapLock;
	typedef tsl::robin_map<IoHash, size_t, IoHash::Hasher> LocationMap_t;
	LocationMap_t										   m_LocationMap;
	std::vector<BlockStoreDiskLocation>					   m_Locations;

	friend class CasContainerReferencePruner;
	friend class CasContainerStoreCompactor;
};

// Force-link hook for this module.
// NOTE(review): presumably an empty function referenced from elsewhere so that the linker keeps
// the compactcas translation unit - confirm at the definition.
void compactcas_forcelink();

}  // namespace zen