aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore
diff options
context:
space:
mode:
authorzousar <[email protected]>2025-06-24 16:26:29 -0600
committerzousar <[email protected]>2025-06-24 16:26:29 -0600
commitbb298631ba35a323827dda0b8cd6158e276b5f61 (patch)
tree7ba8db91c44ce83f2c518f80f80ab14910eefa6f /src/zenstore
parentChange to PutResult structure (diff)
parent5.6.14 (diff)
downloadzen-bb298631ba35a323827dda0b8cd6158e276b5f61.tar.xz
zen-bb298631ba35a323827dda0b8cd6158e276b5f61.zip
Merge branch 'main' into zs/put-overwrite-policy
Diffstat (limited to 'src/zenstore')
-rw-r--r--src/zenstore/blockstore.cpp256
-rw-r--r--src/zenstore/buildstore/buildstore.cpp2053
-rw-r--r--src/zenstore/cache/cachedisklayer.cpp671
-rw-r--r--src/zenstore/cache/cacherpc.cpp116
-rw-r--r--src/zenstore/cache/structuredcachestore.cpp51
-rw-r--r--src/zenstore/cas.cpp9
-rw-r--r--src/zenstore/caslog.cpp2
-rw-r--r--src/zenstore/chunkedfile.cpp505
-rw-r--r--src/zenstore/chunking.cpp382
-rw-r--r--src/zenstore/chunking.h56
-rw-r--r--src/zenstore/compactcas.cpp707
-rw-r--r--src/zenstore/compactcas.h6
-rw-r--r--src/zenstore/filecas.cpp262
-rw-r--r--src/zenstore/filecas.h2
-rw-r--r--src/zenstore/gc.cpp581
-rw-r--r--src/zenstore/include/zenstore/accesstime.h53
-rw-r--r--src/zenstore/include/zenstore/blockstore.h18
-rw-r--r--src/zenstore/include/zenstore/buildstore/buildstore.h228
-rw-r--r--src/zenstore/include/zenstore/cache/cachedisklayer.h83
-rw-r--r--src/zenstore/include/zenstore/cache/cacheshared.h42
-rw-r--r--src/zenstore/include/zenstore/cache/structuredcachestore.h10
-rw-r--r--src/zenstore/include/zenstore/chunkedfile.h54
-rw-r--r--src/zenstore/include/zenstore/gc.h17
-rw-r--r--src/zenstore/workspaces.cpp26
-rw-r--r--src/zenstore/xmake.lua1
-rw-r--r--src/zenstore/zenstore.cpp2
26 files changed, 4320 insertions, 1873 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp
index e976c061d..7b56c64bd 100644
--- a/src/zenstore/blockstore.cpp
+++ b/src/zenstore/blockstore.cpp
@@ -70,7 +70,7 @@ BlockStoreFile::Open()
return false;
}
ZEN_WARN("Failed to open cas block '{}', reason: '{}', retries left: {}.", m_Path, Ec.message(), RetriesLeft);
- Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms
+ Sleep(100 + (3 - RetriesLeft) * 100); // Total 600 ms
RetriesLeft--;
return true;
});
@@ -85,7 +85,7 @@ BlockStoreFile::Create(uint64_t InitialSize)
ZEN_TRACE_CPU("BlockStoreFile::Create");
auto ParentPath = m_Path.parent_path();
- if (!std::filesystem::is_directory(ParentPath))
+ if (!IsDir(ParentPath))
{
CreateDirectories(ParentPath);
}
@@ -153,14 +153,28 @@ void
BlockStoreFile::Write(const void* Data, uint64_t Size, uint64_t FileOffset)
{
ZEN_TRACE_CPU("BlockStoreFile::Write");
+#if ZEN_BUILD_DEBUG
+ if (uint64_t CachedFileSize = m_CachedFileSize.load(); CachedFileSize > 0)
+ {
+ ZEN_ASSERT(FileOffset + Size <= CachedFileSize);
+ }
+#endif // ZEN_BUILD_DEBUG
m_File.Write(Data, Size, FileOffset);
}
void
-BlockStoreFile::Flush()
+BlockStoreFile::Flush(uint64_t FinalSize)
{
ZEN_TRACE_CPU("BlockStoreFile::Flush");
m_File.Flush();
+ if (FinalSize != (uint64_t)-1)
+ {
+ uint64_t ExpectedSize = 0;
+ if (!m_CachedFileSize.compare_exchange_weak(ExpectedSize, FinalSize))
+ {
+ ZEN_ASSERT(m_CachedFileSize.load() == FinalSize);
+ }
+ }
}
BasicFile&
@@ -215,7 +229,7 @@ IsMetaDataValid(const std::filesystem::path& BlockPath, const std::filesystem::p
}
if (MetaWriteTime < BlockWriteTime)
{
- std::filesystem::remove(MetaPath, Ec);
+ RemoveFile(MetaPath, Ec);
return false;
}
return true;
@@ -239,7 +253,7 @@ BlockStoreFile::MetaSize() const
if (IsMetaDataValid(m_Path, MetaPath))
{
std::error_code DummyEc;
- if (uint64_t Size = std::filesystem::file_size(MetaPath, DummyEc); !DummyEc)
+ if (uint64_t Size = FileSizeFromPath(MetaPath, DummyEc); !DummyEc)
{
return Size;
}
@@ -252,7 +266,7 @@ BlockStoreFile::RemoveMeta()
{
std::filesystem::path MetaPath = GetMetaPath();
std::error_code DummyEc;
- std::filesystem::remove(MetaPath, DummyEc);
+ RemoveFile(MetaPath, DummyEc);
}
std::filesystem::path
@@ -272,6 +286,14 @@ BlockStore::BlockStore()
BlockStore::~BlockStore()
{
+ try
+ {
+ Close();
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("~BlockStore() failed with: ", Ex.what());
+ }
}
void
@@ -291,8 +313,9 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t Max
m_MaxBlockSize = MaxBlockSize;
m_MaxBlockCount = MaxBlockCount;
- if (std::filesystem::is_directory(m_BlocksBasePath))
+ if (IsDir(m_BlocksBasePath))
{
+ std::vector<std::filesystem::path> EmptyBlockFiles;
uint32_t NextBlockIndex = 0;
std::vector<std::filesystem::path> FoldersToScan;
FoldersToScan.push_back(m_BlocksBasePath);
@@ -320,6 +343,12 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t Max
{
continue;
}
+ if (Entry.file_size() == 0)
+ {
+ EmptyBlockFiles.push_back(Path);
+ continue;
+ }
+
Ref<BlockStoreFile> BlockFile{new BlockStoreFile(Path)};
BlockFile->Open();
m_TotalSize.fetch_add(BlockFile->TotalSize(), std::memory_order::relaxed);
@@ -333,6 +362,17 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t Max
}
++FolderOffset;
}
+
+ for (const std::filesystem::path& EmptyBlockFile : EmptyBlockFiles)
+ {
+ std::error_code Ec;
+ RemoveFile(EmptyBlockFile, Ec);
+ if (Ec)
+ {
+ ZEN_WARN("Unable to remove empty block file {}. Reason: {}", EmptyBlockFile, Ec.message());
+ }
+ }
+
m_WriteBlockIndex.store(NextBlockIndex, std::memory_order_release);
}
else
@@ -341,7 +381,7 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t Max
}
}
-void
+BlockStore::BlockIndexSet
BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks)
{
ZEN_MEMSCOPE(GetBlocksTag());
@@ -349,8 +389,8 @@ BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks)
RwLock::ExclusiveLockScope InsertLock(m_InsertLock);
- tsl::robin_set<uint32_t> MissingBlocks;
- tsl::robin_set<uint32_t> DeleteBlocks;
+ BlockIndexSet MissingBlocks;
+ BlockIndexSet DeleteBlocks;
DeleteBlocks.reserve(m_ChunkBlocks.size());
for (auto It : m_ChunkBlocks)
{
@@ -369,13 +409,6 @@ BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks)
MissingBlocks.insert(BlockIndex);
}
}
- for (std::uint32_t BlockIndex : MissingBlocks)
- {
- std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, BlockIndex);
- Ref<BlockStoreFile> NewBlockFile(new BlockStoreFile(BlockPath));
- NewBlockFile->Create(0);
- m_ChunkBlocks[BlockIndex] = NewBlockFile;
- }
for (std::uint32_t BlockIndex : DeleteBlocks)
{
std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, BlockIndex);
@@ -386,6 +419,7 @@ BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks)
}
m_ChunkBlocks.erase(BlockIndex);
}
+ return MissingBlocks;
}
BlockStore::BlockEntryCountMap
@@ -500,7 +534,7 @@ BlockStore::GetFreeBlockIndex(uint32_t ProbeIndex, RwLock::ExclusiveLockScope&,
{
OutBlockPath = GetBlockPath(m_BlocksBasePath, ProbeIndex);
std::error_code Ec;
- bool Exists = std::filesystem::exists(OutBlockPath, Ec);
+ bool Exists = IsFile(OutBlockPath, Ec);
if (Ec)
{
ZEN_WARN("Failed to probe existence of file '{}' when trying to allocate a new block. Reason: '{}'",
@@ -540,7 +574,7 @@ BlockStore::WriteChunk(const void* Data, uint64_t Size, uint32_t Alignment, cons
{
if (m_WriteBlock)
{
- m_WriteBlock->Flush();
+ m_WriteBlock->Flush(m_CurrentInsertOffset);
m_WriteBlock = nullptr;
}
@@ -578,7 +612,7 @@ BlockStore::WriteChunk(const void* Data, uint64_t Size, uint32_t Alignment, cons
}
void
-BlockStore::WriteChunks(std::span<IoBuffer> Datas, uint32_t Alignment, const WriteChunksCallback& Callback)
+BlockStore::WriteChunks(std::span<const IoBuffer> Datas, uint32_t Alignment, const WriteChunksCallback& Callback)
{
ZEN_MEMSCOPE(GetBlocksTag());
ZEN_TRACE_CPU("BlockStore::WriteChunks");
@@ -674,6 +708,27 @@ BlockStore::WriteChunks(std::span<IoBuffer> Datas, uint32_t Alignment, const Wri
}
}
+bool
+BlockStore::HasChunk(const BlockStoreLocation& Location) const
+{
+ ZEN_TRACE_CPU("BlockStore::TryGetChunk");
+ RwLock::SharedLockScope InsertLock(m_InsertLock);
+ if (auto BlockIt = m_ChunkBlocks.find(Location.BlockIndex); BlockIt != m_ChunkBlocks.end())
+ {
+ if (const Ref<BlockStoreFile>& Block = BlockIt->second; Block)
+ {
+ InsertLock.ReleaseNow();
+
+ const uint64_t BlockSize = Block->FileSize();
+ if (Location.Offset + Location.Size <= BlockSize)
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
IoBuffer
BlockStore::TryGetChunk(const BlockStoreLocation& Location) const
{
@@ -706,7 +761,7 @@ BlockStore::Flush(bool ForceNewBlock)
{
if (m_WriteBlock)
{
- m_WriteBlock->Flush();
+ m_WriteBlock->Flush(m_CurrentInsertOffset);
}
m_WriteBlock = nullptr;
m_CurrentInsertOffset = 0;
@@ -735,6 +790,8 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations,
return true;
}
+ ZEN_ASSERT(ChunkLocations.size() >= InChunkIndexes.size());
+
if (LargeSizeLimit == 0)
{
LargeSizeLimit = DefaultIterateSmallChunkWindowSize;
@@ -746,7 +803,10 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations,
IterateSmallChunkWindowSize = Min((LargeSizeLimit + IterateSmallChunkMaxGapSize) * ChunkLocations.size(), IterateSmallChunkWindowSize);
- uint32_t BlockIndex = ChunkLocations[InChunkIndexes[0]].BlockIndex;
+ const size_t FirstLocationIndex = InChunkIndexes[0];
+ ZEN_ASSERT(FirstLocationIndex < ChunkLocations.size());
+
+ const uint32_t BlockIndex = ChunkLocations[FirstLocationIndex].BlockIndex;
std::vector<size_t> ChunkIndexes(InChunkIndexes.begin(), InChunkIndexes.end());
std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&](size_t IndexA, size_t IndexB) -> bool {
return ChunkLocations[IndexA].Offset < ChunkLocations[IndexB].Offset;
@@ -756,8 +816,9 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations,
IterateSmallChunkWindowSize,
IterateSmallChunkMaxGapSize,
&ChunkLocations](uint64_t BlockFileSize, std::span<const size_t> ChunkIndexes, size_t StartIndexOffset) -> size_t {
- size_t ChunkCount = 0;
- size_t StartIndex = ChunkIndexes[StartIndexOffset];
+ size_t ChunkCount = 0;
+ size_t StartIndex = ChunkIndexes[StartIndexOffset];
+ ZEN_ASSERT(StartIndex < ChunkLocations.size());
const BlockStoreLocation& StartLocation = ChunkLocations[StartIndex];
uint64_t StartOffset = StartLocation.Offset;
uint64_t LastEnd = StartOffset + StartLocation.Size;
@@ -810,22 +871,26 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations,
ZEN_ASSERT(BlockFile);
InsertLock.ReleaseNow();
+ const size_t BlockSize = BlockFile->FileSize();
+
IoBuffer ReadBuffer;
void* BufferBase = nullptr;
size_t LocationIndexOffset = 0;
while (LocationIndexOffset < ChunkIndexes.size())
{
- size_t ChunkIndex = ChunkIndexes[LocationIndexOffset];
+ size_t ChunkIndex = ChunkIndexes[LocationIndexOffset];
+ ZEN_ASSERT(ChunkIndex < ChunkLocations.size());
const BlockStoreLocation& FirstLocation = ChunkLocations[ChunkIndex];
+ ZEN_ASSERT(FirstLocation.BlockIndex == BlockIndex);
- const size_t BlockSize = BlockFile->FileSize();
const size_t RangeCount = GetNextRange(BlockSize, ChunkIndexes, LocationIndexOffset);
if (RangeCount > 1)
{
- size_t LastChunkIndex = ChunkIndexes[LocationIndexOffset + RangeCount - 1];
- const BlockStoreLocation& LastLocation = ChunkLocations[LastChunkIndex];
- uint64_t Size = LastLocation.Offset + LastLocation.Size - FirstLocation.Offset;
+ size_t LastChunkIndex = ChunkIndexes[LocationIndexOffset + RangeCount - 1];
+ ZEN_ASSERT(LastChunkIndex < ChunkLocations.size());
+ const BlockStoreLocation& LastLocation = ChunkLocations[LastChunkIndex];
+ uint64_t Size = LastLocation.Offset + LastLocation.Size - FirstLocation.Offset;
if (ReadBuffer.GetSize() < Size)
{
ReadBuffer = IoBuffer(Min(Size * 2, IterateSmallChunkWindowSize));
@@ -834,8 +899,9 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations,
BlockFile->Read(BufferBase, Size, FirstLocation.Offset);
for (size_t RangeIndex = 0; RangeIndex < RangeCount; ++RangeIndex)
{
- size_t NextChunkIndex = ChunkIndexes[LocationIndexOffset + RangeIndex];
- const BlockStoreLocation& ChunkLocation = ChunkLocations[NextChunkIndex];
+ size_t NextChunkIndex = ChunkIndexes[LocationIndexOffset + RangeIndex];
+ ZEN_ASSERT(NextChunkIndex < ChunkLocations.size());
+ const BlockStoreLocation& ChunkLocation = ChunkLocations[NextChunkIndex];
if (ChunkLocation.Size == 0 || ((ChunkLocation.Offset + ChunkLocation.Size) > BlockSize))
{
ZEN_LOG_SCOPE("chunk [{},{}] out of bounds (block #{} file size = {})",
@@ -958,6 +1024,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState,
uint32_t NewBlockIndex = 0;
MovedChunksArray MovedChunks;
+ ChunkIndexArray ScrubbedChunks;
uint64_t AddedSize = 0;
uint64_t RemovedSize = 0;
@@ -986,14 +1053,16 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState,
auto ReportChanges = [&]() -> bool {
bool Continue = true;
- if (!MovedChunks.empty() || RemovedSize > 0)
+ if (!MovedChunks.empty() || !ScrubbedChunks.empty() || RemovedSize > 0)
{
- Continue = ChangeCallback(MovedChunks, RemovedSize > AddedSize ? RemovedSize - AddedSize : 0);
+ Continue = ChangeCallback(MovedChunks, ScrubbedChunks, RemovedSize > AddedSize ? RemovedSize - AddedSize : 0);
DeletedSize += RemovedSize;
+ m_TotalSize.fetch_add(AddedSize);
RemovedSize = 0;
AddedSize = 0;
MovedCount += MovedChunks.size();
MovedChunks.clear();
+ ScrubbedChunks.clear();
}
return Continue;
};
@@ -1022,6 +1091,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState,
LogPrefix,
m_BlocksBasePath,
BlockIndex);
+ ScrubbedChunks.insert(ScrubbedChunks.end(), KeepChunkIndexes.begin(), KeepChunkIndexes.end());
return true;
}
if (!It->second)
@@ -1030,6 +1100,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState,
LogPrefix,
m_BlocksBasePath,
BlockIndex);
+ ScrubbedChunks.insert(ScrubbedChunks.end(), KeepChunkIndexes.begin(), KeepChunkIndexes.end());
return true;
}
OldBlockFile = It->second;
@@ -1051,11 +1122,10 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState,
std::sort(SortedChunkIndexes.begin(), SortedChunkIndexes.end(), [&ChunkLocations](size_t Lhs, size_t Rhs) {
return ChunkLocations[Lhs].Offset < ChunkLocations[Rhs].Offset;
});
- BasicFileBuffer SourceFileBuffer(OldBlockFile->GetBasicFile(), Min(65536u, OldBlockSize));
+ BasicFileBuffer SourceFileBuffer(OldBlockFile->GetBasicFile(), Min(256u * 1024u, OldBlockSize));
- uint64_t WrittenBytesToBlock = 0;
- uint64_t MovedFromBlock = 0;
- std::vector<uint8_t> Chunk;
+ uint64_t MovedFromBlock = 0;
+ std::vector<uint8_t> ChunkBuffer;
for (const size_t& ChunkIndex : SortedChunkIndexes)
{
const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex];
@@ -1070,19 +1140,29 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState,
ChunkLocation.Size,
OldBlockFile->GetPath(),
OldBlockSize);
+ ScrubbedChunks.push_back(ChunkIndex);
continue;
}
- Chunk.resize(ChunkLocation.Size);
- SourceFileBuffer.Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset);
+ MemoryView ChunkView = SourceFileBuffer.MakeView(ChunkLocation.Size, ChunkLocation.Offset);
+ if (ChunkView.GetSize() != ChunkLocation.Size)
+ {
+ ChunkBuffer.resize(ChunkLocation.Size);
+ SourceFileBuffer.Read(ChunkBuffer.data(), ChunkLocation.Size, ChunkLocation.Offset);
+ ChunkView = MemoryView(ChunkBuffer.data(), ChunkLocation.Size);
+ }
- if ((WriteOffset + Chunk.size()) > m_MaxBlockSize)
+ if ((WriteOffset + ChunkView.GetSize()) > m_MaxBlockSize)
{
- TargetFileBuffer.reset();
+ if (TargetFileBuffer)
+ {
+ TargetFileBuffer->Flush();
+ TargetFileBuffer.reset();
+ }
if (NewBlockFile)
{
ZEN_ASSERT_SLOW(NewBlockFile->IsOpen());
- NewBlockFile->Flush();
+ NewBlockFile->Flush(WriteOffset);
uint64_t NewBlockSize = NewBlockFile->FileSize();
MovedSize += NewBlockSize;
NewBlockFile = nullptr;
@@ -1161,22 +1241,23 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState,
NiceBytes(Space.Free + ReclaimedSpace));
}
NewBlockFile->Create(m_MaxBlockSize);
- NewBlockIndex = NextBlockIndex;
- WriteOffset = 0;
- AddedSize += WrittenBytesToBlock;
- WrittenBytesToBlock = 0;
- TargetFileBuffer = std::make_unique<BasicFileWriter>(NewBlockFile->GetBasicFile(), Min(65536u, m_MaxBlockSize));
+ NewBlockIndex = NextBlockIndex;
+ WriteOffset = 0;
+ TargetFileBuffer = std::make_unique<BasicFileWriter>(NewBlockFile->GetBasicFile(), Min(256u * 1024u, m_MaxBlockSize));
}
- TargetFileBuffer->Write(Chunk.data(), ChunkLocation.Size, WriteOffset);
+ const uint64_t OldWriteOffset = WriteOffset;
+ WriteOffset = TargetFileBuffer->AlignTo(PayloadAlignment);
+
+ TargetFileBuffer->Write(ChunkView.GetData(), ChunkLocation.Size, WriteOffset);
MovedChunks.push_back(
{ChunkIndex, {.BlockIndex = NewBlockIndex, .Offset = gsl::narrow<uint32_t>(WriteOffset), .Size = ChunkLocation.Size}});
- WrittenBytesToBlock = WriteOffset + ChunkLocation.Size;
+ WriteOffset += ChunkLocation.Size;
MovedFromBlock += RoundUp(ChunkLocation.Offset + ChunkLocation.Size, PayloadAlignment) - ChunkLocation.Offset;
- WriteOffset = RoundUp(WriteOffset + ChunkLocation.Size, PayloadAlignment);
+ uint64_t WrittenBytes = WriteOffset - OldWriteOffset;
+ AddedSize += WrittenBytes;
}
- AddedSize += WrittenBytesToBlock;
ZEN_INFO("{}moved {} chunks ({}) from '{}' to new block, freeing {}",
LogPrefix,
KeepChunkIndexes.size(),
@@ -1209,10 +1290,16 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState,
return true;
});
+ if (TargetFileBuffer)
+ {
+ TargetFileBuffer->Flush();
+ TargetFileBuffer.reset();
+ }
+
if (NewBlockFile)
{
ZEN_ASSERT_SLOW(NewBlockFile->IsOpen());
- NewBlockFile->Flush();
+ NewBlockFile->Flush(WriteOffset);
uint64_t NewBlockSize = NewBlockFile->FileSize();
MovedSize += NewBlockSize;
NewBlockFile = nullptr;
@@ -1343,6 +1430,8 @@ TEST_CASE("blockstore.blockfile")
CHECK(std::string(Boop) == "boop");
File1.Flush();
CHECK(File1.FileSize() == 10);
+ File1.Flush(10);
+ CHECK(File1.FileSize() == 10);
}
{
BlockStoreFile File1(RootDirectory / "1");
@@ -1375,14 +1464,14 @@ TEST_CASE("blockstore.blockfile")
BoopChunk = File1.GetChunk(5, 5);
}
- CHECK(std::filesystem::exists(RootDirectory / "1"));
+ CHECK(IsFile(RootDirectory / "1"));
const char* Data = static_cast<const char*>(DataChunk.GetData());
CHECK(std::string(Data) == "data");
const char* Boop = static_cast<const char*>(BoopChunk.GetData());
CHECK(std::string(Boop) == "boop");
}
- CHECK(std::filesystem::exists(RootDirectory / "1"));
+ CHECK(IsFile(RootDirectory / "1"));
{
IoBuffer DataChunk;
@@ -1401,7 +1490,7 @@ TEST_CASE("blockstore.blockfile")
const char* Boop = static_cast<const char*>(BoopChunk.GetData());
CHECK(std::string(Boop) == "boop");
}
- CHECK(!std::filesystem::exists(RootDirectory / "1"));
+ CHECK(!IsFile(RootDirectory / "1"));
}
namespace blockstore::impl {
@@ -1800,7 +1889,7 @@ TEST_CASE("blockstore.compact.blocks")
Store.CompactBlocks(
State,
Alignment,
- [&](const BlockStore::MovedChunksArray&, uint64_t) {
+ [&](const BlockStore::MovedChunksArray&, const BlockStore::ChunkIndexArray&, uint64_t) {
CHECK(false);
return true;
},
@@ -1825,9 +1914,10 @@ TEST_CASE("blockstore.compact.blocks")
Store.CompactBlocks(
State,
Alignment,
- [&](const BlockStore::MovedChunksArray& Moved, uint64_t Removed) {
+ [&](const BlockStore::MovedChunksArray& Moved, const BlockStore::ChunkIndexArray& Scrubbed, uint64_t Removed) {
RemovedSize += Removed;
CHECK(Moved.empty());
+ CHECK(Scrubbed.empty());
return true;
},
[]() { return 0; });
@@ -1850,9 +1940,10 @@ TEST_CASE("blockstore.compact.blocks")
Store.CompactBlocks(
State,
Alignment,
- [&](const BlockStore::MovedChunksArray& Moved, uint64_t Removed) {
+ [&](const BlockStore::MovedChunksArray& Moved, const BlockStore::ChunkIndexArray& Scrubbed, uint64_t Removed) {
RemovedSize += Removed;
CHECK(Moved.empty());
+ CHECK(Scrubbed.empty());
return true;
},
[]() { return 0; });
@@ -1860,7 +1951,7 @@ TEST_CASE("blockstore.compact.blocks")
CHECK_LE(Store.TotalSize(), 1088);
CHECK_GT(Store.TotalSize(), 0);
}
- SUBCASE("keep everthing")
+ SUBCASE("keep everything")
{
Store.Flush(true);
@@ -1873,7 +1964,7 @@ TEST_CASE("blockstore.compact.blocks")
Store.CompactBlocks(
State,
Alignment,
- [&](const BlockStore::MovedChunksArray&, uint64_t) {
+ [&](const BlockStore::MovedChunksArray&, const BlockStore::ChunkIndexArray&, uint64_t) {
CHECK(false);
return true;
},
@@ -1904,8 +1995,9 @@ TEST_CASE("blockstore.compact.blocks")
Store.CompactBlocks(
State,
Alignment,
- [&](const BlockStore::MovedChunksArray& Moved, uint64_t Removed) {
+ [&](const BlockStore::MovedChunksArray& Moved, const BlockStore::ChunkIndexArray& Scrubbed, uint64_t Removed) {
CHECK(Moved.empty());
+ CHECK(Scrubbed.empty());
RemovedSize += Removed;
return true;
},
@@ -1939,7 +2031,8 @@ TEST_CASE("blockstore.compact.blocks")
Store.CompactBlocks(
State,
Alignment,
- [&](const BlockStore::MovedChunksArray& Moved, uint64_t Removed) {
+ [&](const BlockStore::MovedChunksArray& Moved, const BlockStore::ChunkIndexArray& Scrubbed, uint64_t Removed) {
+ CHECK(Scrubbed.empty());
for (const auto& Move : Moved)
{
const BlockStoreLocation& OldLocation = State.GetLocation(Move.first);
@@ -2016,7 +2109,8 @@ TEST_CASE("blockstore.compact.blocks")
Store.CompactBlocks(
State,
Alignment,
- [&](const BlockStore::MovedChunksArray& Moved, uint64_t Removed) {
+ [&](const BlockStore::MovedChunksArray& Moved, const BlockStore::ChunkIndexArray& Scrubbed, uint64_t Removed) {
+ CHECK(Scrubbed.empty());
for (const auto& Move : Moved)
{
const BlockStoreLocation& OldLocation = State.GetLocation(Move.first);
@@ -2051,6 +2145,42 @@ TEST_CASE("blockstore.compact.blocks")
}
CHECK_LT(Store.TotalSize(), PreSize);
}
+ SUBCASE("scrub")
+ {
+ Store.Flush(true);
+
+ BlockStoreCompactState State;
+ for (const BlockStoreLocation& Location : ChunkLocations)
+ {
+ State.IncludeBlock(Location.BlockIndex);
+ CHECK(State.AddKeepLocation(Location));
+ }
+ State.IncludeBlock(0);
+ State.IncludeBlock(999);
+ std::vector<size_t> ExpectedScrubbedIndexes;
+ ExpectedScrubbedIndexes.push_back(ChunkLocations.size() + 0);
+ State.AddKeepLocation(BlockStoreLocation{.BlockIndex = 0, .Offset = 2000, .Size = 322});
+ ExpectedScrubbedIndexes.push_back(ChunkLocations.size() + 1);
+ State.AddKeepLocation(BlockStoreLocation{.BlockIndex = 0, .Offset = 10, .Size = 3220});
+ ExpectedScrubbedIndexes.push_back(ChunkLocations.size() + 2);
+ State.AddKeepLocation(BlockStoreLocation{.BlockIndex = 999, .Offset = 2, .Size = 40});
+
+ std::vector<size_t> ScrubbedIndexes;
+
+ Store.CompactBlocks(
+ State,
+ Alignment,
+ [&](const BlockStore::MovedChunksArray&, const BlockStore::ChunkIndexArray& ScrubbedArray, uint64_t) {
+ ScrubbedIndexes.insert(ScrubbedIndexes.end(), ScrubbedArray.begin(), ScrubbedArray.end());
+ return true;
+ },
+ []() {
+ CHECK(false);
+ return 0;
+ });
+ std::sort(ScrubbedIndexes.begin(), ScrubbedIndexes.end());
+ CHECK_EQ(ExpectedScrubbedIndexes, ScrubbedIndexes);
+ }
}
#endif
diff --git a/src/zenstore/buildstore/buildstore.cpp b/src/zenstore/buildstore/buildstore.cpp
new file mode 100644
index 000000000..20dc55bca
--- /dev/null
+++ b/src/zenstore/buildstore/buildstore.cpp
@@ -0,0 +1,2053 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenstore/buildstore/buildstore.h>
+
+#include <zencore/compactbinarybuilder.h>
+#include <zencore/fmtutils.h>
+#include <zencore/logging.h>
+#include <zencore/memory/llm.h>
+#include <zencore/scopeguard.h>
+#include <zencore/trace.h>
+#include <zencore/workthreadpool.h>
+#include <zenutil/parallelwork.h>
+
+#include <zencore/uid.h>
+#include <zencore/xxhash.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <gsl/gsl-lite.hpp>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+#if ZEN_WITH_TESTS
+# include <zencore/compactbinarybuilder.h>
+# include <zencore/compress.h>
+# include <zencore/testing.h>
+# include <zencore/testutils.h>
+# include <zenutil/workerpools.h>
+#endif // ZEN_WITH_TESTS
+
+namespace zen {
+const FLLMTag&
+GetBuildstoreTag()
+{
+ static FLLMTag _("store", FLLMTag("builds"));
+
+ return _;
+}
+
+using namespace std::literals;
+
+namespace blobstore::impl {
+
+ const std::string BaseName = "builds";
+ const std::string ManifestExtension = ".cbo";
+ const char* IndexExtension = ".uidx";
+ const char* LogExtension = ".slog";
+ const char* AccessTimeExtension = ".zacs";
+
+ const uint32_t ManifestVersion = (1 << 16) | (0 << 8) | (0);
+
+ std::filesystem::path GetManifestPath(const std::filesystem::path& RootDirectory)
+ {
+ return RootDirectory / (BaseName + ManifestExtension);
+ }
+
+ std::filesystem::path GetBlobIndexPath(const std::filesystem::path& RootDirectory)
+ {
+ return RootDirectory / (BaseName + IndexExtension);
+ }
+
+ std::filesystem::path GetBlobLogPath(const std::filesystem::path& RootDirectory) { return RootDirectory / (BaseName + LogExtension); }
+
+ std::filesystem::path GetMetaIndexPath(const std::filesystem::path& RootDirectory)
+ {
+ return RootDirectory / (BaseName + "_meta" + IndexExtension);
+ }
+
+ std::filesystem::path GetMetaLogPath(const std::filesystem::path& RootDirectory)
+ {
+ return RootDirectory / (BaseName + "_meta" + LogExtension);
+ }
+
+ std::filesystem::path GetAccessTimesPath(const std::filesystem::path& RootDirectory)
+ {
+ return RootDirectory / (BaseName + AccessTimeExtension);
+ }
+
+ struct AccessTimeRecord
+ {
+ IoHash Key;
+ std::uint32_t SecondsSinceEpoch = 0;
+ };
+
+ static_assert(sizeof(AccessTimeRecord) == 24);
+
+#pragma pack(push)
+#pragma pack(1)
+ struct AccessTimesHeader
+ {
+ static constexpr uint32_t ExpectedMagic = 0x7363617a; // 'zacs';
+ static constexpr uint32_t CurrentVersion = 1;
+ static constexpr uint64_t DataAlignment = 8;
+
+ uint32_t Magic = ExpectedMagic;
+ uint32_t Version = CurrentVersion;
+ uint32_t AccessTimeCount = 0;
+ uint32_t Checksum = 0;
+
+ static uint32_t ComputeChecksum(const AccessTimesHeader& Header)
+ {
+ return XXH32(&Header.Magic, sizeof(AccessTimesHeader) - sizeof(uint32_t), 0xC0C0'BABA);
+ }
+ };
+#pragma pack(pop)
+
+ static_assert(sizeof(AccessTimesHeader) == 16);
+
+} // namespace blobstore::impl
+
+BuildStore::BuildStore(const BuildStoreConfig& Config, GcManager& Gc)
+: m_Log(logging::Get("builds"))
+, m_Config(Config)
+, m_Gc(Gc)
+, m_LargeBlobStore(m_Gc)
+, m_SmallBlobStore(Gc)
+, m_MetadataBlockStore()
+{
+ ZEN_TRACE_CPU("BuildStore::BuildStore");
+ ZEN_MEMSCOPE(GetBuildstoreTag());
+ try
+ {
+ bool IsNew = true;
+ Stopwatch Timer;
+ const auto _ = MakeGuard([&] {
+ ZEN_INFO("{} build store at {} in {}",
+ IsNew ? "Initialized" : "Read",
+ m_Config.RootDirectory,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ });
+
+ std::filesystem::path BlobLogPath = blobstore::impl::GetBlobLogPath(Config.RootDirectory);
+ std::filesystem::path MetaLogPath = blobstore::impl::GetMetaLogPath(Config.RootDirectory);
+ std::filesystem::path ManifestPath = blobstore::impl::GetManifestPath(Config.RootDirectory);
+ std::filesystem::path AccessTimesPath = blobstore::impl::GetAccessTimesPath(Config.RootDirectory);
+ if (IsFile(ManifestPath) && IsFile(BlobLogPath) && IsFile(MetaLogPath))
+ {
+ IsNew = false;
+ }
+
+ if (!IsNew)
+ {
+ RwLock::ExclusiveLockScope Lock(m_Lock);
+
+ CbObject ManifestReader = LoadCompactBinaryObject(ReadFile(ManifestPath).Flatten());
+ Oid ManifestId = ManifestReader["id"].AsObjectId();
+ uint32_t Version = ManifestReader["version"].AsUInt32();
+ DateTime CreationDate = ManifestReader["createdAt"].AsDateTime();
+ ZEN_UNUSED(CreationDate);
+ if (ManifestId == Oid::Zero || Version != blobstore::impl::ManifestVersion)
+ {
+ ZEN_WARN("Invalid manifest at {}, wiping state", ManifestPath);
+ IsNew = true;
+ }
+ else
+ {
+ m_BlobLogFlushPosition = ReadPayloadLog(Lock, BlobLogPath, 0);
+ m_MetaLogFlushPosition = ReadMetadataLog(Lock, MetaLogPath, 0);
+ if (IsFile(AccessTimesPath))
+ {
+ ReadAccessTimes(Lock, AccessTimesPath);
+ }
+ }
+ }
+
+ if (IsNew)
+ {
+ CleanDirectory(Config.RootDirectory, false);
+ CbObjectWriter ManifestWriter;
+ ManifestWriter.AddObjectId("id", Oid::NewOid());
+ ManifestWriter.AddInteger("version", blobstore::impl::ManifestVersion);
+ ManifestWriter.AddDateTime("createdAt", DateTime::Now());
+ TemporaryFile::SafeWriteFile(ManifestPath, ManifestWriter.Save().GetBuffer().AsIoBuffer());
+ }
+ m_LargeBlobStore.Initialize(Config.RootDirectory / "file_cas", IsNew);
+ m_SmallBlobStore.Initialize(Config.RootDirectory,
+ "blob_cas",
+ m_Config.SmallBlobBlockStoreMaxBlockSize,
+ m_Config.SmallBlobBlockStoreAlignement,
+ IsNew);
+ m_MetadataBlockStore.Initialize(Config.RootDirectory / "metadata", m_Config.MetadataBlockStoreMaxBlockSize, 1u << 20);
+
+ BlockStore::BlockIndexSet KnownBlocks;
+ for (const BlobEntry& Blob : m_BlobEntries)
+ {
+ if (const MetadataIndex MetaIndex = Blob.Metadata; MetaIndex)
+ {
+ const MetadataEntry& Metadata = m_MetadataEntries[MetaIndex];
+ KnownBlocks.insert(Metadata.Location.BlockIndex);
+ }
+ }
+ BlockStore::BlockIndexSet MissingBlocks = m_MetadataBlockStore.SyncExistingBlocksOnDisk(KnownBlocks);
+
+ m_PayloadlogFile.Open(BlobLogPath, CasLogFile::Mode::kWrite);
+ m_MetadatalogFile.Open(MetaLogPath, CasLogFile::Mode::kWrite);
+
+ if (!MissingBlocks.empty())
+ {
+ std::vector<MetadataDiskEntry> MissingMetadatas;
+ for (auto& It : m_BlobLookup)
+ {
+ const IoHash& BlobHash = It.first;
+ const BlobIndex ReadBlobIndex = It.second;
+ const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex];
+ if (ReadBlobEntry.Metadata)
+ {
+ const MetadataEntry& MetaData = m_MetadataEntries[ReadBlobEntry.Metadata];
+ if (MissingBlocks.contains(MetaData.Location.BlockIndex))
+ {
+ MissingMetadatas.push_back(
+ MetadataDiskEntry{.Entry = m_MetadataEntries[ReadBlobEntry.Metadata], .BlobHash = BlobHash});
+ MissingMetadatas.back().Entry.Flags |= MetadataEntry::kTombStone;
+ m_MetadataEntries[ReadBlobEntry.Metadata] = {};
+ m_BlobEntries[ReadBlobIndex].Metadata = {};
+ }
+ }
+ }
+ ZEN_ASSERT(!MissingMetadatas.empty());
+
+ for (const MetadataDiskEntry& Entry : MissingMetadatas)
+ {
+ auto It = m_BlobLookup.find(Entry.BlobHash);
+ ZEN_ASSERT(It != m_BlobLookup.end());
+
+ const BlobIndex ReadBlobIndex = It->second;
+ const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex];
+ if (!ReadBlobEntry.Payload)
+ {
+ m_BlobLookup.erase(It);
+ }
+ }
+ m_MetadatalogFile.Append(MissingMetadatas);
+ CompactState();
+ }
+
+ m_Gc.AddGcReferencer(*this);
+ m_Gc.AddGcReferenceLocker(*this);
+ m_Gc.AddGcStorage(this);
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("Failed to initialize build store. Reason: '{}'", Ex.what());
+ m_Gc.RemoveGcStorage(this);
+ m_Gc.RemoveGcReferenceLocker(*this);
+ m_Gc.RemoveGcReferencer(*this);
+ }
+}
+
+BuildStore::~BuildStore()
+{
+ try
+ {
+ ZEN_TRACE_CPU("BuildStore::~BuildStore");
+ m_Gc.RemoveGcStorage(this);
+ m_Gc.RemoveGcReferenceLocker(*this);
+ m_Gc.RemoveGcReferencer(*this);
+ Flush();
+ m_MetadatalogFile.Close();
+ m_PayloadlogFile.Close();
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("~BuildStore() threw exception: {}", Ex.what());
+ }
+}
+
+void
+BuildStore::PutBlob(const IoHash& BlobHash, const IoBuffer& Payload)
+{
+ ZEN_TRACE_CPU("BuildStore::PutBlob");
+ ZEN_MEMSCOPE(GetBuildstoreTag());
+ ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCompressedBinary);
+ {
+ RwLock::SharedLockScope _(m_Lock);
+ if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end())
+ {
+ const BlobIndex BlobIndex = It->second;
+ if (m_BlobEntries[BlobIndex].Payload)
+ {
+ return;
+ }
+ }
+ }
+
+ uint64_t PayloadSize = Payload.GetSize();
+ PayloadEntry Entry;
+ if (Payload.GetSize() > m_Config.SmallBlobBlockStoreMaxBlockEmbedSize)
+ {
+ CasStore::InsertResult Result = m_LargeBlobStore.InsertChunk(Payload, BlobHash);
+ ZEN_UNUSED(Result);
+ Entry = PayloadEntry(PayloadEntry::kStandalone, PayloadSize);
+ }
+ else
+ {
+ CasStore::InsertResult Result = m_SmallBlobStore.InsertChunk(Payload, BlobHash);
+ ZEN_UNUSED(Result);
+ Entry = PayloadEntry(0, PayloadSize);
+ }
+
+ {
+ RwLock::ExclusiveLockScope _(m_Lock);
+ if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end())
+ {
+ const BlobIndex ExistingBlobIndex = It->second;
+ BlobEntry& Blob = m_BlobEntries[ExistingBlobIndex];
+ if (Blob.Payload)
+ {
+ m_PayloadEntries[Blob.Payload] = Entry;
+ }
+ else
+ {
+ Blob.Payload = PayloadIndex(gsl::narrow<uint32_t>(m_PayloadEntries.size()));
+ m_PayloadEntries.push_back(Entry);
+ }
+ Blob.LastAccessTime = GcClock::TickCount();
+ }
+ else
+ {
+ PayloadIndex NewPayloadIndex = PayloadIndex(gsl::narrow<uint32_t>(m_PayloadEntries.size()));
+ m_PayloadEntries.push_back(Entry);
+
+ const BlobIndex NewBlobIndex(gsl::narrow<uint32_t>(m_BlobEntries.size()));
+ // we only remove during GC and compact this then...
+ m_BlobEntries.push_back(BlobEntry{.Payload = NewPayloadIndex, .LastAccessTime = AccessTime(GcClock::TickCount())});
+ m_BlobLookup.insert({BlobHash, NewBlobIndex});
+ }
+ }
+ m_PayloadlogFile.Append(PayloadDiskEntry{.Entry = Entry, .BlobHash = BlobHash});
+ m_LastAccessTimeUpdateCount++;
+}
+
IoBuffer
BuildStore::GetBlob(const IoHash& BlobHash)
{
    ZEN_TRACE_CPU("BuildStore::GetBlob");
    ZEN_MEMSCOPE(GetBuildstoreTag());
    // Look the blob up under a shared lock; the lock is explicitly released
    // before the (potentially slow) chunk fetch from the backing stores.
    RwLock::SharedLockScope Lock(m_Lock);
    if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end())
    {
        const BlobIndex ExistingBlobIndex = It->second;
        BlobEntry& Blob = m_BlobEntries[ExistingBlobIndex];
        // NOTE(review): LastAccessTime is written while only a shared lock is
        // held — presumably access-time races are tolerated; confirm.
        Blob.LastAccessTime = GcClock::TickCount();
        if (Blob.Payload)
        {
            // Copy everything needed out of the index, then drop the lock
            // before touching disk.
            const PayloadEntry& Entry = m_PayloadEntries[Blob.Payload];
            const bool IsStandalone = (Entry.GetFlags() & PayloadEntry::kStandalone) != 0;
            Lock.ReleaseNow();

            IoBuffer Chunk;
            if (IsStandalone)
            {
                ZEN_TRACE_CPU("GetLarge");
                Chunk = m_LargeBlobStore.FindChunk(BlobHash);
            }
            else
            {
                ZEN_TRACE_CPU("GetSmall");
                Chunk = m_SmallBlobStore.FindChunk(BlobHash);
            }
            if (Chunk)
            {
                Chunk.SetContentType(ZenContentType::kCompressedBinary);
                return Chunk;
            }
            else
            {
                // Index claims a payload exists but the backing store has none.
                ZEN_WARN("Inconsistencies in build store, {} is in index but not {}", BlobHash, IsStandalone ? "on disk" : "in block");
            }
        }
    }
    // Unknown blob, or payload missing: return an empty buffer.
    return {};
}
+
+std::vector<BuildStore::BlobExistsResult>
+BuildStore::BlobsExists(std::span<const IoHash> BlobHashes)
+{
+ ZEN_TRACE_CPU("BuildStore::BlobsExists");
+ ZEN_MEMSCOPE(GetBuildstoreTag());
+ std::vector<BuildStore::BlobExistsResult> Result;
+ Result.reserve(BlobHashes.size());
+ RwLock::SharedLockScope _(m_Lock);
+ for (const IoHash& BlobHash : BlobHashes)
+ {
+ if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end())
+ {
+ const BlobIndex ExistingBlobIndex = It->second;
+ BlobEntry& Blob = m_BlobEntries[ExistingBlobIndex];
+ bool HasPayload = !!Blob.Payload;
+ bool HasMetadata = !!Blob.Metadata;
+ Result.push_back(BlobExistsResult{.HasBody = HasPayload, .HasMetadata = HasMetadata});
+ }
+ else
+ {
+ Result.push_back({});
+ }
+ }
+ return Result;
+}
+
void
// Writes one metadata buffer per blob hash into the metadata block store and
// records each resulting location in the in-memory index and metadata log.
// BlobHashes and MetaDatas are parallel arrays.
BuildStore::PutMetadatas(std::span<const IoHash> BlobHashes, std::span<const IoBuffer> MetaDatas)
{
    ZEN_TRACE_CPU("BuildStore::PutMetadatas");
    ZEN_MEMSCOPE(GetBuildstoreTag());
    // Running cursor into BlobHashes/MetaDatas; the callback may be invoked
    // multiple times, each delivering the locations for the next batch.
    size_t WriteBlobIndex = 0;
    m_MetadataBlockStore.WriteChunks(MetaDatas, m_Config.MetadataBlockStoreAlignement, [&](std::span<BlockStoreLocation> Locations) {
        // The whole batch is indexed under one exclusive lock.
        RwLock::ExclusiveLockScope _(m_Lock);
        for (size_t LocationIndex = 0; LocationIndex < Locations.size(); LocationIndex++)
        {
            const IoBuffer& Data = MetaDatas[WriteBlobIndex];
            const IoHash& BlobHash = BlobHashes[WriteBlobIndex];
            const BlockStoreLocation& Location = Locations[LocationIndex];

            MetadataEntry Entry = {.Location = Location, .ContentType = Data.GetContentType(), .Flags = 0};

            if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end())
            {
                // Existing blob: replace or attach the metadata slot.
                const BlobIndex ExistingBlobIndex = It->second;
                BlobEntry& Blob = m_BlobEntries[ExistingBlobIndex];
                if (Blob.Metadata)
                {
                    m_MetadataEntries[Blob.Metadata] = Entry;
                }
                else
                {
                    Blob.Metadata = MetadataIndex(gsl::narrow<uint32_t>(m_MetadataEntries.size()));
                    m_MetadataEntries.push_back(Entry);
                }
                Blob.LastAccessTime = GcClock::TickCount();
            }
            else
            {
                // New blob known only by metadata (no payload yet).
                MetadataIndex NewMetadataIndex = MetadataIndex(gsl::narrow<uint32_t>(m_MetadataEntries.size()));
                m_MetadataEntries.push_back(Entry);

                const BlobIndex NewBlobIndex(gsl::narrow<uint32_t>(m_BlobEntries.size()));
                m_BlobEntries.push_back(BlobEntry{.Metadata = NewMetadataIndex, .LastAccessTime = AccessTime(GcClock::TickCount())});
                m_BlobLookup.insert({BlobHash, NewBlobIndex});
            }

            // Append to the metadata log for crash recovery.
            m_MetadatalogFile.Append(MetadataDiskEntry{.Entry = Entry, .BlobHash = BlobHash});

            m_LastAccessTimeUpdateCount++;
            WriteBlobIndex++;
            // While GC compaction is running it tracks keys written
            // concurrently so it does not relocate them (see BuildStoreGcCompator).
            if (m_TrackedCacheKeys)
            {
                m_TrackedCacheKeys->insert(BlobHash);
            }
        }
    });
}
+
+std::vector<IoBuffer>
+BuildStore::GetMetadatas(std::span<const IoHash> BlobHashes, WorkerThreadPool* OptionalWorkerPool)
+{
+ ZEN_TRACE_CPU("BuildStore::GetMetadatas");
+ ZEN_MEMSCOPE(GetBuildstoreTag());
+ std::vector<BlockStoreLocation> MetaLocations;
+ std::vector<size_t> MetaLocationResultIndexes;
+ MetaLocations.reserve(BlobHashes.size());
+ MetaLocationResultIndexes.reserve(BlobHashes.size());
+ tsl::robin_set<uint32_t> ReferencedBlocks;
+
+ std::vector<IoBuffer> Result;
+ std::vector<ZenContentType> ResultContentTypes;
+ Result.resize(BlobHashes.size());
+ ResultContentTypes.resize(BlobHashes.size(), ZenContentType::kUnknownContentType);
+ {
+ RwLock::SharedLockScope _(m_Lock);
+ for (size_t Index = 0; Index < BlobHashes.size(); Index++)
+ {
+ const IoHash& BlobHash = BlobHashes[Index];
+ if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end())
+ {
+ const BlobIndex ExistingBlobIndex = It->second;
+ BlobEntry& ExistingBlobEntry = m_BlobEntries[ExistingBlobIndex];
+ if (ExistingBlobEntry.Metadata)
+ {
+ const MetadataEntry& ExistingMetadataEntry = m_MetadataEntries[ExistingBlobEntry.Metadata];
+ MetaLocations.push_back(ExistingMetadataEntry.Location);
+ MetaLocationResultIndexes.push_back(Index);
+ ReferencedBlocks.insert(ExistingMetadataEntry.Location.BlockIndex);
+ ResultContentTypes[Index] = ExistingMetadataEntry.ContentType;
+ }
+ ExistingBlobEntry.LastAccessTime = AccessTime(GcClock::TickCount());
+ m_LastAccessTimeUpdateCount++;
+ }
+ }
+ }
+
+ auto DoOneBlock = [this](std::span<const BlockStoreLocation> MetaLocations,
+ std::span<const size_t> MetaLocationResultIndexes,
+ std::span<const size_t> ChunkIndexes,
+ std::vector<IoBuffer>& Result) {
+ if (ChunkIndexes.size() < 4)
+ {
+ for (size_t ChunkIndex : ChunkIndexes)
+ {
+ IoBuffer Chunk = m_MetadataBlockStore.TryGetChunk(MetaLocations[ChunkIndex]);
+ if (Chunk)
+ {
+ size_t ResultIndex = MetaLocationResultIndexes[ChunkIndex];
+ Result[ResultIndex] = std::move(Chunk);
+ }
+ }
+ return true;
+ }
+ return m_MetadataBlockStore.IterateBlock(
+ MetaLocations,
+ ChunkIndexes,
+ [&MetaLocationResultIndexes, &Result](size_t ChunkIndex, const void* Data, uint64_t Size) {
+ if (Data != nullptr)
+ {
+ size_t ResultIndex = MetaLocationResultIndexes[ChunkIndex];
+ Result[ResultIndex] = IoBuffer(IoBuffer::Clone, Data, Size);
+ }
+ return true;
+ },
+ [&](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) {
+ size_t ResultIndex = MetaLocationResultIndexes[ChunkIndex];
+ Result[ResultIndex] = File.GetChunk(Offset, Size);
+ return true;
+ },
+ 8u * 1024u);
+ };
+
+ if (!MetaLocations.empty())
+ {
+ std::atomic<bool> AbortFlag;
+ std::atomic<bool> PauseFlag;
+ ParallelWork Work(AbortFlag, PauseFlag);
+
+ try
+ {
+ m_MetadataBlockStore.IterateChunks(
+ MetaLocations,
+ [this, OptionalWorkerPool, &Work, &Result, &MetaLocations, &MetaLocationResultIndexes, &ReferencedBlocks, DoOneBlock](
+ uint32_t BlockIndex,
+ std::span<const size_t> ChunkIndexes) -> bool {
+ ZEN_UNUSED(BlockIndex);
+ if (ChunkIndexes.size() == MetaLocations.size() || OptionalWorkerPool == nullptr || ReferencedBlocks.size() == 1)
+ {
+ return DoOneBlock(MetaLocations, MetaLocationResultIndexes, ChunkIndexes, Result);
+ }
+ else
+ {
+ ZEN_ASSERT(OptionalWorkerPool != nullptr);
+ std::vector<size_t> TmpChunkIndexes(ChunkIndexes.begin(), ChunkIndexes.end());
+ Work.ScheduleWork(
+ *OptionalWorkerPool,
+ [this,
+ &Result,
+ &MetaLocations,
+ &MetaLocationResultIndexes,
+ DoOneBlock,
+ ChunkIndexes = std::move(TmpChunkIndexes)](std::atomic<bool>& AbortFlag) {
+ if (AbortFlag)
+ {
+ return;
+ }
+ try
+ {
+ if (!DoOneBlock(MetaLocations, MetaLocationResultIndexes, ChunkIndexes, Result))
+ {
+ AbortFlag.store(true);
+ }
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_WARN("Failed getting metadata for {} chunks. Reason: {}", ChunkIndexes.size(), Ex.what());
+ }
+ });
+ return !Work.IsAborted();
+ }
+ });
+ }
+ catch (const std::exception& Ex)
+ {
+ AbortFlag.store(true);
+ ZEN_WARN("Failed iterating block metadata chunks in {}. Reason: '{}'", m_Config.RootDirectory, Ex.what());
+ }
+
+ Work.Wait();
+ }
+ for (size_t Index = 0; Index < Result.size(); Index++)
+ {
+ if (Result[Index])
+ {
+ Result[Index].SetContentType(ResultContentTypes[Index]);
+ }
+ }
+ return Result;
+}
+
+void
+BuildStore::Flush()
+{
+ ZEN_TRACE_CPU("BuildStore::Flush");
+ try
+ {
+ Stopwatch Timer;
+ const auto _ = MakeGuard(
+ [&] { ZEN_INFO("Flushed build store at {} in {}", m_Config.RootDirectory, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
+
+ m_LargeBlobStore.Flush();
+ m_SmallBlobStore.Flush();
+ m_MetadataBlockStore.Flush(false);
+
+ m_PayloadlogFile.Flush();
+ m_MetadatalogFile.Flush();
+
+ if (uint64_t LastAccessTimeUpdateCount = m_LastAccessTimeUpdateCount.load(); LastAccessTimeUpdateCount > 0)
+ {
+ m_LastAccessTimeUpdateCount -= LastAccessTimeUpdateCount;
+ RwLock::ExclusiveLockScope UpdateLock(m_Lock);
+ WriteAccessTimes(UpdateLock, blobstore::impl::GetAccessTimesPath(m_Config.RootDirectory));
+ }
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("BuildStore::Flush failed. Reason: {}", Ex.what());
+ }
+}
+
+BuildStore::StorageStats
+BuildStore::GetStorageStats() const
+{
+ StorageStats Result;
+ {
+ RwLock::SharedLockScope _(m_Lock);
+ Result.EntryCount = m_BlobLookup.size();
+
+ for (auto LookupIt : m_BlobLookup)
+ {
+ const BlobIndex ReadBlobIndex = LookupIt.second;
+ const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex];
+ if (ReadBlobEntry.Payload)
+ {
+ const PayloadEntry& Payload = m_PayloadEntries[ReadBlobEntry.Payload];
+ uint64_t Size = Payload.GetSize();
+ if ((Payload.GetFlags() & PayloadEntry::kStandalone) != 0)
+ {
+ Result.LargeBlobCount++;
+ Result.LargeBlobBytes += Size;
+ }
+ else
+ {
+ Result.SmallBlobCount++;
+ Result.SmallBlobBytes += Size;
+ }
+ }
+ if (ReadBlobEntry.Metadata)
+ {
+ const MetadataEntry& Metadata = m_MetadataEntries[ReadBlobEntry.Metadata];
+ Result.MetadataCount++;
+ Result.MetadataByteCount += Metadata.Location.Size;
+ }
+ }
+ }
+ return Result;
+}
+
#if ZEN_WITH_TESTS
// Test-only accessor: returns the last access time recorded for Key, or
// nullopt if the key is unknown.
std::optional<AccessTime>
BuildStore::GetLastAccessTime(const IoHash& Key) const
{
    RwLock::SharedLockScope _(m_Lock);
    auto It = m_BlobLookup.find(Key);
    if (It == m_BlobLookup.end())
    {
        return {};
    }
    return m_BlobEntries[It->second].LastAccessTime;
}

// Test-only mutator: overwrites the last access time for Key. Returns false
// if the key is unknown.
bool
BuildStore::SetLastAccessTime(const IoHash& Key, const AccessTime& Time)
{
    RwLock::SharedLockScope _(m_Lock);
    auto It = m_BlobLookup.find(Key);
    if (It == m_BlobLookup.end())
    {
        return false;
    }
    m_BlobEntries[It->second].LastAccessTime = Time;
    return true;
}
#endif // ZEN_WITH_TESTS
+
+void
+BuildStore::CompactState()
+{
+ ZEN_TRACE_CPU("BuildStore::CompactState");
+
+ std::vector<BlobEntry> BlobEntries;
+ std::vector<PayloadEntry> PayloadEntries;
+ std::vector<MetadataEntry> MetadataEntries;
+
+ tsl::robin_map<IoHash, BlobIndex, IoHash::Hasher> BlobLookup;
+
+ RwLock::ExclusiveLockScope _(m_Lock);
+ const size_t EntryCount = m_BlobLookup.size();
+ BlobLookup.reserve(EntryCount);
+ const size_t PayloadCount = m_PayloadEntries.size();
+ PayloadEntries.reserve(PayloadCount);
+ const size_t MetadataCount = m_MetadataEntries.size();
+ MetadataEntries.reserve(MetadataCount);
+
+ for (auto LookupIt : m_BlobLookup)
+ {
+ const IoHash& BlobHash = LookupIt.first;
+ const BlobIndex ReadBlobIndex = LookupIt.second;
+ const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex];
+
+ const BlobIndex WriteBlobIndex(gsl::narrow<uint32_t>(BlobEntries.size()));
+ BlobEntries.push_back(ReadBlobEntry);
+ BlobEntry& WriteBlobEntry = BlobEntries.back();
+
+ if (WriteBlobEntry.Payload)
+ {
+ const PayloadEntry& ReadPayloadEntry = m_PayloadEntries[ReadBlobEntry.Payload];
+ WriteBlobEntry.Payload = PayloadIndex(gsl::narrow<uint32_t>(PayloadEntries.size()));
+ PayloadEntries.push_back(ReadPayloadEntry);
+ }
+ if (ReadBlobEntry.Metadata)
+ {
+ const MetadataEntry& ReadMetadataEntry = m_MetadataEntries[ReadBlobEntry.Metadata];
+ WriteBlobEntry.Metadata = MetadataIndex(gsl::narrow<uint32_t>(MetadataEntries.size()));
+ MetadataEntries.push_back(ReadMetadataEntry);
+ }
+
+ BlobLookup.insert({BlobHash, WriteBlobIndex});
+ }
+ m_BlobEntries.swap(BlobEntries);
+ m_PayloadEntries.swap(PayloadEntries);
+ m_MetadataEntries.swap(MetadataEntries);
+ m_BlobLookup.swap(BlobLookup);
+}
+
uint64_t
// Replays the payload append-log into the in-memory index, skipping the
// first SkipEntryCount entries (already covered by an index snapshot).
// Returns the number of entries replayed. Caller must hold the exclusive
// lock (enforced by the unnamed lock-scope parameter).
BuildStore::ReadPayloadLog(const RwLock::ExclusiveLockScope&, const std::filesystem::path& LogPath, uint64_t SkipEntryCount)
{
    ZEN_TRACE_CPU("BuildStore::ReadPayloadLog");
    if (!IsFile(LogPath))
    {
        return 0;
    }

    uint64_t LogEntryCount = 0;
    Stopwatch Timer;
    const auto _ = MakeGuard([&] {
        ZEN_INFO("read build store '{}' payload log containing {} entries in {}",
                 LogPath,
                 LogEntryCount,
                 NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
    });

    // A corrupt log is discarded entirely; the payload stores remain intact.
    TCasLogFile<PayloadDiskEntry> CasLog;
    if (!CasLog.IsValid(LogPath))
    {
        RemoveFile(LogPath);
        return 0;
    }
    CasLog.Open(LogPath, CasLogFile::Mode::kRead);
    if (!CasLog.Initialize())
    {
        return 0;
    }

    const uint64_t EntryCount = CasLog.GetLogCount();
    if (EntryCount < SkipEntryCount)
    {
        // Snapshot is ahead of the log (e.g. log was truncated); replay all.
        ZEN_WARN("reading full payload log at '{}', reason: Log position from index snapshot is out of range", LogPath);
        SkipEntryCount = 0;
    }

    LogEntryCount = EntryCount - SkipEntryCount;
    uint64_t InvalidEntryCount = 0;

    CasLog.Replay(
        [&](const PayloadDiskEntry& Record) {
            std::string InvalidEntryReason;
            // Tombstone: a GC deletion record; drop the payload reference.
            if (Record.Entry.GetFlags() & PayloadEntry::kTombStone)
            {
                // Note: this leaves m_BlobLookup and other arrays with 'holes' in them, this will get clean up in compact gc operation
                if (auto ExistingIt = m_BlobLookup.find(Record.BlobHash); ExistingIt != m_BlobLookup.end())
                {
                    // Remove the blob entirely only if it has no metadata left.
                    if (!m_BlobEntries[ExistingIt->second].Metadata)
                    {
                        m_BlobLookup.erase(ExistingIt);
                    }
                    else
                    {
                        m_BlobEntries[ExistingIt->second].Payload = {};
                    }
                }
                return;
            }

            if (!ValidatePayloadDiskEntry(Record, InvalidEntryReason))
            {
                ZEN_WARN("skipping invalid payload entry in '{}', reason: '{}'", LogPath, InvalidEntryReason);
                ++InvalidEntryCount;
                return;
            }
            // Live entry: attach/replace the payload on an existing blob, or
            // create a new blob entry for it. Later records win.
            if (auto It = m_BlobLookup.find(Record.BlobHash); It != m_BlobLookup.end())
            {
                const BlobIndex ExistingBlobIndex = It->second;
                BlobEntry& ExistingBlob = m_BlobEntries[ExistingBlobIndex];
                if (ExistingBlob.Payload)
                {
                    const PayloadIndex ExistingPayloadIndex = ExistingBlob.Payload;
                    m_PayloadEntries[ExistingPayloadIndex] = Record.Entry;
                }
                else
                {
                    const PayloadIndex NewPayloadIndex(gsl::narrow<uint32_t>(m_PayloadEntries.size()));
                    m_PayloadEntries.push_back(Record.Entry);
                    ExistingBlob.Payload = NewPayloadIndex;
                }
            }
            else
            {
                const PayloadIndex NewPayloadIndex(gsl::narrow<uint32_t>(m_PayloadEntries.size()));
                m_PayloadEntries.push_back(Record.Entry);

                const BlobIndex NewBlobIndex(gsl::narrow<uint32_t>(m_BlobEntries.size()));
                m_BlobEntries.push_back(BlobEntry{.Payload = NewPayloadIndex, .LastAccessTime = AccessTime(GcClock::TickCount())});
                m_BlobLookup.insert_or_assign(Record.BlobHash, NewBlobIndex);
            }
        },
        SkipEntryCount);

    if (InvalidEntryCount)
    {
        ZEN_WARN("found {} invalid payload entries in '{}'", InvalidEntryCount, LogPath);
    }

    return LogEntryCount;
}
+
uint64_t
// Replays the metadata append-log into the in-memory index, skipping the
// first SkipEntryCount entries (already covered by an index snapshot).
// Mirrors ReadPayloadLog but for the metadata side. Returns the number of
// entries replayed. Caller must hold the exclusive lock.
BuildStore::ReadMetadataLog(const RwLock::ExclusiveLockScope&, const std::filesystem::path& LogPath, uint64_t SkipEntryCount)
{
    ZEN_TRACE_CPU("BuildStore::ReadMetadataLog");
    if (!IsFile(LogPath))
    {
        return 0;
    }

    uint64_t LogEntryCount = 0;
    Stopwatch Timer;
    const auto _ = MakeGuard([&] {
        ZEN_INFO("read build store '{}' metadata log containing {} entries in {}",
                 LogPath,
                 LogEntryCount,
                 NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
    });

    // A corrupt log is discarded entirely; the metadata block store remains.
    TCasLogFile<MetadataDiskEntry> CasLog;
    if (!CasLog.IsValid(LogPath))
    {
        RemoveFile(LogPath);
        return 0;
    }
    CasLog.Open(LogPath, CasLogFile::Mode::kRead);
    if (!CasLog.Initialize())
    {
        return 0;
    }

    const uint64_t EntryCount = CasLog.GetLogCount();
    if (EntryCount < SkipEntryCount)
    {
        // Snapshot is ahead of the log (e.g. log was truncated); replay all.
        ZEN_WARN("reading full metadata log at '{}', reason: Log position from index snapshot is out of range", LogPath);
        SkipEntryCount = 0;
    }

    LogEntryCount = EntryCount - SkipEntryCount;
    uint64_t InvalidEntryCount = 0;

    CasLog.Replay(
        [&](const MetadataDiskEntry& Record) {
            std::string InvalidEntryReason;
            // Tombstone: a GC deletion record; drop the metadata reference.
            if (Record.Entry.Flags & MetadataEntry::kTombStone)
            {
                // Note: this leaves m_BlobLookup and other arrays with 'holes' in them, this will get clean up in compact gc operation
                if (auto ExistingIt = m_BlobLookup.find(Record.BlobHash); ExistingIt != m_BlobLookup.end())
                {
                    // Remove the blob entirely only if it has no payload left.
                    if (!m_BlobEntries[ExistingIt->second].Payload)
                    {
                        m_BlobLookup.erase(ExistingIt);
                    }
                    else
                    {
                        m_BlobEntries[ExistingIt->second].Metadata = {};
                    }
                }
                return;
            }

            if (!ValidateMetadataDiskEntry(Record, InvalidEntryReason))
            {
                ZEN_WARN("skipping invalid metadata entry in '{}', reason: '{}'", LogPath, InvalidEntryReason);
                ++InvalidEntryCount;
                return;
            }
            // Live entry: attach/replace the metadata on an existing blob, or
            // create a new blob entry for it. Later records win.
            if (auto It = m_BlobLookup.find(Record.BlobHash); It != m_BlobLookup.end())
            {
                const BlobIndex ExistingBlobIndex = It->second;
                BlobEntry& ExistingBlob = m_BlobEntries[ExistingBlobIndex];
                if (ExistingBlob.Metadata)
                {
                    const MetadataIndex ExistingMetadataIndex = ExistingBlob.Metadata;
                    m_MetadataEntries[ExistingMetadataIndex] = Record.Entry;
                }
                else
                {
                    const MetadataIndex NewMetadataIndex(gsl::narrow<uint32_t>(m_MetadataEntries.size()));
                    m_MetadataEntries.push_back(Record.Entry);
                    ExistingBlob.Metadata = NewMetadataIndex;
                }
            }
            else
            {
                const MetadataIndex NewMetadataIndex(gsl::narrow<uint32_t>(m_MetadataEntries.size()));
                m_MetadataEntries.push_back(Record.Entry);

                const BlobIndex NewBlobIndex(gsl::narrow<uint32_t>(m_BlobEntries.size()));
                m_BlobEntries.push_back(BlobEntry{.Metadata = NewMetadataIndex, .LastAccessTime = AccessTime(GcClock::TickCount())});
                m_BlobLookup.insert_or_assign(Record.BlobHash, NewBlobIndex);
            }
        },
        SkipEntryCount);

    if (InvalidEntryCount)
    {
        ZEN_WARN("found {} invalid metadata entries in '{}'", InvalidEntryCount, LogPath);
    }

    return LogEntryCount;
}
+
void
// Loads the access-time snapshot written by WriteAccessTimes and applies it
// to the in-memory blob entries. On any mismatch (short file, bad header,
// unknown key), m_LastAccessTimeUpdateCount is bumped so the next Flush
// rewrites a fresh, consistent snapshot. Caller must hold the exclusive lock.
BuildStore::ReadAccessTimes(const RwLock::ExclusiveLockScope&, const std::filesystem::path& AccessTimesPath)
{
    ZEN_TRACE_CPU("BuildStore::ReadAccessTimes");

    using namespace blobstore::impl;

    BasicFile AccessTimesFile;
    AccessTimesFile.Open(AccessTimesPath, BasicFile::Mode::kRead);
    uint64_t Size = AccessTimesFile.FileSize();
    if (Size >= sizeof(AccessTimesHeader))
    {
        AccessTimesHeader Header;
        uint64_t Offset = 0;
        AccessTimesFile.Read(&Header, sizeof(Header), 0);
        // Records start at the next aligned offset after the header.
        Offset += sizeof(AccessTimesHeader);
        Offset = RoundUp(Offset, AccessTimesHeader::DataAlignment);
        if ((Header.Magic == AccessTimesHeader::ExpectedMagic) && (Header.Version == AccessTimesHeader::CurrentVersion) &&
            (Header.Checksum == AccessTimesHeader::ComputeChecksum(Header)))
        {
            uint64_t RecordsSize = sizeof(AccessTimeRecord) * Header.AccessTimeCount;
            if (AccessTimesFile.FileSize() >= Offset + RecordsSize)
            {
                std::vector<AccessTimeRecord> AccessRecords(Header.AccessTimeCount);
                AccessTimesFile.Read(AccessRecords.data(), RecordsSize, Offset);
                for (const AccessTimeRecord& Record : AccessRecords)
                {
                    const IoHash& Key = Record.Key;
                    const uint32_t SecondsSinceEpoch = Record.SecondsSinceEpoch;
                    if (auto It = m_BlobLookup.find(Key); It != m_BlobLookup.end())
                    {
                        const BlobIndex Index = It->second;
                        BlobEntry& Entry = m_BlobEntries[Index];
                        Entry.LastAccessTime.SetSecondsSinceEpoch(SecondsSinceEpoch);
                    }
                    else
                    {
                        // Stale record for a blob no longer in the index.
                        m_LastAccessTimeUpdateCount++;
                    }
                }
            }
            else
            {
                // Truncated record section.
                m_LastAccessTimeUpdateCount++;
            }
        }
        else
        {
            // Bad magic/version/checksum: ignore the snapshot.
            m_LastAccessTimeUpdateCount++;
        }
    }
    else
    {
        // Missing or too-small file.
        m_LastAccessTimeUpdateCount++;
    }
}
+
+void
+BuildStore::WriteAccessTimes(const RwLock::ExclusiveLockScope&, const std::filesystem::path& AccessTimesPath)
+{
+ ZEN_TRACE_CPU("BuildStore::WriteAccessTimes");
+
+ using namespace blobstore::impl;
+
+ uint32_t Count = gsl::narrow<uint32_t>(m_BlobLookup.size());
+ AccessTimesHeader Header = {.AccessTimeCount = Count};
+ Header.Checksum = AccessTimesHeader::ComputeChecksum(Header);
+
+ TemporaryFile TempFile;
+ std::error_code Ec;
+ if (TempFile.CreateTemporary(AccessTimesPath.parent_path(), Ec); Ec)
+ {
+ throw std::runtime_error(fmt::format("Failed to create temporary file {} to write access times. Reason ({}) {}",
+ TempFile.GetPath(),
+ Ec.value(),
+ Ec.message()));
+ }
+ {
+ uint64_t Offset = 0;
+ TempFile.Write(&Header, sizeof(AccessTimesHeader), Offset);
+ Offset += sizeof(AccessTimesHeader);
+ Offset = RoundUp(Offset, AccessTimesHeader::DataAlignment);
+
+ std::vector<AccessTimeRecord> AccessRecords;
+ AccessRecords.reserve(Header.AccessTimeCount);
+
+ for (auto It : m_BlobLookup)
+ {
+ const IoHash& Key = It.first;
+ const BlobIndex Index = It.second;
+ const BlobEntry& Entry = m_BlobEntries[Index];
+ const uint32_t SecondsSinceEpoch = Entry.LastAccessTime.GetSecondsSinceEpoch();
+ AccessRecords.emplace_back(AccessTimeRecord{.Key = Key, .SecondsSinceEpoch = SecondsSinceEpoch});
+ }
+ uint64_t RecordsSize = sizeof(AccessTimeRecord) * Header.AccessTimeCount;
+ TempFile.Write(AccessRecords.data(), RecordsSize, Offset);
+ Offset += sizeof(AccessTimesHeader) * Header.AccessTimeCount;
+ }
+ if (TempFile.MoveTemporaryIntoPlace(AccessTimesPath, Ec); Ec)
+ {
+ throw std::runtime_error(fmt::format("Failed to move temporary file {} to {} when write access times. Reason ({}) {}",
+ TempFile.GetPath(),
+ AccessTimesPath,
+ Ec.value(),
+ Ec.message()));
+ }
+}
+
+bool
+BuildStore::ValidatePayloadDiskEntry(const PayloadDiskEntry& Entry, std::string& OutReason)
+{
+ if (Entry.BlobHash == IoHash::Zero)
+ {
+ OutReason = fmt::format("Invalid blob hash {}", Entry.BlobHash.ToHexString());
+ return false;
+ }
+ if (Entry.Entry.GetFlags() & ~(PayloadEntry::kTombStone | PayloadEntry::kStandalone))
+ {
+ OutReason = fmt::format("Invalid flags {} for entry {}", Entry.Entry.GetFlags(), Entry.BlobHash.ToHexString());
+ return false;
+ }
+ if (Entry.Entry.GetFlags() & PayloadEntry::kTombStone)
+ {
+ return true;
+ }
+ if (Entry.Entry.GetSize() == 0 || Entry.Entry.GetSize() == 0x00ffffffffffffffu)
+ {
+ OutReason = fmt::format("Invalid size field {} for meta entry {}", Entry.Entry.GetSize(), Entry.BlobHash.ToHexString());
+ return false;
+ }
+ return true;
+}
+
+bool
+BuildStore::ValidateMetadataDiskEntry(const MetadataDiskEntry& Entry, std::string& OutReason)
+{
+ if (Entry.BlobHash == IoHash::Zero)
+ {
+ OutReason = fmt::format("Invalid blob hash {} for meta entry", Entry.BlobHash.ToHexString());
+ return false;
+ }
+ if (Entry.Entry.Location.Size == 0)
+ {
+ OutReason = fmt::format("Invalid meta blob size {} for meta entry", Entry.Entry.Location.Size);
+ return false;
+ }
+ if (Entry.Entry.Reserved1 != 0 || Entry.Entry.Reserved2 != 0)
+ {
+ OutReason = fmt::format("Invalid reserved fields for meta entry {}", Entry.BlobHash.ToHexString());
+ return false;
+ }
+ if (Entry.Entry.Flags & MetadataEntry::kTombStone)
+ {
+ return true;
+ }
+ if (Entry.Entry.ContentType == ZenContentType::kCOUNT)
+ {
+ OutReason = fmt::format("Invalid content type for meta entry {}", Entry.BlobHash.ToHexString());
+ return false;
+ }
+ if (Entry.Reserved1 != 0 || Entry.Reserved2 != 0 || Entry.Reserved3 != 0 || Entry.Reserved4 != 0)
+ {
+ OutReason = fmt::format("Invalid reserved fields for meta entry {}", Entry.BlobHash.ToHexString());
+ return false;
+ }
+ return true;
+}
+
// GC participant that reports which CAS hashes the build store still
// references (blobs that have a payload), so the collector does not delete
// chunks the store depends on.
class BuildStoreGcReferenceChecker : public GcReferenceChecker
{
public:
    BuildStoreGcReferenceChecker(BuildStore& Store) : m_Store(Store) {}
    virtual std::string GetGcName(GcCtx& Ctx) override
    {
        ZEN_UNUSED(Ctx);
        return fmt::format("buildstore: '{}'", m_Store.m_Config.RootDirectory.string());
    }

    // No pre-caching needed; references are gathered in UpdateLockedState.
    virtual void PreCache(GcCtx& Ctx) override { ZEN_UNUSED(Ctx); }

    virtual void UpdateLockedState(GcCtx& Ctx) override
    {
        ZEN_TRACE_CPU("Builds::UpdateLockedState");
        ZEN_MEMSCOPE(GetBuildstoreTag());

        auto Log = [&Ctx]() { return Ctx.Logger; };

        // Snapshot every blob hash that still has a payload.
        // NOTE(review): m_Store.m_Lock is not taken here — presumably the GC
        // "locked state" phase guarantees exclusive access; confirm.
        m_References.reserve(m_Store.m_BlobLookup.size());
        for (const auto& It : m_Store.m_BlobLookup)
        {
            const BuildStore::BlobIndex ExistingBlobIndex = It.second;
            if (m_Store.m_BlobEntries[ExistingBlobIndex].Payload)
            {
                m_References.push_back(It.first);
            }
        }
        FilterReferences(Ctx, fmt::format("buildstore [LOCKSTATE] '{}'", "buildstore"), m_References);
    }

    // Removes every hash from IoCids that this store still references and
    // returns the remaining (unused) span.
    virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override
    {
        ZEN_UNUSED(Ctx);
        ZEN_TRACE_CPU("Builds::GetUnusedReferences");
        ZEN_MEMSCOPE(GetBuildstoreTag());

        auto Log = [&Ctx]() { return Ctx.Logger; };

        size_t InitialCount = IoCids.size();
        size_t UsedCount = InitialCount;

        Stopwatch Timer;
        const auto _ = MakeGuard([&] {
            if (!Ctx.Settings.Verbose)
            {
                return;
            }
            ZEN_INFO("GCV2: buildstore [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}",
                     "buildstore",
                     UsedCount,
                     InitialCount,
                     NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
        });

        std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_References, IoCids);
        UsedCount = IoCids.size() - UnusedReferences.size();
        return UnusedReferences;
    }

private:
    BuildStore& m_Store;
    // Hashes referenced by this store, collected during UpdateLockedState.
    std::vector<IoHash> m_References;
};
+
+std::string
+BuildStore::GetGcName(GcCtx& Ctx)
+{
+ ZEN_UNUSED(Ctx);
+ ZEN_MEMSCOPE(GetBuildstoreTag());
+
+ return fmt::format("buildstore: '{}'", m_Config.RootDirectory.string());
+}
+
// GC compaction pass for the build store: after expired blobs were removed,
// finds metadata blocks whose live usage dropped below a threshold, rewrites
// the surviving chunks into fresh blocks, updates the in-memory index and
// appends move/tombstone records to the metadata log.
class BuildStoreGcCompator : public GcStoreCompactor
{
    using BlobEntry = BuildStore::BlobEntry;
    using PayloadEntry = BuildStore::PayloadEntry;
    using MetadataEntry = BuildStore::MetadataEntry;
    using MetadataDiskEntry = BuildStore::MetadataDiskEntry;
    using BlobIndex = BuildStore::BlobIndex;
    using PayloadIndex = BuildStore::PayloadIndex;
    using MetadataIndex = BuildStore::MetadataIndex;

public:
    BuildStoreGcCompator(BuildStore& Store, std::vector<IoHash>&& RemovedBlobs) : m_Store(Store), m_RemovedBlobs(std::move(RemovedBlobs)) {}

    virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) override
    {
        // NOTE(review): ClaimDiskReserveCallback is marked unused here but is
        // passed to CompactBlocks in delete mode below — the macro only
        // silences the warning for the non-delete paths.
        ZEN_UNUSED(ClaimDiskReserveCallback);
        ZEN_TRACE_CPU("Builds::CompactStore");
        ZEN_MEMSCOPE(GetBuildstoreTag());

        auto Log = [&Ctx]() { return Ctx.Logger; };

        Stopwatch Timer;
        const auto _ = MakeGuard([&] {
            if (!Ctx.Settings.Verbose)
            {
                return;
            }
            ZEN_INFO("GCV2: buildstore [COMPACT] '{}': RemovedDisk: {} in {}",
                     m_Store.m_Config.RootDirectory,
                     NiceBytes(Stats.RemovedDisk),
                     NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
        });

        // Always flush on exit so log/index state hits disk even on abort.
        const auto __ = MakeGuard([&] { m_Store.Flush(); });

        // Only worth compacting if the GC actually removed something.
        if (!m_RemovedBlobs.empty())
        {
            if (Ctx.Settings.CollectSmallObjects)
            {
                // Track keys written concurrently (by PutMetadatas) during the
                // compaction so their fresh locations are not clobbered below.
                m_Store.m_Lock.WithExclusiveLock([this]() { m_Store.m_TrackedCacheKeys = std::make_unique<HashSet>(); });
                auto __ = MakeGuard([this]() { m_Store.m_Lock.WithExclusiveLock([&]() { m_Store.m_TrackedCacheKeys.reset(); }); });

                // Pass 1: measure live bytes/entries per metadata block.
                BlockStore::BlockUsageMap BlockUsage;
                {
                    RwLock::SharedLockScope __(m_Store.m_Lock);

                    for (auto LookupIt : m_Store.m_BlobLookup)
                    {
                        const BlobIndex ReadBlobIndex = LookupIt.second;
                        const BlobEntry& ReadBlobEntry = m_Store.m_BlobEntries[ReadBlobIndex];

                        if (ReadBlobEntry.Metadata)
                        {
                            const MetadataEntry& ReadMetadataEntry = m_Store.m_MetadataEntries[ReadBlobEntry.Metadata];

                            uint32_t BlockIndex = ReadMetadataEntry.Location.BlockIndex;
                            // Chunks occupy aligned slots on disk.
                            uint64_t ChunkSize = RoundUp(ReadMetadataEntry.Location.Size, m_Store.m_Config.MetadataBlockStoreAlignement);

                            if (auto BlockUsageIt = BlockUsage.find(BlockIndex); BlockUsageIt != BlockUsage.end())
                            {
                                BlockStore::BlockUsageInfo& Info = BlockUsageIt.value();
                                Info.EntryCount++;
                                Info.DiskUsage += ChunkSize;
                            }
                            else
                            {
                                BlockUsage.insert_or_assign(BlockIndex,
                                                            BlockStore::BlockUsageInfo{.DiskUsage = ChunkSize, .EntryCount = 1});
                            }
                        }
                    }
                }

                // Pass 2: pick blocks below the 90% usage threshold and record
                // which live chunk locations must be preserved.
                BlockStore::BlockEntryCountMap BlocksToCompact = m_Store.m_MetadataBlockStore.GetBlocksToCompact(BlockUsage, 90);
                BlockStoreCompactState BlockCompactState;
                std::vector<IoHash> BlockCompactStateKeys;
                BlockCompactState.IncludeBlocks(BlocksToCompact);

                if (BlocksToCompact.size() > 0)
                {
                    {
                        RwLock::SharedLockScope ___(m_Store.m_Lock);
                        for (const auto& Entry : m_Store.m_BlobLookup)
                        {
                            BlobIndex Index = Entry.second;

                            if (MetadataIndex Meta = m_Store.m_BlobEntries[Index].Metadata; Meta)
                            {
                                // BlockCompactStateKeys is indexed by the chunk
                                // index AddKeepLocation assigns.
                                if (BlockCompactState.AddKeepLocation(m_Store.m_MetadataEntries[Meta].Location))
                                {
                                    BlockCompactStateKeys.push_back(Entry.first);
                                }
                            }
                        }
                    }

                    if (Ctx.Settings.IsDeleteMode)
                    {
                        if (Ctx.Settings.Verbose)
                        {
                            ZEN_INFO("GCV2: buildstore [COMPACT] '{}': compacting {} blocks",
                                     m_Store.m_Config.RootDirectory,
                                     BlocksToCompact.size());
                        }

                        // Pass 3: physically rewrite the blocks. The callback is
                        // invoked per compacted batch with moved and scrubbed
                        // (lost/unreadable) chunks.
                        m_Store.m_MetadataBlockStore.CompactBlocks(
                            BlockCompactState,
                            m_Store.m_Config.MetadataBlockStoreAlignement,
                            [&](const BlockStore::MovedChunksArray& MovedArray,
                                const BlockStore::ChunkIndexArray& ScrubbedArray,
                                uint64_t FreedDiskSpace) {
                                std::vector<MetadataDiskEntry> MovedEntries;
                                MovedEntries.reserve(MovedArray.size());
                                RwLock::ExclusiveLockScope _(m_Store.m_Lock);
                                for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray)
                                {
                                    size_t ChunkIndex = Moved.first;
                                    const IoHash& Key = BlockCompactStateKeys[ChunkIndex];

                                    // Skip keys rewritten concurrently: their
                                    // index entry already points at a newer
                                    // location than the one we moved.
                                    ZEN_ASSERT(m_Store.m_TrackedCacheKeys);
                                    if (m_Store.m_TrackedCacheKeys->contains(Key))
                                    {
                                        continue;
                                    }

                                    if (auto It = m_Store.m_BlobLookup.find(Key); It != m_Store.m_BlobLookup.end())
                                    {
                                        const BlobIndex Index = It->second;

                                        if (MetadataIndex Meta = m_Store.m_BlobEntries[Index].Metadata; Meta)
                                        {
                                            // Point the index at the new location
                                            // and log the move for recovery.
                                            m_Store.m_MetadataEntries[Meta].Location = Moved.second;
                                            MovedEntries.push_back(
                                                MetadataDiskEntry{.Entry = m_Store.m_MetadataEntries[Meta], .BlobHash = Key});
                                        }
                                    }
                                }

                                // Scrubbed chunks are gone: tombstone them in the
                                // log and clear the index references.
                                for (size_t Scrubbed : ScrubbedArray)
                                {
                                    const IoHash& Key = BlockCompactStateKeys[Scrubbed];
                                    if (auto It = m_Store.m_BlobLookup.find(Key); It != m_Store.m_BlobLookup.end())
                                    {
                                        const BlobIndex Index = It->second;

                                        if (MetadataIndex Meta = m_Store.m_BlobEntries[Index].Metadata; Meta)
                                        {
                                            MovedEntries.push_back(
                                                MetadataDiskEntry{.Entry = m_Store.m_MetadataEntries[Meta], .BlobHash = Key});
                                            MovedEntries.back().Entry.Flags |= MetadataEntry::kTombStone;
                                            m_Store.m_MetadataEntries[Meta] = {};
                                            m_Store.m_BlobEntries[Index].Metadata = {};
                                        }
                                    }
                                }

                                m_Store.m_MetadatalogFile.Append(MovedEntries);

                                Stats.RemovedDisk += FreedDiskSpace;
                                // Returning false aborts the remaining compaction.
                                if (Ctx.IsCancelledFlag.load())
                                {
                                    return false;
                                }
                                return true;
                            },
                            ClaimDiskReserveCallback,
                            fmt::format("GCV2: buildstore [COMPACT] '{}': ", m_Store.m_Config.RootDirectory));
                    }
                    else
                    {
                        // Dry-run mode: report only.
                        if (Ctx.Settings.Verbose)
                        {
                            ZEN_INFO("GCV2: buildstore [COMPACT] '{}': skipped compacting of {} eligible blocks",
                                     m_Store.m_Config.RootDirectory,
                                     BlocksToCompact.size());
                        }
                    }
                }
            }
        }
    }

    virtual std::string GetGcName(GcCtx& Ctx) override
    {
        ZEN_UNUSED(Ctx);
        ZEN_MEMSCOPE(GetBuildstoreTag());

        return fmt::format("buildstore: '{}'", m_Store.m_Config.RootDirectory.string());
    }

private:
    BuildStore& m_Store;
    // Hashes removed by the expiry pass; non-empty enables compaction.
    const std::vector<IoHash> m_RemovedBlobs;
};
+
+GcStoreCompactor*
+BuildStore::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats)
+{
+ ZEN_TRACE_CPU("Builds::RemoveExpiredData");
+ ZEN_MEMSCOPE(GetBuildstoreTag());
+
+ auto Log = [&Ctx]() { return Ctx.Logger; };
+
+ Stopwatch Timer;
+ const auto _ = MakeGuard([&] {
+ if (Ctx.Settings.Verbose)
+ {
+ ZEN_INFO("GCV2: buildstore [REMOVE EXPIRED] '{}': Count: {}, Expired: {}, Deleted: {}, FreedMemory: {} in {}",
+ m_Config.RootDirectory,
+ Stats.CheckedCount,
+ Stats.FoundCount,
+ Stats.DeletedCount,
+ NiceBytes(Stats.FreedMemory),
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ }
+ });
+
+ const GcClock::Tick ExpireTicks = Ctx.Settings.BuildStoreExpireTime.time_since_epoch().count();
+ std::vector<IoHash> ExpiredBlobs;
+ tsl::robin_set<IoHash, IoHash::Hasher> SizeDroppedBlobs;
+
+ {
+ struct SizeInfo
+ {
+ const IoHash Key;
+ uint32_t SecondsSinceEpoch = 0;
+ uint64_t BlobSize = 0;
+ };
+
+ bool DiskSizeExceeded = false;
+ const uint64_t CurrentDiskSize =
+ m_LargeBlobStore.StorageSize().DiskSize + m_SmallBlobStore.StorageSize().DiskSize + m_MetadataBlockStore.TotalSize();
+ if (CurrentDiskSize > m_Config.MaxDiskSpaceLimit)
+ {
+ DiskSizeExceeded = true;
+ }
+
+ uint64_t ExpiredDataSize = 0;
+
+ std::vector<SizeInfo> NonExpiredBlobSizeInfos;
+
+ {
+ RwLock::SharedLockScope __(m_Lock);
+ if (DiskSizeExceeded)
+ {
+ NonExpiredBlobSizeInfos.reserve(m_BlobLookup.size());
+ }
+ for (const auto& It : m_BlobLookup)
+ {
+ const BlobIndex ReadBlobIndex = It.second;
+ const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex];
+ uint64_t Size = 0;
+ if (ReadBlobEntry.Payload)
+ {
+ const PayloadEntry& Payload = m_PayloadEntries[ReadBlobEntry.Payload];
+ Size += Payload.GetSize();
+ }
+ if (ReadBlobEntry.Metadata)
+ {
+ const MetadataEntry& Metadata = m_MetadataEntries[ReadBlobEntry.Metadata];
+ Size += Metadata.Location.Size;
+ }
+
+ const GcClock::Tick AccessTick = ReadBlobEntry.LastAccessTime;
+ if (AccessTick < ExpireTicks)
+ {
+ ExpiredBlobs.push_back(It.first);
+ ExpiredDataSize += ExpiredDataSize;
+ }
+ else if (DiskSizeExceeded)
+ {
+ NonExpiredBlobSizeInfos.emplace_back(SizeInfo{.Key = It.first,
+ .SecondsSinceEpoch = ReadBlobEntry.LastAccessTime.GetSecondsSinceEpoch(),
+ .BlobSize = Size});
+ }
+ }
+ Stats.CheckedCount += m_BlobLookup.size();
+ Stats.FoundCount += ExpiredBlobs.size();
+ }
+
+ if (DiskSizeExceeded)
+ {
+ const uint64_t NewSizeLimit =
+ m_Config.MaxDiskSpaceLimit -
+ (m_Config.MaxDiskSpaceLimit >> 4); // Remove a bit more than just below the limit so we have some space to grow
+ if ((CurrentDiskSize - ExpiredDataSize) > NewSizeLimit)
+ {
+ std::vector<size_t> NonExpiredOrder;
+ NonExpiredOrder.resize(NonExpiredBlobSizeInfos.size());
+ for (size_t Index = 0; Index < NonExpiredOrder.size(); Index++)
+ {
+ NonExpiredOrder[Index] = Index;
+ }
+ std::sort(NonExpiredOrder.begin(), NonExpiredOrder.end(), [&NonExpiredBlobSizeInfos](const size_t Lhs, const size_t Rhs) {
+ const SizeInfo& LhsInfo = NonExpiredBlobSizeInfos[Lhs];
+ const SizeInfo& RhsInfo = NonExpiredBlobSizeInfos[Rhs];
+ return LhsInfo.SecondsSinceEpoch < RhsInfo.SecondsSinceEpoch;
+ });
+
+ auto It = NonExpiredOrder.begin();
+ while (It != NonExpiredOrder.end())
+ {
+ const SizeInfo& Info = NonExpiredBlobSizeInfos[*It];
+ if ((CurrentDiskSize - ExpiredDataSize) < NewSizeLimit)
+ {
+ break;
+ }
+ ExpiredDataSize += Info.BlobSize;
+ ExpiredBlobs.push_back(Info.Key);
+ SizeDroppedBlobs.insert(Info.Key);
+ It++;
+ }
+ }
+ }
+ }
+
+ std::vector<IoHash> RemovedBlobs;
+ if (!ExpiredBlobs.empty())
+ {
+ if (Ctx.Settings.IsDeleteMode)
+ {
+ RemovedBlobs.reserve(ExpiredBlobs.size());
+
+ std::vector<PayloadDiskEntry> RemovedPayloads;
+ std::vector<MetadataDiskEntry> RemoveMetadatas;
+
+ RwLock::ExclusiveLockScope __(m_Lock);
+ if (Ctx.IsCancelledFlag.load())
+ {
+ return nullptr;
+ }
+
+ for (const IoHash& ExpiredBlob : ExpiredBlobs)
+ {
+ if (auto It = m_BlobLookup.find(ExpiredBlob); It != m_BlobLookup.end())
+ {
+ const BlobIndex ReadBlobIndex = It->second;
+ const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex];
+
+ const GcClock::Tick AccessTick = ReadBlobEntry.LastAccessTime;
+
+ if (SizeDroppedBlobs.contains(ExpiredBlob) || (AccessTick < ExpireTicks))
+ {
+ if (ReadBlobEntry.Payload)
+ {
+ RemovedPayloads.push_back(
+ PayloadDiskEntry{.Entry = m_PayloadEntries[ReadBlobEntry.Payload], .BlobHash = ExpiredBlob});
+ RemovedPayloads.back().Entry.AddFlag(PayloadEntry::kTombStone);
+ m_PayloadEntries[ReadBlobEntry.Payload] = {};
+ m_BlobEntries[ReadBlobIndex].Payload = {};
+ }
+ if (ReadBlobEntry.Metadata)
+ {
+ RemoveMetadatas.push_back(
+ MetadataDiskEntry{.Entry = m_MetadataEntries[ReadBlobEntry.Metadata], .BlobHash = ExpiredBlob});
+ RemoveMetadatas.back().Entry.Flags |= MetadataEntry::kTombStone;
+ m_MetadataEntries[ReadBlobEntry.Metadata] = {};
+ m_BlobEntries[ReadBlobIndex].Metadata = {};
+ }
+
+ m_BlobLookup.erase(It);
+ m_LastAccessTimeUpdateCount++;
+
+ RemovedBlobs.push_back(ExpiredBlob);
+ Stats.DeletedCount++;
+ }
+ }
+ }
+ if (!RemovedPayloads.empty())
+ {
+ m_PayloadlogFile.Append(RemovedPayloads);
+ }
+ if (!RemoveMetadatas.empty())
+ {
+ m_MetadatalogFile.Append(RemoveMetadatas);
+ }
+ }
+ }
+
+ if (!RemovedBlobs.empty())
+ {
+ CompactState();
+ }
+
+ return new BuildStoreGcCompator(*this, std::move(RemovedBlobs));
+}
+
+std::vector<GcReferenceChecker*>
+BuildStore::CreateReferenceCheckers(GcCtx& Ctx)
+{
+ ZEN_UNUSED(Ctx);
+ ZEN_MEMSCOPE(GetBuildstoreTag());
+ return {new BuildStoreGcReferenceChecker(*this)};
+}
+
+std::vector<GcReferenceValidator*>
+BuildStore::CreateReferenceValidators(GcCtx& Ctx)
+{
+ ZEN_UNUSED(Ctx);
+ return {};
+}
+
+std::vector<RwLock::SharedLockScope>
+BuildStore::LockState(GcCtx& Ctx)
+{
+ ZEN_UNUSED(Ctx);
+ std::vector<RwLock::SharedLockScope> Locks;
+ Locks.emplace_back(RwLock::SharedLockScope(m_Lock));
+ return Locks;
+}
+
// Storage scrubbing (on-disk integrity verification) is not implemented for
// the build store yet; this is a deliberate no-op stub.
void
BuildStore::ScrubStorage(ScrubContext& ScrubCtx)
{
    ZEN_UNUSED(ScrubCtx);
    // TODO
}
+
// Reports this store's on-disk footprint to the GC framework.
// NOTE(review): only the metadata block store is counted here, while
// RemoveExpiredData sums large/small blob stores plus metadata when checking
// the disk limit — presumably the blob stores report their own sizes to GC
// separately; confirm this split is intentional.
GcStorageSize
BuildStore::StorageSize() const
{
    GcStorageSize Result;
    Result.DiskSize = m_MetadataBlockStore.TotalSize();
    return Result;
}
+
+/*
+ ___________ __
+ \__ ___/___ _______/ |_ ______
+ | |_/ __ \ / ___/\ __\/ ___/
+ | |\ ___/ \___ \ | | \___ \
+ |____| \___ >____ > |__| /____ >
+ \/ \/ \/
+*/
+
+#if ZEN_WITH_TESTS
+
+TEST_CASE("BuildStore.Blobs")
+{
+ ScopedTemporaryDirectory _;
+
+ BuildStoreConfig Config;
+ Config.RootDirectory = _.Path() / "build_store";
+
+ std::vector<IoHash> CompressedBlobsHashes;
+ {
+ GcManager Gc;
+ BuildStore Store(Config, Gc);
+
+ for (size_t I = 0; I < 5; I++)
+ {
+ IoBuffer Blob = CreateSemiRandomBlob(4711 + I * 7);
+ CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(Blob)));
+ CompressedBlobsHashes.push_back(CompressedBlob.DecodeRawHash());
+ IoBuffer Payload = std::move(CompressedBlob).GetCompressed().Flatten().AsIoBuffer();
+ Payload.SetContentType(ZenContentType::kCompressedBinary);
+
+ Store.PutBlob(CompressedBlobsHashes.back(), Payload);
+ }
+
+ for (const IoHash& RawHash : CompressedBlobsHashes)
+ {
+ IoBuffer Payload = Store.GetBlob(RawHash);
+ CHECK(Payload);
+ CHECK(Payload.GetContentType() == ZenContentType::kCompressedBinary);
+ IoHash VerifyRawHash;
+ uint64_t VerifyRawSize;
+ CompressedBuffer CompressedBlob =
+ CompressedBuffer::FromCompressed(SharedBuffer(std::move(Payload)), VerifyRawHash, VerifyRawSize);
+ CHECK(CompressedBlob);
+ CHECK(VerifyRawHash == RawHash);
+ IoBuffer Decompressed = CompressedBlob.Decompress().AsIoBuffer();
+ CHECK(IoHash::HashBuffer(Decompressed) == RawHash);
+ }
+ }
+ {
+ GcManager Gc;
+ BuildStore Store(Config, Gc);
+ for (const IoHash& RawHash : CompressedBlobsHashes)
+ {
+ IoBuffer Payload = Store.GetBlob(RawHash);
+ CHECK(Payload);
+ CHECK(Payload.GetContentType() == ZenContentType::kCompressedBinary);
+ IoHash VerifyRawHash;
+ uint64_t VerifyRawSize;
+ CompressedBuffer CompressedBlob =
+ CompressedBuffer::FromCompressed(SharedBuffer(std::move(Payload)), VerifyRawHash, VerifyRawSize);
+ CHECK(CompressedBlob);
+ CHECK(VerifyRawHash == RawHash);
+ IoBuffer Decompressed = CompressedBlob.Decompress().AsIoBuffer();
+ CHECK(IoHash::HashBuffer(Decompressed) == RawHash);
+ }
+
+ for (size_t I = 0; I < 5; I++)
+ {
+ IoBuffer Blob = CreateSemiRandomBlob(5713 + I * 7);
+ CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(Blob)));
+ CompressedBlobsHashes.push_back(CompressedBlob.DecodeRawHash());
+ IoBuffer Payload = std::move(CompressedBlob).GetCompressed().Flatten().AsIoBuffer();
+ Payload.SetContentType(ZenContentType::kCompressedBinary);
+
+ Store.PutBlob(CompressedBlobsHashes.back(), Payload);
+ }
+ }
+ {
+ GcManager Gc;
+ BuildStore Store(Config, Gc);
+ for (const IoHash& RawHash : CompressedBlobsHashes)
+ {
+ IoBuffer Payload = Store.GetBlob(RawHash);
+ CHECK(Payload);
+ CHECK(Payload.GetContentType() == ZenContentType::kCompressedBinary);
+ IoHash VerifyRawHash;
+ uint64_t VerifyRawSize;
+ CompressedBuffer CompressedBlob =
+ CompressedBuffer::FromCompressed(SharedBuffer(std::move(Payload)), VerifyRawHash, VerifyRawSize);
+ CHECK(CompressedBlob);
+ CHECK(VerifyRawHash == RawHash);
+ IoBuffer Decompressed = CompressedBlob.Decompress().AsIoBuffer();
+ CHECK(IoHash::HashBuffer(Decompressed) == RawHash);
+ }
+ }
+}
+
+namespace blockstore::testing {
+ IoBuffer MakeMetaData(const IoHash& BlobHash, const std::vector<std::pair<std::string, std::string>>& KeyValues)
+ {
+ CbObjectWriter Writer;
+ Writer.AddHash("rawHash"sv, BlobHash);
+ Writer.BeginObject("values");
+ {
+ for (const auto& V : KeyValues)
+ {
+ Writer.AddString(V.first, V.second);
+ }
+ }
+ Writer.EndObject(); // values
+ return Writer.Save().GetBuffer().AsIoBuffer();
+ };
+
+} // namespace blockstore::testing
+
+TEST_CASE("BuildStore.Metadata")
+{
+ using namespace blockstore::testing;
+
+ ScopedTemporaryDirectory _;
+
+ WorkerThreadPool& WorkerPool = GetSmallWorkerPool(EWorkloadType::Burst);
+
+ BuildStoreConfig Config;
+ Config.RootDirectory = _.Path() / "build_store";
+
+ std::vector<IoHash> BlobHashes;
+ std::vector<IoBuffer> MetaPayloads;
+ {
+ GcManager Gc;
+ BuildStore Store(Config, Gc);
+
+ for (size_t I = 0; I < 5; I++)
+ {
+ BlobHashes.push_back(IoHash::HashBuffer(&I, sizeof(I)));
+ MetaPayloads.push_back(MakeMetaData(BlobHashes.back(), {{"index", fmt::format("{}", I)}}));
+ MetaPayloads.back().SetContentType(ZenContentType::kCbObject);
+ }
+ Store.PutMetadatas(BlobHashes, MetaPayloads);
+
+ std::vector<IoBuffer> ValidateMetaPayloads = Store.GetMetadatas(BlobHashes, &WorkerPool);
+ CHECK(ValidateMetaPayloads.size() == MetaPayloads.size());
+ for (size_t I = 0; I < ValidateMetaPayloads.size(); I++)
+ {
+ const IoHash ExpectedHash = IoHash::HashBuffer(MetaPayloads[I]);
+ CHECK_EQ(IoHash::HashBuffer(ValidateMetaPayloads[I]), ExpectedHash);
+ }
+ }
+ {
+ GcManager Gc;
+ BuildStore Store(Config, Gc);
+ std::vector<IoBuffer> ValidateMetaPayloads = Store.GetMetadatas(BlobHashes, &WorkerPool);
+ CHECK(ValidateMetaPayloads.size() == MetaPayloads.size());
+ for (size_t I = 0; I < ValidateMetaPayloads.size(); I++)
+ {
+ const IoHash ExpectedHash = IoHash::HashBuffer(MetaPayloads[I]);
+ CHECK_EQ(IoHash::HashBuffer(ValidateMetaPayloads[I]), ExpectedHash);
+ }
+ for (const IoHash& BlobHash : BlobHashes)
+ {
+ CHECK(!Store.GetBlob(BlobHash));
+ }
+ }
+ std::vector<IoHash> CompressedBlobsHashes;
+ {
+ GcManager Gc;
+ BuildStore Store(Config, Gc);
+ for (size_t I = 0; I < 5; I++)
+ {
+ IoBuffer Blob = CreateSemiRandomBlob(4711 + I * 7);
+ CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(Blob)));
+ CompressedBlobsHashes.push_back(CompressedBlob.DecodeRawHash());
+ IoBuffer Payload = std::move(CompressedBlob).GetCompressed().Flatten().AsIoBuffer();
+ Payload.SetContentType(ZenContentType::kCompressedBinary);
+
+ Store.PutBlob(CompressedBlobsHashes.back(), Payload);
+ }
+ std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, &WorkerPool);
+ for (const auto& MetadataIt : MetadataPayloads)
+ {
+ CHECK(!MetadataIt);
+ }
+ for (const IoHash& BlobHash : CompressedBlobsHashes)
+ {
+ IoBuffer Blob = Store.GetBlob(BlobHash);
+ CHECK(Blob);
+ IoBuffer DecompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(Blob)).Decompress().AsIoBuffer();
+ CHECK(DecompressedBlob);
+ CHECK_EQ(IoHash::HashBuffer(DecompressedBlob), BlobHash);
+ }
+ }
+
+ std::vector<IoBuffer> BlobMetaPayloads;
+ {
+ GcManager Gc;
+ BuildStore Store(Config, Gc);
+ for (const IoHash& BlobHash : CompressedBlobsHashes)
+ {
+ BlobMetaPayloads.push_back(MakeMetaData(BlobHash, {{"blobHash", fmt::format("{}", BlobHash)}}));
+ BlobMetaPayloads.back().SetContentType(ZenContentType::kCbObject);
+ }
+ Store.PutMetadatas(CompressedBlobsHashes, BlobMetaPayloads);
+
+ std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, &WorkerPool);
+ CHECK(MetadataPayloads.size() == BlobMetaPayloads.size());
+ for (size_t I = 0; I < MetadataPayloads.size(); I++)
+ {
+ const IoBuffer& MetadataPayload = MetadataPayloads[I];
+ CHECK_EQ(IoHash::HashBuffer(MetadataPayload), IoHash::HashBuffer(BlobMetaPayloads[I]));
+ }
+ }
+
+ {
+ GcManager Gc;
+ BuildStore Store(Config, Gc);
+
+ std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, &WorkerPool);
+ CHECK(MetadataPayloads.size() == BlobMetaPayloads.size());
+ for (size_t I = 0; I < MetadataPayloads.size(); I++)
+ {
+ const IoBuffer& MetadataPayload = MetadataPayloads[I];
+ CHECK(IoHash::HashBuffer(MetadataPayload) == IoHash::HashBuffer(BlobMetaPayloads[I]));
+ }
+ for (const IoHash& BlobHash : CompressedBlobsHashes)
+ {
+ IoBuffer Blob = Store.GetBlob(BlobHash);
+ CHECK(Blob);
+ IoBuffer DecompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(Blob)).Decompress().AsIoBuffer();
+ CHECK(DecompressedBlob);
+ CHECK_EQ(IoHash::HashBuffer(DecompressedBlob), BlobHash);
+ }
+
+ BlobMetaPayloads.clear();
+ for (const IoHash& BlobHash : CompressedBlobsHashes)
+ {
+ BlobMetaPayloads.push_back(
+ MakeMetaData(BlobHash, {{"blobHash", fmt::format("{}", BlobHash)}, {"replaced", fmt::format("{}", true)}}));
+ BlobMetaPayloads.back().SetContentType(ZenContentType::kCbObject);
+ }
+ Store.PutMetadatas(CompressedBlobsHashes, BlobMetaPayloads);
+ }
+ {
+ GcManager Gc;
+ BuildStore Store(Config, Gc);
+
+ std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, &WorkerPool);
+ CHECK(MetadataPayloads.size() == BlobMetaPayloads.size());
+ for (size_t I = 0; I < MetadataPayloads.size(); I++)
+ {
+ const IoBuffer& MetadataPayload = MetadataPayloads[I];
+ CHECK(IoHash::HashBuffer(MetadataPayload) == IoHash::HashBuffer(BlobMetaPayloads[I]));
+ }
+ for (const IoHash& BlobHash : CompressedBlobsHashes)
+ {
+ IoBuffer Blob = Store.GetBlob(BlobHash);
+ CHECK(Blob);
+ IoBuffer DecompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(Blob)).Decompress().AsIoBuffer();
+ CHECK(DecompressedBlob);
+ CHECK_EQ(IoHash::HashBuffer(DecompressedBlob), BlobHash);
+ }
+ }
+}
+
// Exercises GC against the build store: a pass with an expire time in the
// past and delete mode off must leave all data intact; a delete-mode pass
// with the expire time in the future must remove every blob and its metadata.
TEST_CASE("BuildStore.GC")
{
    using namespace blockstore::testing;

    ScopedTemporaryDirectory _;

    BuildStoreConfig Config;
    Config.RootDirectory = _.Path() / "build_store";

    std::vector<IoHash> CompressedBlobsHashes;
    std::vector<IoBuffer> BlobMetaPayloads;
    {
        // Seed the store with 5 compressed blobs plus one metadata object per blob.
        GcManager Gc;
        BuildStore Store(Config, Gc);
        for (size_t I = 0; I < 5; I++)
        {
            IoBuffer Blob = CreateSemiRandomBlob(4711 + I * 7);
            CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(Blob)));
            CompressedBlobsHashes.push_back(CompressedBlob.DecodeRawHash());
            IoBuffer Payload = std::move(CompressedBlob).GetCompressed().Flatten().AsIoBuffer();
            Payload.SetContentType(ZenContentType::kCompressedBinary);

            Store.PutBlob(CompressedBlobsHashes.back(), Payload);
        }
        for (const IoHash& BlobHash : CompressedBlobsHashes)
        {
            BlobMetaPayloads.push_back(MakeMetaData(BlobHash, {{"blobHash", fmt::format("{}", BlobHash)}}));
            BlobMetaPayloads.back().SetContentType(ZenContentType::kCbObject);
        }
        Store.PutMetadatas(CompressedBlobsHashes, BlobMetaPayloads);
    }
    {
        GcManager Gc;
        BuildStore Store(Config, Gc);

        {
            // Expire time one hour in the past + IsDeleteMode=false: the
            // freshly written entries are not expired and nothing may be
            // deleted — every blob and metadata payload must still read back.
            GcResult Result = Gc.CollectGarbage(GcSettings{.BuildStoreExpireTime = GcClock::Now() - std::chrono::hours(1),
                                                           .CollectSmallObjects = false,
                                                           .IsDeleteMode = false,
                                                           .Verbose = true});
            CHECK(!Result.WasCancelled);
            for (const IoHash& BlobHash : CompressedBlobsHashes)
            {
                IoBuffer Blob = Store.GetBlob(BlobHash);
                CHECK(Blob);
                IoBuffer DecompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(Blob)).Decompress().AsIoBuffer();
                CHECK(DecompressedBlob);
                CHECK(IoHash::HashBuffer(DecompressedBlob) == BlobHash);
            }

            std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, nullptr);
            CHECK(MetadataPayloads.size() == BlobMetaPayloads.size());
            for (size_t I = 0; I < MetadataPayloads.size(); I++)
            {
                const IoBuffer& MetadataPayload = MetadataPayloads[I];
                CHECK(IoHash::HashBuffer(MetadataPayload) == IoHash::HashBuffer(BlobMetaPayloads[I]));
            }
        }
        {
            // Expire time one hour in the future + IsDeleteMode=true: every
            // blob's last access predates the cutoff, so all blobs and all
            // metadata must be removed.
            GcResult Result = Gc.CollectGarbage(GcSettings{.BuildStoreExpireTime = GcClock::Now() + std::chrono::hours(1),
                                                           .CollectSmallObjects = true,
                                                           .IsDeleteMode = true,
                                                           .Verbose = true});
            CHECK(!Result.WasCancelled);
            for (const IoHash& BlobHash : CompressedBlobsHashes)
            {
                IoBuffer Blob = Store.GetBlob(BlobHash);
                CHECK(!Blob);
            }

            std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, nullptr);
            CHECK(MetadataPayloads.size() == BlobMetaPayloads.size());
            for (size_t I = 0; I < MetadataPayloads.size(); I++)
            {
                const IoBuffer& MetadataPayload = MetadataPayloads[I];
                CHECK(!MetadataPayload);
            }
        }
    }
}
+
// Exercises size-based eviction: MaxDiskSpaceLimit is set to 1 MiB while the
// test writes 64 barely-compressible ~64 KiB blobs, so a delete-mode GC must
// evict the least-recently-accessed blobs to get back under the limit.
TEST_CASE("BuildStore.SizeLimit")
{
    using namespace blockstore::testing;

    ScopedTemporaryDirectory _;

    BuildStoreConfig Config = {.MaxDiskSpaceLimit = 1024u * 1024u};
    Config.RootDirectory = _.Path() / "build_store";

    std::vector<IoHash> CompressedBlobsHashes;
    std::vector<IoBuffer> BlobMetaPayloads;
    {
        GcManager Gc;
        BuildStore Store(Config, Gc);
        // Semi-random data compressed with level None stays roughly its
        // original size, so 64 blobs comfortably exceed the 1 MiB limit.
        for (size_t I = 0; I < 64; I++)
        {
            IoBuffer Blob = CreateSemiRandomBlob(65537 + I * 7);
            CompressedBuffer CompressedBlob =
                CompressedBuffer::Compress(SharedBuffer(std::move(Blob)), OodleCompressor::Mermaid, OodleCompressionLevel::None);
            CompressedBlobsHashes.push_back(CompressedBlob.DecodeRawHash());
            IoBuffer Payload = std::move(CompressedBlob).GetCompressed().Flatten().AsIoBuffer();
            Payload.SetContentType(ZenContentType::kCompressedBinary);
            Store.PutBlob(CompressedBlobsHashes.back(), Payload);
        }
        for (const IoHash& BlobHash : CompressedBlobsHashes)
        {
            BlobMetaPayloads.push_back(MakeMetaData(BlobHash, {{"blobHash", fmt::format("{}", BlobHash)}}));
            BlobMetaPayloads.back().SetContentType(ZenContentType::kCbObject);
        }
        Store.PutMetadatas(CompressedBlobsHashes, BlobMetaPayloads);

        {
            // Give blob I an access time I minutes in the future so access
            // times strictly increase with I; LRU eviction then removes
            // blobs in index order, oldest (lowest I) first.
            for (size_t I = 0; I < 64; I++)
            {
                const IoHash& Key = CompressedBlobsHashes[I];
                GcClock::Tick AccessTick = (GcClock::Now() + std::chrono::minutes(I)).time_since_epoch().count();

                Store.SetLastAccessTime(Key, AccessTime(AccessTick));
            }
        }
    }
    {
        GcManager Gc;
        BuildStore Store(Config, Gc);

        {
            // All access times are in the future, so nothing is time-expired;
            // any deletions come purely from the disk-size eviction path.
            GcResult Result = Gc.CollectGarbage(GcSettings{.BuildStoreExpireTime = GcClock::Now() - std::chrono::hours(1),
                                                           .CollectSmallObjects = true,
                                                           .IsDeleteMode = true,
                                                           .Verbose = true});

            uint32_t DeletedBlobs = 0;

            CHECK(!Result.WasCancelled);
            for (const IoHash& BlobHash : CompressedBlobsHashes)
            {
                IoBuffer Blob = Store.GetBlob(BlobHash);
                if (!Blob)
                {
                    DeletedBlobs++;
                }
                else
                {
                    IoBuffer DecompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(Blob)).Decompress().AsIoBuffer();
                    CHECK(DecompressedBlob);
                    CHECK(IoHash::HashBuffer(DecompressedBlob) == BlobHash);
                }
            }
            // NOTE(review): the expected count of 50 is tied to the blob sizes
            // above and the GC's "limit minus 1/16 headroom" target — confirm
            // if blob generation or the eviction headroom ever changes.
            CHECK(DeletedBlobs == 50);

            std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, nullptr);
            CHECK(MetadataPayloads.size() == BlobMetaPayloads.size());
            for (size_t I = 0; I < MetadataPayloads.size(); I++)
            {
                const IoBuffer& MetadataPayload = MetadataPayloads[I];
                // Access times ascend with I, so exactly the first
                // 'DeletedBlobs' entries (the oldest) should be gone.
                if (I < DeletedBlobs)
                {
                    CHECK(!MetadataPayload);
                }
                else
                {
                    CHECK(IoHash::HashBuffer(MetadataPayload) == IoHash::HashBuffer(BlobMetaPayloads[I]));
                }
            }
        }
    }
}
+
// Intentionally empty anchor symbol; presumably referenced from another
// translation unit so the linker keeps this file (and its registered test
// cases) in the final binary — confirm against the test registration code.
void
buildstore_forcelink()
{
}
+
+#endif
+
+} // namespace zen
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp
index 72a767645..1ebb8f144 100644
--- a/src/zenstore/cache/cachedisklayer.cpp
+++ b/src/zenstore/cache/cachedisklayer.cpp
@@ -14,6 +14,7 @@
#include <zencore/trace.h>
#include <zencore/workthreadpool.h>
#include <zencore/xxhash.h>
+#include <zenutil/parallelwork.h>
#include <zenutil/referencemetadata.h>
#include <zenutil/workerpools.h>
@@ -195,34 +196,33 @@ namespace cache::impl {
return true;
}
- bool MoveAndDeleteDirectory(const std::filesystem::path& Dir)
+ std::filesystem::path MoveDroppedDirectory(const std::filesystem::path& Dir)
{
int DropIndex = 0;
do
{
- if (!std::filesystem::exists(Dir))
+ if (!IsDir(Dir))
{
- return false;
+ return {};
}
std::string DroppedName = fmt::format("[dropped]{}({})", Dir.filename().string(), DropIndex);
std::filesystem::path DroppedBucketPath = Dir.parent_path() / DroppedName;
- if (std::filesystem::exists(DroppedBucketPath))
+ if (IsDir(DroppedBucketPath))
{
DropIndex++;
continue;
}
std::error_code Ec;
- std::filesystem::rename(Dir, DroppedBucketPath, Ec);
+ RenameDirectory(Dir, DroppedBucketPath, Ec);
if (!Ec)
{
- DeleteDirectories(DroppedBucketPath);
- return true;
+ return DroppedBucketPath;
}
- // TODO: Do we need to bail at some point?
zen::Sleep(100);
- } while (true);
+ } while (DropIndex < 10);
+ return {};
}
} // namespace cache::impl
@@ -373,10 +373,10 @@ private:
#pragma pack(4)
struct ManifestData
{
- uint32_t RawSize; // 4
- AccessTime Timestamp; // 4
- IoHash RawHash; // 20
- IoHash Key; // 20
+ uint32_t RawSize; // 4
+ uint32_t SecondsSinceEpoch; // 4
+ IoHash RawHash; // 20
+ IoHash Key; // 20
};
#pragma pack(pop)
@@ -658,7 +658,7 @@ BucketManifestSerializer::ReadSidecarFile(RwLock::ExclusiveLockScope& B
ZenCacheDiskLayer::CacheBucket::BucketPayload& PayloadEntry = Payloads[PlIndex];
- AccessTimes[PlIndex] = Entry->Timestamp;
+ AccessTimes[PlIndex].SetSecondsSinceEpoch(Entry->SecondsSinceEpoch);
if (Entry->RawSize && Entry->RawHash != IoHash::Zero)
{
@@ -685,6 +685,16 @@ BucketManifestSerializer::WriteSidecarFile(RwLock::SharedLockScope&,
{
ZEN_TRACE_CPU("Z$::WriteSidecarFile");
+ ZEN_DEBUG("writing store sidecar for '{}'", SidecarPath);
+ const uint64_t EntryCount = Index.size();
+ Stopwatch Timer;
+ const auto _ = MakeGuard([&] {
+ ZEN_INFO("wrote store sidecar for '{}' containing {} entries in {}",
+ SidecarPath,
+ EntryCount,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ });
+
BucketMetaHeader Header;
Header.EntryCount = m_ManifestEntryCount;
Header.LogPosition = SnapshotLogPosition;
@@ -702,43 +712,44 @@ BucketManifestSerializer::WriteSidecarFile(RwLock::SharedLockScope&,
SidecarFile.Write(&Header, sizeof Header, 0);
- // TODO: make this batching for better performance
{
uint64_t WriteOffset = sizeof Header;
- // BasicFileWriter SidecarWriter(SidecarFile, 128 * 1024);
+ const size_t MaxManifestDataBufferCount = (512u * 1024u) / sizeof(ManifestData);
- std::vector<ManifestData> ManifestDataBuffer;
- const size_t MaxManifestDataBufferCount = Min(Index.size(), 8192u); // 512 Kb
- ManifestDataBuffer.reserve(MaxManifestDataBufferCount);
+ std::vector<ManifestData> ManifestDataBuffer(Min(m_ManifestEntryCount, MaxManifestDataBufferCount));
+ auto WriteIt = ManifestDataBuffer.begin();
for (auto& Kv : Index)
{
- const IoHash& Key = Kv.first;
- const PayloadIndex PlIndex = Kv.second;
+ ManifestData& Data = *WriteIt++;
- IoHash RawHash = IoHash::Zero;
- uint32_t RawSize = 0;
+ const PayloadIndex PlIndex = Kv.second;
+ Data.Key = Kv.first;
+ Data.SecondsSinceEpoch = AccessTimes[PlIndex].GetSecondsSinceEpoch();
if (const MetaDataIndex MetaIndex = Payloads[PlIndex].MetaData)
{
- RawHash = MetaDatas[MetaIndex].RawHash;
- RawSize = MetaDatas[MetaIndex].RawSize;
+ Data.RawHash = MetaDatas[MetaIndex].RawHash;
+ Data.RawSize = MetaDatas[MetaIndex].RawSize;
+ }
+ else
+ {
+ Data.RawHash = IoHash::Zero;
+ Data.RawSize = 0;
}
- ManifestDataBuffer.emplace_back(
- ManifestData{.RawSize = RawSize, .Timestamp = AccessTimes[PlIndex], .RawHash = RawHash, .Key = Key});
- if (ManifestDataBuffer.size() == MaxManifestDataBufferCount)
+ if (WriteIt == ManifestDataBuffer.end())
{
- const uint64_t WriteSize = sizeof(ManifestData) * ManifestDataBuffer.size();
+ uint64_t WriteSize = std::distance(ManifestDataBuffer.begin(), WriteIt) * sizeof(ManifestData);
SidecarFile.Write(ManifestDataBuffer.data(), WriteSize, WriteOffset);
WriteOffset += WriteSize;
- ManifestDataBuffer.clear();
- ManifestDataBuffer.reserve(MaxManifestDataBufferCount);
+ WriteIt = ManifestDataBuffer.begin();
}
}
- if (ManifestDataBuffer.size() > 0)
+ if (WriteIt != ManifestDataBuffer.begin())
{
- SidecarFile.Write(ManifestDataBuffer.data(), sizeof(ManifestData) * ManifestDataBuffer.size(), WriteOffset);
+ uint64_t WriteSize = std::distance(ManifestDataBuffer.begin(), WriteIt) * sizeof(ManifestData);
+ SidecarFile.Write(ManifestDataBuffer.data(), WriteSize, WriteOffset);
}
}
@@ -763,11 +774,11 @@ namespace zen {
ZenCacheDiskLayer::CacheBucket::CacheBucket(GcManager& Gc,
std::atomic_uint64_t& OuterCacheMemoryUsage,
- std::string BucketName,
+ std::string_view BucketName,
const BucketConfiguration& Config)
: m_Gc(Gc)
, m_OuterCacheMemoryUsage(OuterCacheMemoryUsage)
-, m_BucketName(std::move(BucketName))
+, m_BucketName(BucketName)
, m_Configuration(Config)
, m_BucketId(Oid::Zero)
{
@@ -795,6 +806,16 @@ ZenCacheDiskLayer::CacheBucket::CacheBucket(GcManager& Gc,
// Flushes and closes the bucket's log and block store before unregistering
// from GC. Destructors must not throw, so failures are logged and swallowed.
ZenCacheDiskLayer::CacheBucket::~CacheBucket()
{
    try
    {
        m_SlogFile.Flush();
        m_SlogFile.Close();
        m_BlockStore.Close();
    }
    catch (const std::exception& Ex)
    {
        // BUG FIX: the format string was missing its '{}' placeholder, so the
        // exception text passed as an argument was silently dropped.
        ZEN_ERROR("~CacheBucket() failed with: {}", Ex.what());
    }
    m_Gc.RemoveGcReferencer(*this);
}
@@ -868,12 +889,13 @@ ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir, bo
}
void
-ZenCacheDiskLayer::CacheBucket::WriteIndexSnapshotLocked(bool FlushLockPosition, const std::function<uint64_t()>& ClaimDiskReserveFunc)
+ZenCacheDiskLayer::CacheBucket::WriteIndexSnapshotLocked(uint64_t LogPosition,
+ bool ResetLog,
+ const std::function<uint64_t()>& ClaimDiskReserveFunc)
{
ZEN_TRACE_CPU("Z$::Bucket::WriteIndexSnapshot");
- const uint64_t LogCount = FlushLockPosition ? 0 : m_SlogFile.GetLogCount();
- if (m_LogFlushPosition == LogCount)
+ if (m_LogFlushPosition == LogPosition)
{
return;
}
@@ -890,7 +912,7 @@ ZenCacheDiskLayer::CacheBucket::WriteIndexSnapshotLocked(bool FlushLockPosition,
namespace fs = std::filesystem;
- fs::path IndexPath = cache::impl::GetIndexPath(m_BucketDir, m_BucketName);
+ const fs::path IndexPath = cache::impl::GetIndexPath(m_BucketDir, m_BucketName);
try
{
@@ -922,66 +944,70 @@ ZenCacheDiskLayer::CacheBucket::WriteIndexSnapshotLocked(bool FlushLockPosition,
throw std::system_error(Ec, fmt::format("failed to create new snapshot file in '{}'", m_BucketDir));
}
- {
- // This is in a separate scope just to ensure IndexWriter goes out
- // of scope before the file is flushed/closed, in order to ensure
- // all data is written to the file
- BasicFileWriter IndexWriter(ObjectIndexFile, 128 * 1024);
+ const uint64_t IndexLogPosition = ResetLog ? 0 : LogPosition;
- cache::impl::CacheBucketIndexHeader Header = {.EntryCount = EntryCount,
- .LogPosition = LogCount,
- .PayloadAlignment = gsl::narrow<uint32_t>(m_Configuration.PayloadAlignment)};
+ cache::impl::CacheBucketIndexHeader Header = {.EntryCount = EntryCount,
+ .LogPosition = IndexLogPosition,
+ .PayloadAlignment = gsl::narrow<uint32_t>(m_Configuration.PayloadAlignment)};
- Header.Checksum = cache::impl::CacheBucketIndexHeader::ComputeChecksum(Header);
- IndexWriter.Write(&Header, sizeof(cache::impl::CacheBucketIndexHeader), 0);
+ Header.Checksum = cache::impl::CacheBucketIndexHeader::ComputeChecksum(Header);
+ ObjectIndexFile.Write(&Header, sizeof(cache::impl::CacheBucketIndexHeader), 0);
+ if (EntryCount > 0)
+ {
uint64_t IndexWriteOffset = sizeof(cache::impl::CacheBucketIndexHeader);
+ size_t MaxWriteEntryCount = (512u * 1024u) / sizeof(DiskIndexEntry);
+ std::vector<DiskIndexEntry> DiskEntryBuffer(Min(m_Index.size(), MaxWriteEntryCount));
+
+ auto WriteIt = DiskEntryBuffer.begin();
for (auto& Entry : m_Index)
{
- DiskIndexEntry IndexEntry;
- IndexEntry.Key = Entry.first;
- IndexEntry.Location = m_Payloads[Entry.second].Location;
- IndexWriter.Write(&IndexEntry, sizeof(DiskIndexEntry), IndexWriteOffset);
-
- IndexWriteOffset += sizeof(DiskIndexEntry);
+ *WriteIt++ = {.Key = Entry.first, .Location = m_Payloads[Entry.second].Location};
+ if (WriteIt == DiskEntryBuffer.end())
+ {
+ uint64_t WriteSize = std::distance(DiskEntryBuffer.begin(), WriteIt) * sizeof(DiskIndexEntry);
+ ObjectIndexFile.Write(DiskEntryBuffer.data(), WriteSize, IndexWriteOffset);
+ IndexWriteOffset += WriteSize;
+ WriteIt = DiskEntryBuffer.begin();
+ }
}
- IndexWriter.Flush();
+ if (WriteIt != DiskEntryBuffer.begin())
+ {
+ uint64_t WriteSize = std::distance(DiskEntryBuffer.begin(), WriteIt) * sizeof(DiskIndexEntry);
+ ObjectIndexFile.Write(DiskEntryBuffer.data(), WriteSize, IndexWriteOffset);
+ }
}
ObjectIndexFile.Flush();
ObjectIndexFile.MoveTemporaryIntoPlace(IndexPath, Ec);
if (Ec)
{
- std::filesystem::path TempFilePath = ObjectIndexFile.GetPath();
- ZEN_WARN("snapshot failed to rename new snapshot '{}' to '{}', reason: '{}'", TempFilePath, IndexPath, Ec.message());
+ throw std::system_error(Ec,
+ fmt::format("Snapshot failed to rename new snapshot '{}' to '{}', reason: '{}'",
+ ObjectIndexFile.GetPath(),
+ IndexPath,
+ Ec.message()));
}
- else
+
+ if (ResetLog)
{
- // We must only update the log flush position once the snapshot write succeeds
- if (FlushLockPosition)
- {
- std::filesystem::path LogPath = cache::impl::GetLogPath(m_BucketDir, m_BucketName);
+ const std::filesystem::path LogPath = cache::impl::GetLogPath(m_BucketDir, m_BucketName);
- if (std::filesystem::is_regular_file(LogPath))
+ if (IsFile(LogPath))
+ {
+ m_SlogFile.Close();
+ if (!RemoveFile(LogPath, Ec) || Ec)
{
- if (!std::filesystem::remove(LogPath, Ec) || Ec)
- {
- ZEN_WARN("snapshot failed to clean log file '{}', removing index at '{}', reason: '{}'",
- LogPath,
- IndexPath,
- Ec.message());
- std::error_code RemoveIndexEc;
- std::filesystem::remove(IndexPath, RemoveIndexEc);
- }
+ // This is non-critical, it only means that we will replay the events of the log over the snapshot - inefficent but in
+ // the end it will be the same result
+ ZEN_WARN("snapshot failed to clean log file '{}', reason: '{}'", LogPath, IndexPath, Ec.message());
}
- }
- if (!Ec)
- {
- m_LogFlushPosition = LogCount;
+ m_SlogFile.Open(LogPath, CasLogFile::Mode::kWrite);
}
}
+ m_LogFlushPosition = IndexLogPosition;
}
catch (const std::exception& Err)
{
@@ -994,7 +1020,7 @@ ZenCacheDiskLayer::CacheBucket::ReadIndexFile(RwLock::ExclusiveLockScope&, const
{
ZEN_TRACE_CPU("Z$::Bucket::ReadIndexFile");
- if (!std::filesystem::is_regular_file(IndexPath))
+ if (!IsFile(IndexPath))
{
return 0;
}
@@ -1078,7 +1104,7 @@ ZenCacheDiskLayer::CacheBucket::ReadLog(RwLock::ExclusiveLockScope&, const std::
{
ZEN_TRACE_CPU("Z$::Bucket::ReadLog");
- if (!std::filesystem::is_regular_file(LogPath))
+ if (!IsFile(LogPath))
{
return 0;
}
@@ -1158,47 +1184,40 @@ ZenCacheDiskLayer::CacheBucket::InitializeIndexFromDisk(RwLock::ExclusiveLockSco
if (IsNew)
{
- fs::remove(LogPath);
- fs::remove(IndexPath);
- fs::remove_all(m_BlocksBasePath);
+ RemoveFile(LogPath);
+ RemoveFile(IndexPath);
+ DeleteDirectories(m_BlocksBasePath);
}
CreateDirectories(m_BucketDir);
m_BlockStore.Initialize(m_BlocksBasePath, m_Configuration.MaxBlockSize, BlockStoreDiskLocation::MaxBlockIndex + 1);
- if (std::filesystem::is_regular_file(IndexPath))
+ if (IsFile(IndexPath))
{
uint32_t IndexVersion = 0;
m_LogFlushPosition = ReadIndexFile(IndexLock, IndexPath, IndexVersion);
if (IndexVersion == 0)
{
ZEN_WARN("removing invalid index file at '{}'", IndexPath);
- std::filesystem::remove(IndexPath);
+ RemoveFile(IndexPath);
}
}
uint64_t LogEntryCount = 0;
- if (std::filesystem::is_regular_file(LogPath))
+ if (IsFile(LogPath))
{
if (TCasLogFile<DiskIndexEntry>::IsValid(LogPath))
{
LogEntryCount = ReadLog(IndexLock, LogPath, m_LogFlushPosition);
}
- else if (fs::is_regular_file(LogPath))
+ else if (IsFile(LogPath))
{
ZEN_WARN("removing invalid log at '{}'", LogPath);
- std::filesystem::remove(LogPath);
+ RemoveFile(LogPath);
}
}
- if (IsNew || LogEntryCount > 0 || m_LogFlushPosition != 0)
- {
- WriteIndexSnapshot(IndexLock, /*Flush log*/ true);
- }
-
- m_SlogFile.Open(LogPath, CasLogFile::Mode::kWrite);
-
BlockStore::BlockIndexSet KnownBlocks;
for (const auto& Entry : m_Index)
{
@@ -1216,7 +1235,53 @@ ZenCacheDiskLayer::CacheBucket::InitializeIndexFromDisk(RwLock::ExclusiveLockSco
KnownBlocks.insert(BlockIndex);
}
}
- m_BlockStore.SyncExistingBlocksOnDisk(KnownBlocks);
+ BlockStore::BlockIndexSet MissingBlocks = m_BlockStore.SyncExistingBlocksOnDisk(KnownBlocks);
+ m_SlogFile.Open(LogPath, CasLogFile::Mode::kWrite);
+
+ bool RemovedEntries = false;
+ if (!MissingBlocks.empty())
+ {
+ std::vector<DiskIndexEntry> MissingEntries;
+
+ for (auto& It : m_Index)
+ {
+ BucketPayload& Payload = m_Payloads[It.second];
+ DiskLocation Location = Payload.Location;
+ if (!Location.IsFlagSet(DiskLocation::kStandaloneFile))
+ {
+ if (MissingBlocks.contains(Location.Location.BlockLocation.GetBlockIndex()))
+ {
+ RemoveMemCachedData(IndexLock, Payload);
+ RemoveMetaData(IndexLock, Payload);
+ }
+ }
+ Location.Flags |= DiskLocation::kTombStone;
+ MissingEntries.push_back(DiskIndexEntry{.Key = It.first, .Location = Location});
+ }
+
+ ZEN_ASSERT(!MissingEntries.empty());
+
+ for (const DiskIndexEntry& Entry : MissingEntries)
+ {
+ m_Index.erase(Entry.Key);
+ }
+ m_SlogFile.Append(MissingEntries);
+ m_SlogFile.Flush();
+ {
+ std::vector<BucketPayload> Payloads;
+ std::vector<AccessTime> AccessTimes;
+ std::vector<BucketMetaData> MetaDatas;
+ std::vector<MemCacheData> MemCachedPayloads;
+ IndexMap Index;
+ CompactState(IndexLock, Payloads, AccessTimes, MetaDatas, MemCachedPayloads, Index);
+ }
+ RemovedEntries = true;
+ }
+
+ if (IsNew || LogEntryCount > 0 || m_LogFlushPosition != 0 || RemovedEntries)
+ {
+ WriteIndexSnapshot(IndexLock, m_SlogFile.GetLogCount(), /*Flush log*/ true);
+ }
}
void
@@ -1384,7 +1449,7 @@ ZenCacheDiskLayer::CacheBucket::EndPutBatch(PutBatchHandle* Batch) noexcept
struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle
{
- GetBatchHandle(std::vector<ZenCacheValue>& OutResults) : OutResults(OutResults)
+ GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults)
{
Keys.reserve(OutResults.capacity());
ResultIndexes.reserve(OutResults.capacity());
@@ -1395,11 +1460,11 @@ struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle
std::vector<IoHash> Keys;
std::vector<size_t> ResultIndexes;
- std::vector<ZenCacheValue>& OutResults;
+ ZenCacheValueVec_t& OutResults;
};
ZenCacheDiskLayer::CacheBucket::GetBatchHandle*
-ZenCacheDiskLayer::CacheBucket::BeginGetBatch(std::vector<ZenCacheValue>& OutResult)
+ZenCacheDiskLayer::CacheBucket::BeginGetBatch(ZenCacheValueVec_t& OutResult)
{
ZEN_TRACE_CPU("Z$::Bucket::BeginGetBatch");
return new GetBatchHandle(OutResult);
@@ -1419,13 +1484,13 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept
if (!Batch->ResultIndexes.empty())
{
- std::vector<DiskLocation> StandaloneDiskLocations;
- std::vector<size_t> StandaloneKeyIndexes;
- std::vector<size_t> MemCachedKeyIndexes;
- std::vector<DiskLocation> InlineDiskLocations;
- std::vector<BlockStoreLocation> InlineBlockLocations;
- std::vector<size_t> InlineKeyIndexes;
- std::vector<bool> FillRawHashAndRawSize(Batch->Keys.size(), false);
+ eastl::fixed_vector<DiskLocation, 16> StandaloneDiskLocations;
+ eastl::fixed_vector<size_t, 16> StandaloneKeyIndexes;
+ eastl::fixed_vector<size_t, 16> MemCachedKeyIndexes;
+ eastl::fixed_vector<DiskLocation, 16> InlineDiskLocations;
+ eastl::fixed_vector<BlockStoreLocation, 16> InlineBlockLocations;
+ eastl::fixed_vector<size_t, 16> InlineKeyIndexes;
+ eastl::fixed_vector<bool, 16> FillRawHashAndRawSize(Batch->Keys.size(), false);
{
RwLock::SharedLockScope IndexLock(m_IndexLock);
for (size_t KeyIndex = 0; KeyIndex < Batch->Keys.size(); KeyIndex++)
@@ -1479,6 +1544,13 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept
}
}
}
+ else
+ {
+ if (m_Configuration.MemCacheSizeThreshold > 0)
+ {
+ m_MemoryMissCount++;
+ }
+ }
}
}
@@ -1487,7 +1559,7 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept
    // Often we will find the metadata already present, because the thread that set the mem-cached part did so before us, so it is worth
    // checking once more whether it is present before spending time fetching and setting the RawHash and RawSize in metadata
- auto FillOne = [&](const DiskLocation& Location, size_t KeyIndex, IoBuffer&& Value) {
+ auto FillOne = [&](const DiskLocation& Location, size_t KeyIndex, IoBuffer&& Value, bool UsesTemporaryMemory) {
if (!Value)
{
return;
@@ -1510,6 +1582,12 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept
}
}
+ if (AddToMemCache || UsesTemporaryMemory)
+ {
+            // We need to own it if we want to add it to the memcache, or if the buffer is just a range of the block iteration buffer
+ OutValue.Value.MakeOwned();
+ }
+
if (SetMetaInfo)
{
// See ZenCacheDiskLayer::CacheBucket::Get - it sets the memcache part first and then if it needs to it set the
@@ -1581,33 +1659,42 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept
if (!InlineDiskLocations.empty())
{
ZEN_TRACE_CPU("Z$::Bucket::EndGetBatch::ReadInline");
- m_BlockStore.IterateChunks(InlineBlockLocations, [&](uint32_t, std::span<const size_t> ChunkIndexes) -> bool {
- // Only read into memory the IoBuffers we could potentially add to memcache
- const uint64_t LargeChunkSizeLimit = Max(m_Configuration.MemCacheSizeThreshold, 1u * 1024u);
- m_BlockStore.IterateBlock(
- InlineBlockLocations,
- ChunkIndexes,
- [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex,
- const void* Data,
- uint64_t Size) -> bool {
- if (Data != nullptr)
- {
- FillOne(InlineDiskLocations[ChunkIndex],
- InlineKeyIndexes[ChunkIndex],
- IoBufferBuilder::MakeCloneFromMemory(Data, Size));
- }
- return true;
- },
- [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex,
- BlockStoreFile& File,
- uint64_t Offset,
- uint64_t Size) -> bool {
- FillOne(InlineDiskLocations[ChunkIndex], InlineKeyIndexes[ChunkIndex], File.GetChunk(Offset, Size));
- return true;
- },
- LargeChunkSizeLimit);
- return true;
- });
+ m_BlockStore.IterateChunks(std::span{begin(InlineBlockLocations), end(InlineBlockLocations)},
+ [&](uint32_t, std::span<const size_t> ChunkIndexes) -> bool {
+ // Up to 8KB or m_Configuration.MemCacheSizeThreshold depending on configuration
+ const uint64_t LargeChunkSizeLimit =
+ m_Configuration.MemCacheSizeThreshold == 0
+ ? Min(m_Configuration.LargeObjectThreshold, 8u * 1024u)
+ : Max(m_Configuration.MemCacheSizeThreshold, 8u * 1024u);
+
+ m_BlockStore.IterateBlock(
+ std::span{begin(InlineBlockLocations), end(InlineBlockLocations)},
+ ChunkIndexes,
+ [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex,
+ const void* Data,
+ uint64_t Size) -> bool {
+ if (Data != nullptr)
+ {
+ FillOne(InlineDiskLocations[ChunkIndex],
+ InlineKeyIndexes[ChunkIndex],
+ IoBufferBuilder::MakeFromMemory(MemoryView(Data, Size)),
+ /*UsesTemporaryMemory*/ true);
+ }
+ return true;
+ },
+ [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex,
+ BlockStoreFile& File,
+ uint64_t Offset,
+ uint64_t Size) -> bool {
+ FillOne(InlineDiskLocations[ChunkIndex],
+ InlineKeyIndexes[ChunkIndex],
+ File.GetChunk(Offset, Size),
+ /*UsesTemporaryMemory*/ false);
+ return true;
+ },
+ LargeChunkSizeLimit);
+ return true;
+ });
}
if (!StandaloneDiskLocations.empty())
@@ -1617,7 +1704,7 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept
{
size_t KeyIndex = StandaloneKeyIndexes[Index];
const DiskLocation& Location = StandaloneDiskLocations[Index];
- FillOne(Location, KeyIndex, GetStandaloneCacheValue(Location, Batch->Keys[KeyIndex]));
+ FillOne(Location, KeyIndex, GetStandaloneCacheValue(Location, Batch->Keys[KeyIndex]), /*UsesTemporaryMemory*/ false);
}
}
@@ -1697,10 +1784,6 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept
else
{
m_DiskMissCount++;
- if (m_Configuration.MemCacheSizeThreshold > 0)
- {
- m_MemoryMissCount++;
- }
}
}
}
@@ -2029,11 +2112,13 @@ ZenCacheDiskLayer::CacheBucket::GetUsageByAccess(GcClock::TimePoint Now, GcClock
}
}
-bool
+std::function<void()>
ZenCacheDiskLayer::CacheBucket::Drop()
{
ZEN_TRACE_CPU("Z$::Bucket::Drop");
+ m_Gc.RemoveGcReferencer(*this);
+
RwLock::ExclusiveLockScope _(m_IndexLock);
std::vector<std::unique_ptr<RwLock::ExclusiveLockScope>> ShardLocks;
@@ -2045,7 +2130,7 @@ ZenCacheDiskLayer::CacheBucket::Drop()
m_BlockStore.Close();
m_SlogFile.Close();
- const bool Deleted = cache::impl::MoveAndDeleteDirectory(m_BucketDir);
+ std::filesystem::path DroppedPath = cache::impl::MoveDroppedDirectory(m_BucketDir);
m_Index.clear();
m_Payloads.clear();
@@ -2058,7 +2143,21 @@ ZenCacheDiskLayer::CacheBucket::Drop()
m_OuterCacheMemoryUsage.fetch_sub(m_MemCachedSize.load());
m_MemCachedSize.store(0);
- return Deleted;
+ if (DroppedPath.empty())
+ {
+ return {};
+ }
+ else
+ {
+ return [DroppedPath = std::move(DroppedPath)]() {
+ std::error_code Ec;
+ (void)DeleteDirectories(DroppedPath, Ec);
+ if (Ec)
+ {
+ ZEN_WARN("Failed to clean up dropped bucket directory '{}', reason: '{}'", DroppedPath, Ec.message());
+ }
+ };
+ }
}
void
@@ -2093,6 +2192,9 @@ ZenCacheDiskLayer::CacheBucket::SaveSnapshot(const std::function<uint64_t()>& Cl
ZEN_TRACE_CPU("Z$::Bucket::SaveSnapshot");
try
{
+ // Be defensive regarding log position as it is written to without acquiring m_LocationMapLock
+ const uint64_t LogPosition = m_SlogFile.GetLogCount();
+
bool UseLegacyScheme = false;
IoBuffer Buffer;
@@ -2107,7 +2209,7 @@ ZenCacheDiskLayer::CacheBucket::SaveSnapshot(const std::function<uint64_t()>& Cl
{
RwLock::SharedLockScope IndexLock(m_IndexLock);
- WriteIndexSnapshot(IndexLock, /*Flush log*/ false);
+ WriteIndexSnapshot(IndexLock, LogPosition, /*Flush log*/ false);
// Note: this copy could be eliminated on shutdown to
// reduce memory usage and execution time
Index = m_Index;
@@ -2147,7 +2249,7 @@ ZenCacheDiskLayer::CacheBucket::SaveSnapshot(const std::function<uint64_t()>& Cl
else
{
RwLock::SharedLockScope IndexLock(m_IndexLock);
- WriteIndexSnapshot(IndexLock, /*Flush log*/ false);
+ WriteIndexSnapshot(IndexLock, LogPosition, /*Flush log*/ false);
const uint64_t EntryCount = m_Index.size();
Buffer = ManifestWriter.MakeSidecarManifest(m_BucketId, EntryCount);
uint64_t SidecarSize = ManifestWriter.GetSidecarSize();
@@ -2257,7 +2359,7 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
RwLock::SharedLockScope ValueLock(LockForHash(HashKey));
std::error_code Ec;
- uintmax_t size = std::filesystem::file_size(DataFilePath.ToPath(), Ec);
+ uintmax_t size = FileSizeFromPath(DataFilePath.ToPath(), Ec);
if (Ec)
{
ReportBadKey(HashKey);
@@ -2398,11 +2500,11 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
BuildPath(Path, Entry.Key);
fs::path FilePath = Path.ToPath();
RwLock::ExclusiveLockScope ValueLock(LockForHash(Entry.Key));
- if (fs::is_regular_file(FilePath))
+ if (IsFile(FilePath))
{
ZEN_DEBUG("deleting bad standalone cache file '{}'", Path.ToUtf8());
std::error_code Ec;
- fs::remove(FilePath, Ec); // We don't care if we fail, we are no longer tracking this file...
+ RemoveFile(FilePath, Ec); // We don't care if we fail, we are no longer tracking this file...
}
}
}
@@ -2535,7 +2637,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c
if (CleanUpTempFile)
{
std::error_code Ec;
- std::filesystem::remove(DataFile.GetPath(), Ec);
+ RemoveFile(DataFile.GetPath(), Ec);
if (Ec)
{
ZEN_WARN("Failed to clean up temporary file '{}' for put in '{}', reason '{}'",
@@ -2563,7 +2665,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c
RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey));
    // We do a speculative remove of the file instead of probing with an exists call, and check the error code instead
- std::filesystem::remove(FsPath, Ec);
+ RemoveFile(FsPath, Ec);
if (Ec)
{
if (Ec.value() != ENOENT)
@@ -2571,7 +2673,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c
ZEN_WARN("Failed to remove file '{}' for put in '{}', reason: '{}', retrying.", FsPath, m_BucketDir, Ec.message());
Sleep(100);
Ec.clear();
- std::filesystem::remove(FsPath, Ec);
+ RemoveFile(FsPath, Ec);
if (Ec && Ec.value() != ENOENT)
{
throw std::system_error(Ec, fmt::format("Failed to remove file '{}' for put in '{}'", FsPath, m_BucketDir));
@@ -2796,7 +2898,6 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey,
ZEN_MEMSCOPE(GetCacheDiskTag());
ZEN_TRACE_CPU("Z$::Bucket::UpdateLocation");
DiskLocation Location(BlockStoreLocation, m_Configuration.PayloadAlignment, EntryFlags);
- m_SlogFile.Append({.Key = HashKey, .Location = Location});
RwLock::ExclusiveLockScope IndexLock(m_IndexLock);
if (m_TrackedCacheKeys)
@@ -2826,6 +2927,7 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey,
m_AccessTimes.emplace_back(GcClock::TickCount());
m_Index.insert_or_assign(HashKey, EntryIndex);
}
+ m_SlogFile.Append({.Key = HashKey, .Location = Location});
});
}
@@ -2842,9 +2944,10 @@ class DiskBucketStoreCompactor : public GcStoreCompactor
using CacheBucket = ZenCacheDiskLayer::CacheBucket;
public:
- DiskBucketStoreCompactor(CacheBucket& Bucket, std::vector<std::pair<IoHash, uint64_t>>&& ExpiredStandaloneKeys)
+ DiskBucketStoreCompactor(CacheBucket& Bucket, std::vector<std::pair<IoHash, uint64_t>>&& ExpiredStandaloneKeys, bool FlushBucket)
: m_Bucket(Bucket)
, m_ExpiredStandaloneKeys(std::move(ExpiredStandaloneKeys))
+ , m_FlushBucket(FlushBucket)
{
m_ExpiredStandaloneKeys.shrink_to_fit();
}
@@ -2902,7 +3005,7 @@ public:
ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': deleting standalone cache file '{}'", m_Bucket.m_BucketDir, Path.ToUtf8());
std::error_code Ec;
- if (!fs::remove(FilePath, Ec))
+ if (!RemoveFile(FilePath, Ec))
{
continue;
}
@@ -2923,7 +3026,7 @@ public:
ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': checking standalone cache file '{}'", m_Bucket.m_BucketDir, Path.ToUtf8());
std::error_code Ec;
- bool Existed = std::filesystem::is_regular_file(FilePath, Ec);
+ bool Existed = IsFile(FilePath, Ec);
if (Ec)
{
ZEN_WARN("GCV2: cachebucket [COMPACT] '{}': failed checking cache payload file '{}'. Reason '{}'",
@@ -3023,10 +3126,12 @@ public:
m_Bucket.m_BlockStore.CompactBlocks(
BlockCompactState,
m_Bucket.m_Configuration.PayloadAlignment,
- [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) {
+ [&](const BlockStore::MovedChunksArray& MovedArray,
+ const BlockStore::ChunkIndexArray& ScrubbedArray,
+ uint64_t FreedDiskSpace) {
std::vector<DiskIndexEntry> MovedEntries;
MovedEntries.reserve(MovedArray.size());
- RwLock::ExclusiveLockScope _(m_Bucket.m_IndexLock);
+ RwLock::ExclusiveLockScope IndexLock(m_Bucket.m_IndexLock);
for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray)
{
size_t ChunkIndex = Moved.first;
@@ -3048,6 +3153,24 @@ public:
MovedEntries.push_back({.Key = Key, .Location = Payload.Location});
}
}
+
+ for (size_t ScrubbedIndex : ScrubbedArray)
+ {
+ const IoHash& Key = BlockCompactStateKeys[ScrubbedIndex];
+
+ if (auto It = m_Bucket.m_Index.find(Key); It != m_Bucket.m_Index.end())
+ {
+ BucketPayload& Payload = m_Bucket.m_Payloads[It->second];
+ DiskLocation Location = Payload.Location;
+
+ m_Bucket.RemoveMemCachedData(IndexLock, Payload);
+ m_Bucket.RemoveMetaData(IndexLock, Payload);
+
+ Location.Flags |= DiskLocation::kTombStone;
+ MovedEntries.push_back(DiskIndexEntry{.Key = Key, .Location = Location});
+ }
+ }
+
m_Bucket.m_SlogFile.Append(MovedEntries);
Stats.RemovedDisk += FreedDiskSpace;
if (Ctx.IsCancelledFlag.load())
@@ -3071,6 +3194,10 @@ public:
}
}
}
+ if (m_FlushBucket)
+ {
+ m_Bucket.Flush();
+ }
}
virtual std::string GetGcName(GcCtx& Ctx) override { return m_Bucket.GetGcName(Ctx); }
@@ -3078,6 +3205,7 @@ public:
private:
ZenCacheDiskLayer::CacheBucket& m_Bucket;
std::vector<std::pair<IoHash, uint64_t>> m_ExpiredStandaloneKeys;
+ bool m_FlushBucket = false;
};
GcStoreCompactor*
@@ -3101,24 +3229,6 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats)
NiceBytes(Stats.FreedMemory),
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
}
- if (Stats.DeletedCount > 0)
- {
- bool Expected = false;
- if (m_IsFlushing || !m_IsFlushing.compare_exchange_strong(Expected, true))
- {
- return;
- }
- auto FlushingGuard = MakeGuard([&] { m_IsFlushing.store(false); });
-
- try
- {
- SaveSnapshot([]() { return 0; });
- }
- catch (const std::exception& Ex)
- {
- ZEN_WARN("Failed to write index and manifest after RemoveExpiredData in '{}'. Reason: '{}'", m_BucketDir, Ex.what());
- }
- }
});
const GcClock::Tick ExpireTicks = Ctx.Settings.CacheExpireTime.time_since_epoch().count();
@@ -3170,7 +3280,7 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats)
return nullptr;
}
- if (Ctx.Settings.IsDeleteMode)
+ if (Ctx.Settings.IsDeleteMode && !ExpiredEntries.empty())
{
for (const DiskIndexEntry& Entry : ExpiredEntries)
{
@@ -3205,7 +3315,7 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats)
return nullptr;
}
- return new DiskBucketStoreCompactor(*this, std::move(ExpiredStandaloneKeys));
+ return new DiskBucketStoreCompactor(*this, std::move(ExpiredStandaloneKeys), /*FlushBucket*/ Stats.DeletedCount > 0);
}
bool
@@ -3395,7 +3505,7 @@ ZenCacheDiskLayer::CacheBucket::GetReferences(const LoggerRef& Logger,
CaptureAttachments(ChunkIndex, File.GetChunk(Offset, Size).GetView());
return !IsCancelledFlag.load();
},
- 0);
+ 32u * 1024);
if (Continue)
{
@@ -3698,11 +3808,11 @@ ZenCacheDiskLayer::CacheBucket*
ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket)
{
ZEN_TRACE_CPU("Z$::GetOrCreateBucket");
- const auto BucketName = std::string(InBucket);
{
RwLock::SharedLockScope SharedLock(m_Lock);
- if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end())
+ if (auto It = m_Buckets.find_as(InBucket, std::hash<std::string_view>(), eastl::equal_to_2<std::string, std::string_view>());
+ It != m_Buckets.end())
{
return It->second.get();
}
@@ -3710,31 +3820,40 @@ ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket)
    // We create the bucket without holding a lock, since the constructor calls GcManager::AddGcReferencer which takes an exclusive lock.
    // This could cause a deadlock: if GC is running, we would block while holding ZenCacheDiskLayer::m_Lock
- std::unique_ptr<CacheBucket> Bucket(
- std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, BucketName, m_Configuration.BucketConfig));
+ BucketConfiguration* BucketConfig = &m_Configuration.BucketConfig;
+ if (auto It = m_Configuration.BucketConfigMap.find_as(InBucket,
+ std::hash<std::string_view>(),
+ eastl::equal_to_2<std::string, std::string_view>());
+ It != m_Configuration.BucketConfigMap.end())
+ {
+ BucketConfig = &It->second;
+ }
+ std::unique_ptr<CacheBucket> Bucket(std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, InBucket, *BucketConfig));
RwLock::ExclusiveLockScope Lock(m_Lock);
- if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end())
+ if (auto It = m_Buckets.find_as(InBucket, std::hash<std::string_view>(), eastl::equal_to_2<std::string, std::string_view>());
+ It != m_Buckets.end())
{
return It->second.get();
}
std::filesystem::path BucketPath = m_RootDir;
- BucketPath /= BucketName;
+ BucketPath /= InBucket;
try
{
if (!Bucket->OpenOrCreate(BucketPath))
{
- ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir);
+ ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", InBucket, m_RootDir);
return nullptr;
}
}
catch (const std::exception& Err)
{
- ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what());
+ ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", InBucket, BucketPath, Err.what());
throw;
}
+ std::string BucketName{InBucket};
CacheBucket* Result = Bucket.get();
m_Buckets.emplace(BucketName, std::move(Bucket));
if (m_CapturedBuckets)
@@ -3833,7 +3952,7 @@ ZenCacheDiskLayer::EndPutBatch(PutBatchHandle* Batch) noexcept
struct ZenCacheDiskLayer::GetBatchHandle
{
- GetBatchHandle(std::vector<ZenCacheValue>& OutResults) : OutResults(OutResults) {}
+ GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults) {}
struct BucketHandle
{
CacheBucket* Bucket;
@@ -3893,13 +4012,13 @@ struct ZenCacheDiskLayer::GetBatchHandle
return NewBucketHandle;
}
- RwLock Lock;
- std::vector<BucketHandle> BucketHandles;
- std::vector<ZenCacheValue>& OutResults;
+ RwLock Lock;
+ eastl::fixed_vector<BucketHandle, 4> BucketHandles;
+ ZenCacheValueVec_t& OutResults;
};
ZenCacheDiskLayer::GetBatchHandle*
-ZenCacheDiskLayer::BeginGetBatch(std::vector<ZenCacheValue>& OutResults)
+ZenCacheDiskLayer::BeginGetBatch(ZenCacheValueVec_t& OutResults)
{
return new GetBatchHandle(OutResults);
}
@@ -3994,7 +4113,11 @@ ZenCacheDiskLayer::DiscoverBuckets()
if (IsKnownBadBucketName(BucketName))
{
BadBucketDirectories.push_back(BucketPath);
-
+ continue;
+ }
+ else if (BucketName.starts_with("[dropped]"))
+ {
+ BadBucketDirectories.push_back(BucketPath);
continue;
}
@@ -4027,50 +4150,66 @@ ZenCacheDiskLayer::DiscoverBuckets()
RwLock SyncLock;
WorkerThreadPool& Pool = GetLargeWorkerPool(EWorkloadType::Burst);
- Latch WorkLatch(1);
- for (auto& BucketPath : FoundBucketDirectories)
+ std::atomic<bool> AbortFlag;
+ std::atomic<bool> PauseFlag;
+ ParallelWork Work(AbortFlag, PauseFlag);
+ try
{
- WorkLatch.AddCount(1);
- Pool.ScheduleWork([this, &WorkLatch, &SyncLock, BucketPath]() {
- ZEN_MEMSCOPE(GetCacheDiskTag());
-
- auto _ = MakeGuard([&]() { WorkLatch.CountDown(); });
- const std::string BucketName = PathToUtf8(BucketPath.stem());
- try
- {
- std::unique_ptr<CacheBucket> NewBucket =
- std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, BucketName, m_Configuration.BucketConfig);
+ for (auto& BucketPath : FoundBucketDirectories)
+ {
+ Work.ScheduleWork(Pool, [this, &SyncLock, BucketPath](std::atomic<bool>&) {
+ ZEN_MEMSCOPE(GetCacheDiskTag());
- CacheBucket* Bucket = nullptr;
+ const std::string BucketName = PathToUtf8(BucketPath.stem());
+ try
{
- RwLock::ExclusiveLockScope __(SyncLock);
- auto InsertResult = m_Buckets.emplace(BucketName, std::move(NewBucket));
- Bucket = InsertResult.first->second.get();
- }
- ZEN_ASSERT(Bucket);
+ BucketConfiguration* BucketConfig = &m_Configuration.BucketConfig;
+ if (auto It = m_Configuration.BucketConfigMap.find_as(std::string_view(BucketName),
+ std::hash<std::string_view>(),
+ eastl::equal_to_2<std::string, std::string_view>());
+ It != m_Configuration.BucketConfigMap.end())
+ {
+ BucketConfig = &It->second;
+ }
- if (!Bucket->OpenOrCreate(BucketPath, /* AllowCreate */ false))
- {
- ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir);
+ std::unique_ptr<CacheBucket> NewBucket =
+ std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, BucketName, *BucketConfig);
+ CacheBucket* Bucket = nullptr;
{
RwLock::ExclusiveLockScope __(SyncLock);
- m_Buckets.erase(BucketName);
+ auto InsertResult = m_Buckets.emplace(BucketName, std::move(NewBucket));
+ Bucket = InsertResult.first->second.get();
+ }
+ ZEN_ASSERT(Bucket);
+
+ if (!Bucket->OpenOrCreate(BucketPath, /* AllowCreate */ false))
+ {
+ ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir);
+
+ {
+ RwLock::ExclusiveLockScope __(SyncLock);
+ m_Buckets.erase(BucketName);
+ }
}
}
- }
- catch (const std::exception& Err)
- {
- ZEN_ERROR("Opening bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what());
- return;
- }
- });
+ catch (const std::exception& Err)
+ {
+ ZEN_ERROR("Opening bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what());
+ return;
+ }
+ });
+ }
+ }
+ catch (const std::exception& Ex)
+ {
+ AbortFlag.store(true);
+ ZEN_WARN("Failed discovering buckets in {}. Reason: '{}'", m_RootDir, Ex.what());
}
- WorkLatch.CountDown();
- WorkLatch.Wait();
+ Work.Wait();
}
-bool
+std::function<void()>
ZenCacheDiskLayer::DropBucket(std::string_view InBucket)
{
ZEN_TRACE_CPU("Z$::DropBucket");
@@ -4088,33 +4227,72 @@ ZenCacheDiskLayer::DropBucket(std::string_view InBucket)
return Bucket.Drop();
}
- // Make sure we remove the folder even if we don't know about the bucket
std::filesystem::path BucketPath = m_RootDir;
BucketPath /= std::string(InBucket);
- return cache::impl::MoveAndDeleteDirectory(BucketPath);
+ std::filesystem::path DroppedPath = cache::impl::MoveDroppedDirectory(BucketPath);
+ if (DroppedPath.empty())
+ {
+ return {};
+ }
+ else
+ {
+ return [DroppedPath = std::move(DroppedPath)]() {
+ std::error_code Ec;
+ (void)DeleteDirectories(DroppedPath, Ec);
+ if (Ec)
+ {
+ ZEN_WARN("Failed to clean up dropped bucket directory '{}', reason: '{}'", DroppedPath, Ec.message());
+ }
+ };
+ }
}
-bool
+std::function<void()>
ZenCacheDiskLayer::Drop()
{
ZEN_TRACE_CPU("Z$::Drop");
- RwLock::ExclusiveLockScope _(m_Lock);
-
- std::vector<std::unique_ptr<CacheBucket>> Buckets;
- Buckets.reserve(m_Buckets.size());
- while (!m_Buckets.empty())
+ std::vector<std::function<void()>> PostDropOps;
{
- const auto& It = m_Buckets.begin();
- CacheBucket& Bucket = *It->second;
- m_DroppedBuckets.push_back(std::move(It->second));
- m_Buckets.erase(It->first);
- if (!Bucket.Drop())
+ RwLock::ExclusiveLockScope _(m_Lock);
+ PostDropOps.reserve(m_Buckets.size());
+ while (!m_Buckets.empty())
{
- return false;
+ const auto& It = m_Buckets.begin();
+ CacheBucket& Bucket = *It->second;
+ m_DroppedBuckets.push_back(std::move(It->second));
+ m_Buckets.erase(It->first);
+ if (std::function<void()> PostDropOp = Bucket.Drop(); !PostDropOp)
+ {
+ return {};
+ }
+ else
+ {
+ PostDropOps.emplace_back(std::move(PostDropOp));
+ }
}
}
- return cache::impl::MoveAndDeleteDirectory(m_RootDir);
+
+ std::filesystem::path DroppedPath = cache::impl::MoveDroppedDirectory(m_RootDir);
+ if (DroppedPath.empty())
+ {
+ return {};
+ }
+ else
+ {
+ return [DroppedPath = std::move(DroppedPath), PostDropOps = std::move(PostDropOps)]() {
+ for (auto& PostDropOp : PostDropOps)
+ {
+ PostDropOp();
+ }
+ std::error_code Ec;
+ (void)DeleteDirectories(DroppedPath, Ec);
+ if (Ec)
+ {
+ ZEN_WARN("Failed to clean up dropped bucket directory '{}', reason: '{}'", DroppedPath, Ec.message());
+ }
+ };
+ }
}
void
@@ -4144,16 +4322,16 @@ ZenCacheDiskLayer::Flush()
}
{
WorkerThreadPool& Pool = GetMediumWorkerPool(EWorkloadType::Burst);
- Latch WorkLatch(1);
+ std::atomic<bool> AbortFlag;
+ std::atomic<bool> PauseFlag;
+ ParallelWork Work(AbortFlag, PauseFlag);
try
{
for (auto& Bucket : Buckets)
{
- WorkLatch.AddCount(1);
- Pool.ScheduleWork([&WorkLatch, Bucket]() {
+ Work.ScheduleWork(Pool, [Bucket](std::atomic<bool>&) {
ZEN_MEMSCOPE(GetCacheDiskTag());
- auto _ = MakeGuard([&]() { WorkLatch.CountDown(); });
try
{
Bucket->Flush();
@@ -4167,13 +4345,14 @@ ZenCacheDiskLayer::Flush()
}
catch (const std::exception& Ex)
{
+ AbortFlag.store(true);
ZEN_ERROR("Failed to flush buckets at '{}'. Reason: '{}'", m_RootDir, Ex.what());
}
- WorkLatch.CountDown();
- while (!WorkLatch.Wait(1000))
- {
- ZEN_DEBUG("Waiting for {} buckets at '{}' to flush", WorkLatch.Remaining(), m_RootDir);
- }
+
+ Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t RemainingWork) {
+ ZEN_UNUSED(IsAborted, IsPaused);
+ ZEN_DEBUG("Waiting for {} buckets at '{}' to flush", RemainingWork, m_RootDir);
+ });
}
}
diff --git a/src/zenstore/cache/cacherpc.cpp b/src/zenstore/cache/cacherpc.cpp
index 20c244250..436e8a083 100644
--- a/src/zenstore/cache/cacherpc.cpp
+++ b/src/zenstore/cache/cacherpc.cpp
@@ -20,6 +20,8 @@
#include <zencore/memory/llm.h>
+#include <EASTL/fixed_vector.h>
+
//////////////////////////////////////////////////////////////////////////
namespace zen {
@@ -89,7 +91,7 @@ GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key)
return false;
}
IoHash Hash = HashField.AsHash();
- Key = CacheKey::Create(*Bucket, Hash);
+ Key = CacheKey::CreateValidated(std::move(*Bucket), Hash);
return true;
}
@@ -218,6 +220,11 @@ CacheRpcHandler::HandleRpcRequest(const CacheRequestContext& Context,
ZEN_WARN("Content format not supported, expected package message format");
return RpcResponseCode::BadRequest;
}
+ if (CbValidateError Error = ValidateCompactBinary(Object.GetView(), CbValidateMode::Default); Error != CbValidateError::None)
+ {
+ ZEN_WARN("Content format is corrupt, compact binary format validation failed. Reason: '{}'", ToString(Error));
+ return RpcResponseCode::BadRequest;
+ }
}
if (!UriNamespace.empty())
@@ -305,7 +312,7 @@ CacheRpcHandler::HandleRpcPutCacheRecords(const CacheRequestContext& Context, co
}
DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default;
- std::vector<bool> Results;
+ eastl::fixed_vector<bool, 32> Results;
CbArrayView RequestsArray = Params["Requests"sv].AsArrayView();
for (CbFieldView RequestField : RequestsArray)
@@ -495,16 +502,15 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
bool Exists = false;
bool ReadFromUpstream = false;
};
- struct RecordRequestData
+ struct RecordRequestData : public CacheKeyRequest
{
- CacheKeyRequest Upstream;
- CbObjectView RecordObject;
- IoBuffer RecordCacheValue;
- CacheRecordPolicy DownstreamPolicy;
- std::vector<ValueRequestData> Values;
- bool Complete = false;
- const UpstreamEndpointInfo* Source = nullptr;
- uint64_t ElapsedTimeUs;
+ CbObjectView RecordObject;
+ IoBuffer RecordCacheValue;
+ CacheRecordPolicy DownstreamPolicy;
+ eastl::fixed_vector<ValueRequestData, 4> Values;
+ bool Complete = false;
+ const UpstreamEndpointInfo* Source = nullptr;
+ uint64_t ElapsedTimeUs;
};
std::string_view PolicyText = Params["DefaultPolicy"sv].AsString();
@@ -517,8 +523,8 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
const bool HasUpstream = m_UpstreamCache.IsActive();
- std::vector<RecordRequestData> Requests;
- std::vector<size_t> UpstreamIndexes;
+ eastl::fixed_vector<RecordRequestData, 16> Requests;
+ eastl::fixed_vector<size_t, 16> UpstreamIndexes;
auto ParseValues = [](RecordRequestData& Request) {
CbArrayView ValuesArray = Request.RecordObject["Values"sv].AsArrayView();
@@ -549,7 +555,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
CbObjectView RequestObject = RequestField.AsObjectView();
CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView();
- CacheKey& Key = Request.Upstream.Key;
+ CacheKey& Key = Request.Key;
if (!GetRpcRequestCacheKey(KeyObject, Key))
{
return CbPackage{};
@@ -571,6 +577,13 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
{
FoundLocalInvalid = true;
}
+ else if (CbValidateError Error = ValidateCompactBinary(Request.RecordCacheValue.GetView(), CbValidateMode::Default);
+ Error != CbValidateError::None)
+ {
+ ZEN_WARN("HandleRpcGetCacheRecords stored record is corrupt, compact binary format validation failed. Reason: '{}'",
+ ToString(Error));
+ FoundLocalInvalid = true;
+ }
else
{
Request.RecordObject = CbObjectView(Request.RecordCacheValue.GetData());
@@ -654,7 +667,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
{
m_CidStore.IterateChunks(
CidHashes,
- [this, &Request, ValueCount, &RequestValueIndexes](size_t Index, const IoBuffer& Payload) -> bool {
+ [this, &Request, &RequestValueIndexes](size_t Index, const IoBuffer& Payload) -> bool {
try
{
const size_t ValueIndex = RequestValueIndexes[Index];
@@ -721,7 +734,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
for (size_t Index : UpstreamIndexes)
{
RecordRequestData& Request = Requests[Index];
- UpstreamRequests.push_back(&Request.Upstream);
+ UpstreamRequests.push_back(&Request);
if (Request.Values.size())
{
@@ -735,13 +748,13 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
UpstreamPolicy |= !Value.ReadFromUpstream ? CachePolicy::SkipData : CachePolicy::None;
Builder.AddValuePolicy(Value.ValueId, UpstreamPolicy);
}
- Request.Upstream.Policy = Builder.Build();
+ Request.Policy = Builder.Build();
}
else
{
// We don't know which Values exist in the Record; ask the upstrem for all values that the client wants,
// and convert the CacheRecordPolicy to an upstream policy
- Request.Upstream.Policy = Request.DownstreamPolicy.ConvertToUpstream();
+ Request.Policy = Request.DownstreamPolicy.ConvertToUpstream();
}
}
@@ -751,10 +764,9 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
return;
}
- RecordRequestData& Request =
- *reinterpret_cast<RecordRequestData*>(reinterpret_cast<char*>(&Params.Request) - offsetof(RecordRequestData, Upstream));
+ RecordRequestData& Request = *static_cast<RecordRequestData*>(&Params.Request);
Request.ElapsedTimeUs += static_cast<uint64_t>(Params.ElapsedSeconds * 1000000.0);
- const CacheKey& Key = Request.Upstream.Key;
+ const CacheKey& Key = Request.Key;
Stopwatch Timer;
auto TimeGuard = MakeGuard([&Timer, &Request]() { Request.ElapsedTimeUs += Timer.GetElapsedTimeUs(); });
if (!Request.RecordObject)
@@ -852,10 +864,12 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb
CbPackage ResponsePackage;
CbObjectWriter ResponseObject{2048};
+ ResponsePackage.ReserveAttachments(Requests.size());
+
ResponseObject.BeginArray("Result"sv);
for (RecordRequestData& Request : Requests)
{
- const CacheKey& Key = Request.Upstream.Key;
+ const CacheKey& Key = Request.Key;
if (Request.Complete ||
(Request.RecordObject && EnumHasAllFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::PartialRecord)))
{
@@ -930,11 +944,12 @@ CacheRpcHandler::HandleRpcPutCacheValues(const CacheRequestContext& Context, con
const bool HasUpstream = m_UpstreamCache.IsActive();
CbArrayView RequestsArray = Params["Requests"sv].AsArrayView();
- std::vector<ZenCacheStore::PutResult> BatchResults;
- std::vector<size_t> BatchResultIndexes;
- std::vector<ZenCacheStore::PutResult> Results;
- std::vector<CacheKey> UpstreamCacheKeys;
- uint64_t RequestCount = RequestsArray.Num();
+ std::vector<ZenCacheStore::PutResult> BatchResults;
+ eastl::fixed_vector<size_t, 32> BatchResultIndexes;
+ eastl::fixed_vector<ZenCacheStore::PutResult, 32> Results;
+ eastl::fixed_vector<CacheKey, 32> UpstreamCacheKeys;
+
+ uint64_t RequestCount = RequestsArray.Num();
{
Results.reserve(RequestCount);
std::unique_ptr<ZenCacheStore::PutBatch> Batch;
@@ -1145,15 +1160,15 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO
uint64_t RawSize = 0;
CompressedBuffer Result;
};
- std::vector<RequestData> Requests;
+ eastl::fixed_vector<RequestData, 16> Requests;
- std::vector<size_t> RemoteRequestIndexes;
+ eastl::fixed_vector<size_t, 16> RemoteRequestIndexes;
const bool HasUpstream = m_UpstreamCache.IsActive();
- CbArrayView RequestsArray = Params["Requests"sv].AsArrayView();
- std::vector<ZenCacheValue> CacheValues;
- const uint64_t RequestCount = RequestsArray.Num();
+ CbArrayView RequestsArray = Params["Requests"sv].AsArrayView();
+ ZenCacheValueVec_t CacheValues;
+ const uint64_t RequestCount = RequestsArray.Num();
CacheValues.reserve(RequestCount);
{
std::unique_ptr<ZenCacheStore::GetBatch> Batch;
@@ -1182,7 +1197,6 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO
CacheKey& Key = Request.Key;
CachePolicy Policy = Request.Policy;
- ZenCacheValue CacheValue;
if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal))
{
if (Batch)
@@ -1328,6 +1342,9 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO
ZEN_TRACE_CPU("Z$::RpcGetCacheValues::Response");
CbPackage RpcResponse;
CbObjectWriter ResponseObject{1024};
+
+ RpcResponse.ReserveAttachments(Requests.size());
+
ResponseObject.BeginArray("Result"sv);
for (const RequestData& Request : Requests)
{
@@ -1622,18 +1639,27 @@ CacheRpcHandler::GetLocalCacheRecords(const CacheRequestContext& Context,
Record.ValuesRead = true;
if (Record.CacheValue && Record.CacheValue.GetContentType() == ZenContentType::kCbObject)
{
- CbObjectView RecordObject = CbObjectView(Record.CacheValue.GetData());
- CbArrayView ValuesArray = RecordObject["Values"sv].AsArrayView();
- Record.Values.reserve(ValuesArray.Num());
- for (CbFieldView ValueField : ValuesArray)
+ if (CbValidateError Error = ValidateCompactBinary(Record.CacheValue.GetView(), CbValidateMode::Default);
+ Error != CbValidateError::None)
{
- CbObjectView ValueObject = ValueField.AsObjectView();
- Oid ValueId = ValueObject["Id"sv].AsObjectId();
- CbFieldView RawHashField = ValueObject["RawHash"sv];
- IoHash RawHash = RawHashField.AsBinaryAttachment();
- if (ValueId && !RawHashField.HasError())
+ ZEN_WARN("GetLocalCacheRecords stored record is corrupt, compact binary format validation failed. Reason: '{}'",
+ ToString(Error));
+ }
+ else
+ {
+ CbObjectView RecordObject = CbObjectView(Record.CacheValue.GetData());
+ CbArrayView ValuesArray = RecordObject["Values"sv].AsArrayView();
+ Record.Values.reserve(ValuesArray.Num());
+ for (CbFieldView ValueField : ValuesArray)
{
- Record.Values.push_back({ValueId, RawHash, ValueObject["RawSize"sv].AsUInt64()});
+ CbObjectView ValueObject = ValueField.AsObjectView();
+ Oid ValueId = ValueObject["Id"sv].AsObjectId();
+ CbFieldView RawHashField = ValueObject["RawHash"sv];
+ IoHash RawHash = RawHashField.AsBinaryAttachment();
+ if (ValueId && !RawHashField.HasError())
+ {
+ Record.Values.push_back({ValueId, RawHash, ValueObject["RawSize"sv].AsUInt64()});
+ }
}
}
}
@@ -1706,7 +1732,7 @@ CacheRpcHandler::GetLocalCacheValues(const CacheRequestContext& Context,
using namespace cache::detail;
const bool HasUpstream = m_UpstreamCache.IsActive();
- std::vector<ZenCacheValue> Chunks;
+ ZenCacheValueVec_t Chunks;
Chunks.reserve(ValueRequests.size());
{
std::unique_ptr<ZenCacheStore::GetBatch> Batch;
@@ -1866,6 +1892,8 @@ CacheRpcHandler::WriteGetCacheChunksResponse([[maybe_unused]] const CacheRequest
CbPackage RpcResponse;
CbObjectWriter Writer{1024};
+ RpcResponse.ReserveAttachments(Requests.size());
+
Writer.BeginArray("Result"sv);
for (ChunkRequest& Request : Requests)
{
diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp
index a3f80099f..973af52b2 100644
--- a/src/zenstore/cache/structuredcachestore.cpp
+++ b/src/zenstore/cache/structuredcachestore.cpp
@@ -178,13 +178,13 @@ ZenCacheNamespace::EndPutBatch(PutBatchHandle* Batch) noexcept
struct ZenCacheNamespace::GetBatchHandle
{
- GetBatchHandle(std::vector<ZenCacheValue>& OutResult) : Results(OutResult) {}
- std::vector<ZenCacheValue>& Results;
+ GetBatchHandle(ZenCacheValueVec_t& OutResult) : Results(OutResult) {}
+ ZenCacheValueVec_t& Results;
ZenCacheDiskLayer::GetBatchHandle* DiskLayerHandle = nullptr;
};
ZenCacheNamespace::GetBatchHandle*
-ZenCacheNamespace::BeginGetBatch(std::vector<ZenCacheValue>& OutResult)
+ZenCacheNamespace::BeginGetBatch(ZenCacheValueVec_t& OutResult)
{
ZenCacheNamespace::GetBatchHandle* Handle = new ZenCacheNamespace::GetBatchHandle(OutResult);
Handle->DiskLayerHandle = m_DiskLayer.BeginGetBatch(OutResult);
@@ -282,11 +282,14 @@ ZenCacheNamespace::DropBucket(std::string_view Bucket)
{
ZEN_INFO("dropping bucket '{}'", Bucket);
- const bool Dropped = m_DiskLayer.DropBucket(Bucket);
-
- ZEN_INFO("bucket '{}' was {}", Bucket, Dropped ? "dropped" : "not found");
-
- return Dropped;
+ std::function<void()> PostDropOp = m_DiskLayer.DropBucket(Bucket);
+ if (!PostDropOp)
+ {
+ ZEN_INFO("bucket '{}' was not found in {}", Bucket, m_RootDir);
+ return false;
+ }
+ PostDropOp();
+ return true;
}
void
@@ -296,9 +299,10 @@ ZenCacheNamespace::EnumerateBucketContents(std::string_view
m_DiskLayer.EnumerateBucketContents(Bucket, Fn);
}
-bool
+std::function<void()>
ZenCacheNamespace::Drop()
{
+ m_Gc.RemoveGcStorage(this);
return m_DiskLayer.Drop();
}
@@ -585,7 +589,7 @@ ZenCacheStore::PutBatch::~PutBatch()
}
}
-ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, std::vector<ZenCacheValue>& OutResult)
+ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, ZenCacheValueVec_t& OutResult)
: m_CacheStore(CacheStore)
, Results(OutResult)
{
@@ -800,16 +804,27 @@ ZenCacheStore::DropBucket(std::string_view Namespace, std::string_view Bucket)
bool
ZenCacheStore::DropNamespace(std::string_view InNamespace)
{
- RwLock::SharedLockScope _(m_NamespacesLock);
- if (auto It = m_Namespaces.find(std::string(InNamespace)); It != m_Namespaces.end())
+ std::function<void()> PostDropOp;
+ {
+ RwLock::SharedLockScope _(m_NamespacesLock);
+ if (auto It = m_Namespaces.find(std::string(InNamespace)); It != m_Namespaces.end())
+ {
+ ZenCacheNamespace& Namespace = *It->second;
+ m_DroppedNamespaces.push_back(std::move(It->second));
+ m_Namespaces.erase(It);
+ PostDropOp = Namespace.Drop();
+ }
+ else
+ {
+ ZEN_WARN("request for unknown namespace '{}' in ZenCacheStore::DropNamespace", InNamespace);
+ return false;
+ }
+ }
+ if (PostDropOp)
{
- ZenCacheNamespace& Namespace = *It->second;
- m_DroppedNamespaces.push_back(std::move(It->second));
- m_Namespaces.erase(It);
- return Namespace.Drop();
+ PostDropOp();
}
- ZEN_WARN("request for unknown namespace '{}' in ZenCacheStore::DropNamespace", InNamespace);
- return false;
+ return true;
}
void
diff --git a/src/zenstore/cas.cpp b/src/zenstore/cas.cpp
index 73c10a6db..460f0e10d 100644
--- a/src/zenstore/cas.cpp
+++ b/src/zenstore/cas.cpp
@@ -118,7 +118,7 @@ CasImpl::Initialize(const CidStoreConfiguration& InConfig)
// Ensure root directory exists - create if it doesn't exist already
- std::filesystem::create_directories(m_Config.RootDirectory);
+ CreateDirectories(m_Config.RootDirectory);
// Open or create manifest
@@ -412,6 +412,7 @@ CasImpl::IterateChunks(std::span<IoHash> DecompressedIds,
uint64_t LargeSizeLimit)
{
ZEN_TRACE_CPU("CAS::IterateChunks");
+
if (!m_SmallStrategy.IterateChunks(
DecompressedIds,
[&](size_t Index, const IoBuffer& Payload) {
@@ -420,10 +421,11 @@ CasImpl::IterateChunks(std::span<IoHash> DecompressedIds,
return AsyncCallback(Index, Payload);
},
OptionalWorkerPool,
- LargeSizeLimit))
+ LargeSizeLimit == 0 ? m_Config.HugeValueThreshold : Min(LargeSizeLimit, m_Config.HugeValueThreshold)))
{
return false;
}
+
if (!m_TinyStrategy.IterateChunks(
DecompressedIds,
[&](size_t Index, const IoBuffer& Payload) {
@@ -432,10 +434,11 @@ CasImpl::IterateChunks(std::span<IoHash> DecompressedIds,
return AsyncCallback(Index, Payload);
},
OptionalWorkerPool,
- LargeSizeLimit))
+ LargeSizeLimit == 0 ? m_Config.TinyValueThreshold : Min(LargeSizeLimit, m_Config.TinyValueThreshold)))
{
return false;
}
+
if (!m_LargeStrategy.IterateChunks(
DecompressedIds,
[&](size_t Index, const IoBuffer& Payload) {
diff --git a/src/zenstore/caslog.cpp b/src/zenstore/caslog.cpp
index 6c7b1b297..492ce9317 100644
--- a/src/zenstore/caslog.cpp
+++ b/src/zenstore/caslog.cpp
@@ -37,7 +37,7 @@ CasLogFile::~CasLogFile()
bool
CasLogFile::IsValid(std::filesystem::path FileName, size_t RecordSize)
{
- if (!std::filesystem::is_regular_file(FileName))
+ if (!IsFile(FileName))
{
return false;
}
diff --git a/src/zenstore/chunkedfile.cpp b/src/zenstore/chunkedfile.cpp
deleted file mode 100644
index f200bc1ec..000000000
--- a/src/zenstore/chunkedfile.cpp
+++ /dev/null
@@ -1,505 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zencore/basicfile.h>
-#include <zenstore/chunkedfile.h>
-
-#include "chunking.h"
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
-#include <gsl/gsl-lite.hpp>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-namespace {
- struct ChunkedHeader
- {
- static constexpr uint32_t ExpectedMagic = 0x646b6863; // chkd
- static constexpr uint32_t CurrentVersion = 1;
-
- uint32_t Magic = ExpectedMagic;
- uint32_t Version = CurrentVersion;
- uint32_t ChunkSequenceLength;
- uint32_t ChunkHashCount;
- uint64_t ChunkSequenceOffset;
- uint64_t ChunkHashesOffset;
- uint64_t RawSize = 0;
- IoHash RawHash;
- };
-} // namespace
-
-IoBuffer
-SerializeChunkedInfo(const ChunkedInfo& Info)
-{
- size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) +
- RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16);
- IoBuffer HeaderData(HeaderSize);
-
- ChunkedHeader Header;
- Header.ChunkSequenceLength = gsl::narrow<uint32_t>(Info.ChunkSequence.size());
- Header.ChunkHashCount = gsl::narrow<uint32_t>(Info.ChunkHashes.size());
- Header.ChunkSequenceOffset = RoundUp(sizeof(ChunkedHeader), 16);
- Header.ChunkHashesOffset = RoundUp(Header.ChunkSequenceOffset + sizeof(uint32_t) * Header.ChunkSequenceLength, 16);
- Header.RawSize = Info.RawSize;
- Header.RawHash = Info.RawHash;
-
- MutableMemoryView WriteView = HeaderData.GetMutableView();
- {
- MutableMemoryView HeaderWriteView = WriteView.Left(sizeof(Header));
- HeaderWriteView.CopyFrom(MemoryView(&Header, sizeof(Header)));
- }
- {
- MutableMemoryView ChunkSequenceWriteView = WriteView.Mid(Header.ChunkSequenceOffset, sizeof(uint32_t) * Header.ChunkSequenceLength);
- ChunkSequenceWriteView.CopyFrom(MemoryView(Info.ChunkSequence.data(), ChunkSequenceWriteView.GetSize()));
- }
- {
- MutableMemoryView ChunksWriteView = WriteView.Mid(Header.ChunkHashesOffset, sizeof(IoHash) * Header.ChunkHashCount);
- ChunksWriteView.CopyFrom(MemoryView(Info.ChunkHashes.data(), ChunksWriteView.GetSize()));
- }
-
- return HeaderData;
-}
-
-ChunkedInfo
-DeserializeChunkedInfo(IoBuffer& Buffer)
-{
- MemoryView View = Buffer.GetView();
- ChunkedHeader Header;
- {
- MutableMemoryView HeaderWriteView(&Header, sizeof(Header));
- HeaderWriteView.CopyFrom(View.Left(sizeof(Header)));
- }
- if (Header.Magic != ChunkedHeader::ExpectedMagic)
- {
- return {};
- }
- if (Header.Version != ChunkedHeader::CurrentVersion)
- {
- return {};
- }
- ChunkedInfo Info;
- Info.RawSize = Header.RawSize;
- Info.RawHash = Header.RawHash;
- Info.ChunkSequence.resize(Header.ChunkSequenceLength);
- Info.ChunkHashes.resize(Header.ChunkHashCount);
- {
- MutableMemoryView ChunkSequenceWriteView(Info.ChunkSequence.data(), sizeof(uint32_t) * Header.ChunkSequenceLength);
- ChunkSequenceWriteView.CopyFrom(View.Mid(Header.ChunkSequenceOffset, ChunkSequenceWriteView.GetSize()));
- }
- {
- MutableMemoryView ChunksWriteView(Info.ChunkHashes.data(), sizeof(IoHash) * Header.ChunkHashCount);
- ChunksWriteView.CopyFrom(View.Mid(Header.ChunkHashesOffset, ChunksWriteView.GetSize()));
- }
-
- return Info;
-}
-
-void
-Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk)
-{
- BasicFile Reconstructed;
- Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate);
- BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024);
- uint64_t Offset = 0;
- for (uint32_t SequenceIndex : Info.ChunkSequence)
- {
- IoBuffer Chunk = GetChunk(Info.ChunkHashes[SequenceIndex]);
- ReconstructedWriter.Write(Chunk.GetData(), Chunk.GetSize(), Offset);
- Offset += Chunk.GetSize();
- }
-}
-
-ChunkedInfoWithSource
-ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params)
-{
- ChunkedInfoWithSource Result;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> FoundChunks;
-
- ZenChunkHelper Chunker;
- Chunker.SetUseThreshold(Params.UseThreshold);
- Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize);
- size_t End = Offset + Size;
- const size_t ScanBufferSize = 1u * 1024 * 1024; // (Params.MaxSize * 9) / 3;//1 * 1024 * 1024;
- BasicFileBuffer RawBuffer(RawData, ScanBufferSize);
- MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset);
- ZEN_ASSERT(!SliceView.IsEmpty());
- size_t SliceSize = SliceView.GetSize();
- IoHashStream RawHashStream;
- while (Offset < End)
- {
- size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize);
- if (ScanLength == ZenChunkHelper::kNoBoundaryFound)
- {
- if (Offset + SliceSize == End)
- {
- ScanLength = SliceSize;
- }
- else
- {
- SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset);
- SliceSize = SliceView.GetSize();
- Chunker.Reset();
- continue;
- }
- }
- uint32_t ChunkLength = gsl::narrow<uint32_t>(ScanLength); // +HashedLength);
- MemoryView ChunkView = SliceView.Left(ScanLength);
- RawHashStream.Append(ChunkView);
- IoHash ChunkHash = IoHash::HashBuffer(ChunkView);
- SliceView.RightChopInline(ScanLength);
- if (auto It = FoundChunks.find(ChunkHash); It != FoundChunks.end())
- {
- Result.Info.ChunkSequence.push_back(It->second);
- }
- else
- {
- uint32_t ChunkIndex = gsl::narrow<uint32_t>(Result.Info.ChunkHashes.size());
- FoundChunks.insert_or_assign(ChunkHash, ChunkIndex);
- Result.Info.ChunkHashes.push_back(ChunkHash);
- Result.ChunkSources.push_back(ChunkSource{.Offset = Offset, .Size = ChunkLength});
- Result.Info.ChunkSequence.push_back(ChunkIndex);
- }
-
- SliceSize = SliceView.GetSize();
- Offset += ChunkLength;
- }
- Result.Info.RawSize = Size;
- Result.Info.RawHash = RawHashStream.GetHash();
- return Result;
-}
-
-} // namespace zen
-
-#if ZEN_WITH_TESTS
-# include <zencore/filesystem.h>
-# include <zencore/fmtutils.h>
-# include <zencore/iohash.h>
-# include <zencore/logging.h>
-# include <zencore/scopeguard.h>
-# include <zencore/timer.h>
-# include <zencore/testing.h>
-# include <zencore/testutils.h>
-# include <zencore/workthreadpool.h>
-
-# include "chunking.h"
-
-ZEN_THIRD_PARTY_INCLUDES_START
-# include <tsl/robin_map.h>
-# include <tsl/robin_set.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-# if 0
-TEST_CASE("chunkedfile.findparams")
-{
-# if 1
- DirectoryContent SourceContent1;
- GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208", DirectoryContentFlags::IncludeFiles, SourceContent1);
- const std::vector<std::filesystem::path>& SourceFiles1 = SourceContent1.Files;
- DirectoryContent SourceContent2;
- GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208_2", DirectoryContentFlags::IncludeFiles, SourceContent2);
- const std::vector<std::filesystem::path>& SourceFiles2 = SourceContent2.Files;
-# else
- std::filesystem::path SourcePath1 =
- "E:\\Temp\\ChunkingTestData\\31375996\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode";
- std::filesystem::path SourcePath2 =
- "E:\\Temp\\ChunkingTestData\\31379208\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode";
- const std::vector<std::filesystem::path>& SourceFiles1 = {SourcePath1};
- const std::vector<std::filesystem::path>& SourceFiles2 = {SourcePath2};
-# endif
- ChunkedParams Params[] = {ChunkedParams{.UseThreshold = false, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340},
- ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 35598},
- ChunkedParams{.UseThreshold = false, .MinSize = 16848, .MaxSize = 135168, .AvgSize = 39030},
- ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 36222},
- ChunkedParams{.UseThreshold = false, .MinSize = 15744, .MaxSize = 126976, .AvgSize = 36600},
- ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 35442},
- ChunkedParams{.UseThreshold = false, .MinSize = 16464, .MaxSize = 131072, .AvgSize = 37950},
- ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 38914},
- ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 35556},
- ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 35520},
- ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 35478},
- ChunkedParams{.UseThreshold = false, .MinSize = 16896, .MaxSize = 135168, .AvgSize = 39072},
- ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 38880},
- ChunkedParams{.UseThreshold = false, .MinSize = 15840, .MaxSize = 126976, .AvgSize = 36678},
- ChunkedParams{.UseThreshold = false, .MinSize = 16800, .MaxSize = 135168, .AvgSize = 38994},
- ChunkedParams{.UseThreshold = false, .MinSize = 15888, .MaxSize = 126976, .AvgSize = 36714},
- ChunkedParams{.UseThreshold = false, .MinSize = 15792, .MaxSize = 126976, .AvgSize = 36636},
- ChunkedParams{.UseThreshold = false, .MinSize = 14880, .MaxSize = 118784, .AvgSize = 37609},
- ChunkedParams{.UseThreshold = false, .MinSize = 15936, .MaxSize = 126976, .AvgSize = 36756},
- ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 38955},
- ChunkedParams{.UseThreshold = false, .MinSize = 15984, .MaxSize = 126976, .AvgSize = 36792},
- ChunkedParams{.UseThreshold = false, .MinSize = 14400, .MaxSize = 114688, .AvgSize = 36338},
- ChunkedParams{.UseThreshold = false, .MinSize = 14832, .MaxSize = 118784, .AvgSize = 37568},
- ChunkedParams{.UseThreshold = false, .MinSize = 16944, .MaxSize = 135168, .AvgSize = 39108},
- ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 36297},
- ChunkedParams{.UseThreshold = false, .MinSize = 14208, .MaxSize = 114688, .AvgSize = 36188},
- ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 36372},
- ChunkedParams{.UseThreshold = false, .MinSize = 13296, .MaxSize = 106496, .AvgSize = 36592},
- ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 38805},
- ChunkedParams{.UseThreshold = false, .MinSize = 14304, .MaxSize = 114688, .AvgSize = 36263},
- ChunkedParams{.UseThreshold = false, .MinSize = 14784, .MaxSize = 118784, .AvgSize = 37534},
- ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 38839},
- ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 39360},
- ChunkedParams{.UseThreshold = false, .MinSize = 13776, .MaxSize = 110592, .AvgSize = 37976},
- ChunkedParams{.UseThreshold = false, .MinSize = 14736, .MaxSize = 118784, .AvgSize = 37493},
- ChunkedParams{.UseThreshold = false, .MinSize = 14928, .MaxSize = 118784, .AvgSize = 37643},
- ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 39504},
- ChunkedParams{.UseThreshold = false, .MinSize = 13392, .MaxSize = 106496, .AvgSize = 36664},
- ChunkedParams{.UseThreshold = false, .MinSize = 13872, .MaxSize = 110592, .AvgSize = 38048},
- ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 39432},
- ChunkedParams{.UseThreshold = false, .MinSize = 13200, .MaxSize = 106496, .AvgSize = 36520},
- ChunkedParams{.UseThreshold = false, .MinSize = 17328, .MaxSize = 139264, .AvgSize = 36378},
- ChunkedParams{.UseThreshold = false, .MinSize = 17376, .MaxSize = 139264, .AvgSize = 36421},
- ChunkedParams{.UseThreshold = false, .MinSize = 17424, .MaxSize = 139264, .AvgSize = 36459},
- ChunkedParams{.UseThreshold = false, .MinSize = 17472, .MaxSize = 139264, .AvgSize = 36502},
- ChunkedParams{.UseThreshold = false, .MinSize = 17520, .MaxSize = 139264, .AvgSize = 36540},
- ChunkedParams{.UseThreshold = false, .MinSize = 17808, .MaxSize = 143360, .AvgSize = 37423},
- ChunkedParams{.UseThreshold = false, .MinSize = 17856, .MaxSize = 143360, .AvgSize = 37466},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 25834},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 21917},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 29751},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 33668},
- ChunkedParams{.UseThreshold = false, .MinSize = 17952, .MaxSize = 143360, .AvgSize = 37547},
- ChunkedParams{.UseThreshold = false, .MinSize = 17904, .MaxSize = 143360, .AvgSize = 37504},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 22371},
- ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 37585},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 26406},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 26450},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 30615},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 30441},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 22417},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 22557},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 30528},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 27112},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 34644},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 34476},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 35408},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 38592},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 30483},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 26586},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 26496},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 31302},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 34516},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 22964},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 35448},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 38630},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 23010},
- ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 31260},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 34600},
- ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 27156},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 30570},
- ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 38549},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 22510},
- ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 38673},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 34560},
- ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 22464},
- ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 26540},
- ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 38511},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 23057},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 27202},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 31347},
- ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 35492},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 31389},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 27246},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 23103},
- ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 35532},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 23150},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 27292},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 31434},
- ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 35576},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 27336},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 23196},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 31476},
- ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 35616},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 27862},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 32121},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 23603},
- ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 36380},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 27908},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 23650},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 32166},
- ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 36424},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 23696},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 32253},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 32208},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 23743},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 36548},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 28042},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 23789},
- ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 32295},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 36508},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 27952},
- ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 27998},
- ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 36464}};
-
- static const size_t ParamsCount = sizeof(Params) / sizeof(ChunkedParams);
- std::vector<ChunkedInfoWithSource> Infos1(SourceFiles1.size());
- std::vector<ChunkedInfoWithSource> Infos2(SourceFiles2.size());
-
- WorkerThreadPool WorkerPool(32);
-
- for (size_t I = 0; I < ParamsCount; I++)
- {
- for (int UseThreshold = 0; UseThreshold < 2; UseThreshold++)
- {
- Latch WorkLatch(1);
- ChunkedParams Param = Params[I];
- Param.UseThreshold = UseThreshold == 1;
- Stopwatch Timer;
- for (size_t F = 0; F < SourceFiles1.size(); F++)
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles1, &Infos1]() {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- BasicFile SourceData1;
- SourceData1.Open(SourceFiles1[F], BasicFile::Mode::kRead);
- Infos1[F] = ChunkData(SourceData1, 0, SourceData1.FileSize(), Param);
- });
- }
- for (size_t F = 0; F < SourceFiles2.size(); F++)
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles2, &Infos2]() {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- BasicFile SourceData2;
- SourceData2.Open(SourceFiles2[F], BasicFile::Mode::kRead);
- Infos2[F] = ChunkData(SourceData2, 0, SourceData2.FileSize(), Param);
- });
- }
- WorkLatch.CountDown();
- WorkLatch.Wait();
- uint64_t ChunkTimeMS = Timer.GetElapsedTimeMs();
-
- uint64_t Raw1Size = 0;
- tsl::robin_set<IoHash> Chunks1;
- size_t ChunkedSize1 = 0;
- for (size_t F = 0; F < SourceFiles1.size(); F++)
- {
- const ChunkedInfoWithSource& Info = Infos1[F];
- Raw1Size += Info.Info.RawSize;
- for (uint32_t Chunk1Index = 0; Chunk1Index < Info.Info.ChunkHashes.size(); ++Chunk1Index)
- {
- const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk1Index];
- if (Chunks1.insert(ChunkHash).second)
- {
- ChunkedSize1 += Info.ChunkSources[Chunk1Index].Size;
- }
- }
- }
-
- uint64_t Raw2Size = 0;
- tsl::robin_set<IoHash> Chunks2;
- size_t ChunkedSize2 = 0;
- size_t DiffSize = 0;
- for (size_t F = 0; F < SourceFiles2.size(); F++)
- {
- const ChunkedInfoWithSource& Info = Infos2[F];
- Raw2Size += Info.Info.RawSize;
- for (uint32_t Chunk2Index = 0; Chunk2Index < Info.Info.ChunkHashes.size(); ++Chunk2Index)
- {
- const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk2Index];
- if (Chunks2.insert(ChunkHash).second)
- {
- ChunkedSize2 += Info.ChunkSources[Chunk2Index].Size;
- if (!Chunks1.contains(ChunkHash))
- {
- DiffSize += Info.ChunkSources[Chunk2Index].Size;
- }
- }
- }
- }
-
- ZEN_INFO(
- "Diff = {}, Chunks1 = {}, Chunks2 = {}, .UseThreshold = {}, .MinSize = {}, .MaxSize = {}, .AvgSize = {}, RawSize(1) = {}, "
- "RawSize(2) = {}, "
- "Saved(1) = {}, Saved(2) = {} in {}",
- NiceBytes(DiffSize),
- Chunks1.size(),
- Chunks2.size(),
- Param.UseThreshold,
- Param.MinSize,
- Param.MaxSize,
- Param.AvgSize,
- NiceBytes(Raw1Size),
- NiceBytes(Raw2Size),
- NiceBytes(Raw1Size - ChunkedSize1),
- NiceBytes(Raw2Size - ChunkedSize2),
- NiceTimeSpanMs(ChunkTimeMS));
- }
- }
-
-# if 0
- for (int64_t MinSizeBase = (12u * 1024u); MinSizeBase <= (32u * 1024u); MinSizeBase += 512)
- {
- for (int64_t Wiggle = -132; Wiggle < 126; Wiggle += 2)
- {
- // size_t MinSize = 7 * 1024 - 61; // (size_t)(MinSizeBase + Wiggle);
- // size_t MaxSize = 16 * (7 * 1024); // 8 * 7 * 1024;// MinSizeBase * 6;
- // size_t AvgSize = MaxSize / 2; // 4 * 7 * 1024;// MinSizeBase * 3;
- size_t MinSize = (size_t)(MinSizeBase + Wiggle);
- //for (size_t MaxSize = (MinSize * 4) - 768; MaxSize < (MinSize * 5) + 768; MaxSize += 64)
- size_t MaxSize = 8u * MinSizeBase;
- {
- for (size_t AvgSize = (MaxSize - MinSize) / 32 + MinSize; AvgSize < (MaxSize - MinSize) / 4 + MinSize; AvgSize += (MaxSize - MinSize) / 32)
-// size_t AvgSize = (MaxSize - MinSize) / 4 + MinSize;
- {
- WorkLatch.AddCount(1);
- WorkerPool.ScheduleWork([&WorkLatch, MinSize, MaxSize, AvgSize, SourcePath1, SourcePath2]()
- {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- ChunkedParams Params{ .UseThreshold = true, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize };
- BasicFile SourceData1;
- SourceData1.Open(SourcePath1, BasicFile::Mode::kRead);
- BasicFile SourceData2;
- SourceData2.Open(SourcePath2, BasicFile::Mode::kRead);
- ChunkedInfoWithSource Info1 = ChunkData(SourceData1, Params);
- ChunkedInfoWithSource Info2 = ChunkData(SourceData2, Params);
-
- tsl::robin_set<IoHash> Chunks1;
- Chunks1.reserve(Info1.Info.ChunkHashes.size());
- Chunks1.insert(Info1.Info.ChunkHashes.begin(), Info1.Info.ChunkHashes.end());
- size_t ChunkedSize1 = 0;
- for (uint32_t Chunk1Index = 0; Chunk1Index < Info1.Info.ChunkHashes.size(); ++Chunk1Index)
- {
- ChunkedSize1 += Info1.ChunkSources[Chunk1Index].Size;
- }
- size_t DiffSavedSize = 0;
- size_t ChunkedSize2 = 0;
- for (uint32_t Chunk2Index = 0; Chunk2Index < Info2.Info.ChunkHashes.size(); ++Chunk2Index)
- {
- ChunkedSize2 += Info2.ChunkSources[Chunk2Index].Size;
- if (Chunks1.find(Info2.Info.ChunkHashes[Chunk2Index]) == Chunks1.end())
- {
- DiffSavedSize += Info2.ChunkSources[Chunk2Index].Size;
- }
- }
- ZEN_INFO("Diff {}, Chunks1: {}, Chunks2: {}, Min: {}, Max: {}, Avg: {}, Saved(1) {}, Saved(2) {}",
- NiceBytes(DiffSavedSize),
- Info1.Info.ChunkHashes.size(),
- Info2.Info.ChunkHashes.size(),
- MinSize,
- MaxSize,
- AvgSize,
- NiceBytes(Info1.Info.RawSize - ChunkedSize1),
- NiceBytes(Info2.Info.RawSize - ChunkedSize2));
- });
- }
- }
- }
- }
-# endif // 0
-
- // WorkLatch.CountDown();
- // WorkLatch.Wait();
-}
-# endif // 0
-
-void
-chunkedfile_forcelink()
-{
-}
-
-} // namespace zen
-
-#endif
diff --git a/src/zenstore/chunking.cpp b/src/zenstore/chunking.cpp
deleted file mode 100644
index 30edd322a..000000000
--- a/src/zenstore/chunking.cpp
+++ /dev/null
@@ -1,382 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include "chunking.h"
-
-#include <gsl/gsl-lite.hpp>
-
-#include <cmath>
-
-namespace zen::detail {
-
-static const uint32_t BuzhashTable[] = {
- 0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, 0x7ebf5191, 0x841135c7, 0x65cc53b3,
- 0x280a597c, 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, 0x882bf287, 0x3116737c,
- 0x05569956, 0xe8cc1f68, 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, 0x9bfd7c64,
- 0x0b3e7276, 0xf2688e77, 0x8fad8abc, 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2,
- 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, 0x7b7c222f, 0x2955ed16, 0x9f10ca59,
- 0xe840c4c9, 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, 0xb19165cd, 0x9891c393,
- 0x325384ac, 0x0308459d, 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, 0x977eb18c,
- 0xd8770976, 0x9833466a, 0xc674df7f, 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4,
- 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, 0xbef8f0e1, 0x21d73653, 0x4e3d977a,
- 0x1e7b3929, 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, 0xf5f7be70, 0xe795248a,
- 0x375a2fe9, 0x425570b6, 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, 0x1bc0dfb5,
- 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c,
- 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf,
- 0xe0d8f8ae, 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, 0xe95effac, 0x305a47bd, 0x426ba35b, 0x1233af3f, 0x686a5b83, 0x50e072e5,
- 0xd9d3bb2a, 0x8befc475, 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, 0xc0d0a81c,
- 0x7fa3429b, 0xe9158a1b, 0x225ea19a, 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140,
- 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, 0xa41979c2, 0xf2b61284, 0x4fac1475, 0x0adb49f9, 0x19727a23, 0x15a7e374, 0xc43a18d5,
- 0x3fb1aa73, 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, 0x5388e5ee, 0xcd8a7510,
- 0xf901b4fd, 0xdbc13dbc, 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, 0x32baf4a9,
- 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1,
- 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3,
- 0xc6eb57bb, 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, 0x329e5388, 0x91dd236b,
- 0x2ecb0d93, 0xf4d82a3d, 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, 0x7bf7cabc,
- 0xf9c18d66, 0x593ade65, 0xd95ddf11,
-};
-
-// ROL operation (compiler turns this into a ROL when optimizing)
-ZEN_FORCEINLINE static uint32_t
-Rotate32(uint32_t Value, size_t RotateCount)
-{
- RotateCount &= 31;
-
- return ((Value) << (RotateCount)) | ((Value) >> (32 - RotateCount));
-}
-
-} // namespace zen::detail
-
-namespace zen {
-
-void
-ZenChunkHelper::Reset()
-{
- InternalReset();
-
- m_BytesScanned = 0;
-}
-
-void
-ZenChunkHelper::InternalReset()
-{
- m_CurrentHash = 0;
- m_CurrentChunkSize = 0;
- m_WindowSize = 0;
-}
-
-void
-ZenChunkHelper::SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize)
-{
- if (m_WindowSize)
- return; // Already started
-
- static_assert(kChunkSizeLimitMin > kWindowSize);
-
- if (AvgSize)
- {
- // TODO: Validate AvgSize range
- }
- else
- {
- if (MinSize && MaxSize)
- {
- AvgSize = std::lrint(std::pow(2, (std::log2(MinSize) + std::log2(MaxSize)) / 2));
- }
- else if (MinSize)
- {
- AvgSize = MinSize * 4;
- }
- else if (MaxSize)
- {
- AvgSize = MaxSize / 4;
- }
- else
- {
- AvgSize = kDefaultAverageChunkSize;
- }
- }
-
- if (MinSize)
- {
- // TODO: Validate MinSize range
- }
- else
- {
- MinSize = std::max(AvgSize / 4, kChunkSizeLimitMin);
- }
-
- if (MaxSize)
- {
- // TODO: Validate MaxSize range
- }
- else
- {
- MaxSize = std::min(AvgSize * 4, kChunkSizeLimitMax);
- }
-
- m_Discriminator = gsl::narrow<uint32_t>(AvgSize - MinSize);
-
- if (m_Discriminator < MinSize)
- {
- m_Discriminator = gsl::narrow<uint32_t>(MinSize);
- }
-
- if (m_Discriminator > MaxSize)
- {
- m_Discriminator = gsl::narrow<uint32_t>(MaxSize);
- }
-
- m_Threshold = gsl::narrow<uint32_t>((uint64_t(std::numeric_limits<uint32_t>::max()) + 1) / m_Discriminator);
-
- m_ChunkSizeMin = MinSize;
- m_ChunkSizeMax = MaxSize;
- m_ChunkSizeAvg = AvgSize;
-}
-
-size_t
-ZenChunkHelper::ScanChunk(const void* DataBytesIn, size_t ByteCount)
-{
- size_t Result = InternalScanChunk(DataBytesIn, ByteCount);
-
- if (Result == kNoBoundaryFound)
- {
- m_BytesScanned += ByteCount;
- }
- else
- {
- m_BytesScanned += Result;
- }
-
- return Result;
-}
-
-size_t
-ZenChunkHelper::InternalScanChunk(const void* DataBytesIn, size_t ByteCount)
-{
- size_t CurrentOffset = 0;
- const uint8_t* CursorPtr = reinterpret_cast<const uint8_t*>(DataBytesIn);
-
- // There's no point in updating the hash if we know we're not
- // going to have a cut point, so just skip the data. This logic currently
- // provides roughly a 20% speedup on my machine
-
- const size_t NeedHashOffset = m_ChunkSizeMin - kWindowSize;
-
- if (m_CurrentChunkSize < NeedHashOffset)
- {
- const uint32_t SkipBytes = gsl::narrow<uint32_t>(std::min<uint64_t>(ByteCount, NeedHashOffset - m_CurrentChunkSize));
-
- ByteCount -= SkipBytes;
- m_CurrentChunkSize += SkipBytes;
- CurrentOffset += SkipBytes;
- CursorPtr += SkipBytes;
-
- m_WindowSize = 0;
-
- if (ByteCount == 0)
- {
- return kNoBoundaryFound;
- }
- }
-
- // Fill window first
-
- if (m_WindowSize < kWindowSize)
- {
- const uint32_t FillBytes = uint32_t(std::min<size_t>(ByteCount, kWindowSize - m_WindowSize));
-
- memcpy(&m_Window[m_WindowSize], CursorPtr, FillBytes);
-
- CursorPtr += FillBytes;
-
- m_WindowSize += FillBytes;
- m_CurrentChunkSize += FillBytes;
-
- CurrentOffset += FillBytes;
- ByteCount -= FillBytes;
-
- if (m_WindowSize < kWindowSize)
- {
- return kNoBoundaryFound;
- }
-
- // We have a full window, initialize hash
-
- uint32_t CurrentHash = 0;
-
- for (int i = 1; i < kWindowSize; ++i)
- {
- CurrentHash ^= detail::Rotate32(detail::BuzhashTable[m_Window[i - 1]], kWindowSize - i);
- }
-
- m_CurrentHash = CurrentHash ^ detail::BuzhashTable[m_Window[kWindowSize - 1]];
- }
-
- // Scan for boundaries (i.e points where the hash matches the value determined by
- // the discriminator)
-
- uint32_t CurrentHash = m_CurrentHash;
- uint32_t CurrentChunkSize = m_CurrentChunkSize;
-
- size_t Index = CurrentChunkSize % kWindowSize;
-
- if (m_Threshold && m_UseThreshold)
- {
- // This is roughly 4x faster than the general modulo approach on my
- // TR 3990X (~940MB/sec) and doesn't require any special parameters to
- // achieve max performance
-
- while (ByteCount)
- {
- const uint8_t NewByte = *CursorPtr;
- const uint8_t OldByte = m_Window[Index];
-
- CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
- detail::BuzhashTable[NewByte];
-
- CurrentChunkSize++;
- CurrentOffset++;
-
- if (CurrentChunkSize >= m_ChunkSizeMin)
- {
- bool FoundBoundary;
-
- if (CurrentChunkSize >= m_ChunkSizeMax)
- {
- FoundBoundary = true;
- }
- else
- {
- FoundBoundary = CurrentHash <= m_Threshold;
- }
-
- if (FoundBoundary)
- {
- // Boundary found!
- InternalReset();
-
- return CurrentOffset;
- }
- }
-
- m_Window[Index++] = *CursorPtr;
-
- if (Index == kWindowSize)
- {
- Index = 0;
- }
-
- ++CursorPtr;
- --ByteCount;
- }
- }
- else if ((m_Discriminator & (m_Discriminator - 1)) == 0)
- {
- // This is quite a bit faster than the generic modulo path, but
- // requires a very specific average chunk size to be used. If you
- // pass in an even power-of-two divided by 0.75 as the average
- // chunk size you'll hit this path
-
- const uint32_t Mask = m_Discriminator - 1;
-
- while (ByteCount)
- {
- const uint8_t NewByte = *CursorPtr;
- const uint8_t OldByte = m_Window[Index];
-
- CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
- detail::BuzhashTable[NewByte];
-
- CurrentChunkSize++;
- CurrentOffset++;
-
- if (CurrentChunkSize >= m_ChunkSizeMin)
- {
- bool FoundBoundary;
-
- if (CurrentChunkSize >= m_ChunkSizeMax)
- {
- FoundBoundary = true;
- }
- else
- {
- FoundBoundary = (CurrentHash & Mask) == Mask;
- }
-
- if (FoundBoundary)
- {
- // Boundary found!
- InternalReset();
-
- return CurrentOffset;
- }
- }
-
- m_Window[Index++] = *CursorPtr;
-
- if (Index == kWindowSize)
- {
- Index = 0;
- }
-
- ++CursorPtr;
- --ByteCount;
- }
- }
- else
- {
- // This is the slowest path, which caps out around 250MB/sec for large sizes
- // on my TR3900X
-
- while (ByteCount)
- {
- const uint8_t NewByte = *CursorPtr;
- const uint8_t OldByte = m_Window[Index];
-
- CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^
- detail::BuzhashTable[NewByte];
-
- CurrentChunkSize++;
- CurrentOffset++;
-
- if (CurrentChunkSize >= m_ChunkSizeMin)
- {
- bool FoundBoundary;
-
- if (CurrentChunkSize >= m_ChunkSizeMax)
- {
- FoundBoundary = true;
- }
- else
- {
- FoundBoundary = (CurrentHash % m_Discriminator) == (m_Discriminator - 1);
- }
-
- if (FoundBoundary)
- {
- // Boundary found!
- InternalReset();
-
- return CurrentOffset;
- }
- }
-
- m_Window[Index++] = *CursorPtr;
-
- if (Index == kWindowSize)
- {
- Index = 0;
- }
-
- ++CursorPtr;
- --ByteCount;
- }
- }
-
- m_CurrentChunkSize = CurrentChunkSize;
- m_CurrentHash = CurrentHash;
-
- return kNoBoundaryFound;
-}
-
-} // namespace zen
diff --git a/src/zenstore/chunking.h b/src/zenstore/chunking.h
deleted file mode 100644
index 09c56454f..000000000
--- a/src/zenstore/chunking.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-#include <zencore/zencore.h>
-
-namespace zen {
-
-/** Content-defined chunking helper
- */
-class ZenChunkHelper
-{
-public:
- void SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize);
- size_t ScanChunk(const void* DataBytes, size_t ByteCount);
- void Reset();
-
- // This controls which chunking approach is used - threshold or
- // modulo based. Threshold is faster and generates similarly sized
- // chunks
- void SetUseThreshold(bool NewState) { m_UseThreshold = NewState; }
-
- inline size_t ChunkSizeMin() const { return m_ChunkSizeMin; }
- inline size_t ChunkSizeMax() const { return m_ChunkSizeMax; }
- inline size_t ChunkSizeAvg() const { return m_ChunkSizeAvg; }
- inline uint64_t BytesScanned() const { return m_BytesScanned; }
-
- static constexpr size_t kNoBoundaryFound = size_t(~0ull);
-
-private:
- size_t m_ChunkSizeMin = 0;
- size_t m_ChunkSizeMax = 0;
- size_t m_ChunkSizeAvg = 0;
-
- uint32_t m_Discriminator = 0; // Computed in SetChunkSize()
- uint32_t m_Threshold = 0; // Computed in SetChunkSize()
-
- bool m_UseThreshold = true;
-
- static constexpr size_t kChunkSizeLimitMax = 64 * 1024 * 1024;
- static constexpr size_t kChunkSizeLimitMin = 1024;
- static constexpr size_t kDefaultAverageChunkSize = 64 * 1024;
-
- static constexpr int kWindowSize = 48;
- uint8_t m_Window[kWindowSize];
- uint32_t m_WindowSize = 0;
-
- uint32_t m_CurrentHash = 0;
- uint32_t m_CurrentChunkSize = 0;
-
- uint64_t m_BytesScanned = 0;
-
- size_t InternalScanChunk(const void* DataBytes, size_t ByteCount);
- void InternalReset();
-};
-
-} // namespace zen
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index 2be0542db..b00abb2cb 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -15,6 +15,7 @@
#include <zencore/trace.h>
#include <zencore/workthreadpool.h>
#include <zenstore/scrubcontext.h>
+#include <zenutil/parallelwork.h>
#include <gsl/gsl-lite.hpp>
@@ -144,6 +145,16 @@ CasContainerStrategy::CasContainerStrategy(GcManager& Gc) : m_Log(logging::Get("
CasContainerStrategy::~CasContainerStrategy()
{
+ try
+ {
+ m_BlockStore.Close();
+ m_CasLog.Flush();
+ m_CasLog.Close();
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("~CasContainerStrategy failed with: ", Ex.what());
+ }
m_Gc.RemoveGcReferenceStore(*this);
m_Gc.RemoveGcStorage(this);
}
@@ -203,12 +214,12 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
ZEN_TRACE_CPU("CasContainer::UpdateLocation");
BlockStoreDiskLocation DiskLocation(Location, m_PayloadAlignment);
const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = DiskLocation};
- m_CasLog.Append(IndexEntry);
{
RwLock::ExclusiveLockScope _(m_LocationMapLock);
m_LocationMap.emplace(ChunkHash, m_Locations.size());
m_Locations.push_back(DiskLocation);
}
+ m_CasLog.Append(IndexEntry);
});
return CasStore::InsertResult{.New = true};
@@ -226,7 +237,7 @@ CasContainerStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
}
std::vector<CasStore::InsertResult>
-CasContainerStrategy::InsertChunks(std::span<IoBuffer> Chunks, std::span<IoHash> ChunkHashes)
+CasContainerStrategy::InsertChunks(std::span<const IoBuffer> Chunks, std::span<const IoHash> ChunkHashes)
{
ZEN_MEMSCOPE(GetCasContainerTag());
@@ -272,7 +283,6 @@ CasContainerStrategy::InsertChunks(std::span<IoBuffer> Chunks, std::span<IoHash>
IndexEntries.emplace_back(
CasDiskIndexEntry{.Key = ChunkHashes[ChunkIndex], .Location = BlockStoreDiskLocation(Location, m_PayloadAlignment)});
}
- m_CasLog.Append(IndexEntries);
{
RwLock::ExclusiveLockScope _(m_LocationMapLock);
for (const CasDiskIndexEntry& DiskIndexEntry : IndexEntries)
@@ -281,6 +291,7 @@ CasContainerStrategy::InsertChunks(std::span<IoBuffer> Chunks, std::span<IoHash>
m_Locations.push_back(DiskIndexEntry.Location);
}
}
+ m_CasLog.Append(IndexEntries);
});
return Result;
}
@@ -306,7 +317,12 @@ bool
CasContainerStrategy::HaveChunk(const IoHash& ChunkHash)
{
RwLock::SharedLockScope _(m_LocationMapLock);
- return m_LocationMap.contains(ChunkHash);
+ if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
+ {
+ const BlockStoreLocation& Location = m_Locations[KeyIt->second].Get(m_PayloadAlignment);
+ return m_BlockStore.HasChunk(Location);
+ }
+ return false;
}
void
@@ -323,7 +339,7 @@ CasContainerStrategy::FilterChunks(HashKeySet& InOutChunks)
}
bool
-CasContainerStrategy::IterateChunks(std::span<IoHash> ChunkHashes,
+CasContainerStrategy::IterateChunks(std::span<const IoHash> ChunkHashes,
const std::function<bool(size_t Index, const IoBuffer& Payload)>& AsyncCallback,
WorkerThreadPool* OptionalWorkerPool,
uint64_t LargeSizeLimit)
@@ -360,7 +376,11 @@ CasContainerStrategy::IterateChunks(std::span<IoHash> ChunkHashes,
return true;
}
- auto DoOneBlock = [&](std::span<const size_t> ChunkIndexes) {
+ auto DoOneBlock = [this](const std::function<bool(size_t Index, const IoBuffer& Payload)>& AsyncCallback,
+ uint64_t LargeSizeLimit,
+ std::span<const size_t> FoundChunkIndexes,
+ std::span<const BlockStoreLocation> FoundChunkLocations,
+ std::span<const size_t> ChunkIndexes) {
if (ChunkIndexes.size() < 4)
{
for (size_t ChunkIndex : ChunkIndexes)
@@ -376,57 +396,96 @@ CasContainerStrategy::IterateChunks(std::span<IoHash> ChunkHashes,
return m_BlockStore.IterateBlock(
FoundChunkLocations,
ChunkIndexes,
- [&](size_t ChunkIndex, const void* Data, uint64_t Size) {
+ [AsyncCallback, FoundChunkIndexes](size_t ChunkIndex, const void* Data, uint64_t Size) {
if (Data == nullptr)
{
return AsyncCallback(FoundChunkIndexes[ChunkIndex], IoBuffer());
}
return AsyncCallback(FoundChunkIndexes[ChunkIndex], IoBuffer(IoBuffer::Wrap, Data, Size));
},
- [&](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) {
+ [AsyncCallback, FoundChunkIndexes](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) {
return AsyncCallback(FoundChunkIndexes[ChunkIndex], File.GetChunk(Offset, Size));
},
LargeSizeLimit);
};
- Latch WorkLatch(1);
- std::atomic_bool AsyncContinue = true;
- bool Continue = m_BlockStore.IterateChunks(FoundChunkLocations, [&](uint32_t BlockIndex, std::span<const size_t> ChunkIndexes) {
- if (OptionalWorkerPool && (ChunkIndexes.size() > 3))
+ std::atomic<bool> AbortFlag;
+ {
+ std::atomic<bool> PauseFlag;
+ ParallelWork Work(AbortFlag, PauseFlag);
+ try
{
- WorkLatch.AddCount(1);
- OptionalWorkerPool->ScheduleWork([&, ChunkIndexes = std::vector<size_t>(ChunkIndexes.begin(), ChunkIndexes.end())]() {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- if (!AsyncContinue)
- {
- return;
- }
- try
- {
- bool Continue = DoOneBlock(ChunkIndexes);
- if (!Continue)
+ const bool Continue = m_BlockStore.IterateChunks(
+ FoundChunkLocations,
+ [this,
+ &Work,
+ &AbortFlag,
+ &AsyncCallback,
+ LargeSizeLimit,
+ DoOneBlock,
+ &FoundChunkIndexes,
+ &FoundChunkLocations,
+ OptionalWorkerPool](uint32_t BlockIndex, std::span<const size_t> ChunkIndexes) {
+ if (OptionalWorkerPool && (ChunkIndexes.size() > 3))
{
- AsyncContinue.store(false);
- }
- }
- catch (const std::exception& Ex)
- {
- ZEN_WARN("Failed iterating chunks for cas root path {}, block {}. Reason: '{}'",
- m_RootDirectory,
+ std::vector<size_t> TmpChunkIndexes(ChunkIndexes.begin(), ChunkIndexes.end());
+ Work.ScheduleWork(
+ *OptionalWorkerPool,
+ [this,
+ &AsyncCallback,
+ LargeSizeLimit,
+ DoOneBlock,
BlockIndex,
- Ex.what());
- }
- });
- return AsyncContinue.load();
+ &FoundChunkIndexes,
+ &FoundChunkLocations,
+ ChunkIndexes = std::move(TmpChunkIndexes)](std::atomic<bool>& AbortFlag) {
+ if (AbortFlag)
+ {
+ return;
+ }
+ try
+ {
+ bool Continue =
+ DoOneBlock(AsyncCallback, LargeSizeLimit, FoundChunkIndexes, FoundChunkLocations, ChunkIndexes);
+ if (!Continue)
+ {
+ AbortFlag.store(true);
+ }
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_WARN("Failed iterating chunks for cas root path {}, block {}. Reason: '{}'",
+ m_RootDirectory,
+ BlockIndex,
+ Ex.what());
+ AbortFlag.store(true);
+ }
+ });
+ return !AbortFlag.load();
+ }
+ else
+ {
+ if (!DoOneBlock(AsyncCallback, LargeSizeLimit, FoundChunkIndexes, FoundChunkLocations, ChunkIndexes))
+ {
+ AbortFlag.store(true);
+ }
+ return !AbortFlag.load();
+ }
+ });
+ if (!Continue)
+ {
+ AbortFlag.store(true);
+ }
}
- else
+ catch (const std::exception& Ex)
{
- return DoOneBlock(ChunkIndexes);
+ AbortFlag.store(true);
+ ZEN_WARN("Failed iterating chunks for cas root path {}. Reason: '{}'", m_RootDirectory, Ex.what());
}
- });
- WorkLatch.CountDown();
- WorkLatch.Wait();
- return AsyncContinue.load() && Continue;
+
+ Work.Wait();
+ }
+ return !AbortFlag.load();
}
void
@@ -437,7 +496,7 @@ CasContainerStrategy::Flush()
ZEN_TRACE_CPU("CasContainer::Flush");
m_BlockStore.Flush(/*ForceNewBlock*/ false);
m_CasLog.Flush();
- MakeIndexSnapshot();
+ MakeIndexSnapshot(/*ResetLog*/ false);
}
void
@@ -677,7 +736,9 @@ public:
m_CasContainerStrategy.m_BlockStore.CompactBlocks(
BlockCompactState,
m_CasContainerStrategy.m_PayloadAlignment,
- [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) {
+ [&](const BlockStore::MovedChunksArray& MovedArray,
+ const BlockStore::ChunkIndexArray& ScrubbedArray,
+ uint64_t FreedDiskSpace) {
std::vector<CasDiskIndexEntry> MovedEntries;
RwLock::ExclusiveLockScope _(m_CasContainerStrategy.m_LocationMapLock);
for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray)
@@ -702,7 +763,27 @@ public:
MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location});
}
}
+ for (size_t ChunkIndex : ScrubbedArray)
+ {
+ const IoHash& Key = BlockCompactStateKeys[ChunkIndex];
+ if (auto It = m_CasContainerStrategy.m_LocationMap.find(Key);
+ It != m_CasContainerStrategy.m_LocationMap.end())
+ {
+ BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second];
+ const BlockStoreLocation& OldLocation = BlockCompactState.GetLocation(ChunkIndex);
+ if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation)
+ {
+ // Someone has moved our chunk so lets just skip the new location we were provided, it will be
+ // GC:d at a later time
+ continue;
+ }
+ MovedEntries.push_back(
+ CasDiskIndexEntry{.Key = Key, .Location = Location, .Flags = CasDiskIndexEntry::kTombstone});
+ m_CasContainerStrategy.m_LocationMap.erase(It);
+ }
+ }
m_CasContainerStrategy.m_CasLog.Append(MovedEntries);
+ m_CasContainerStrategy.m_CasLog.Flush();
Stats.RemovedDisk += FreedDiskSpace;
if (Ctx.IsCancelledFlag.load())
{
@@ -900,13 +981,12 @@ CasContainerStrategy::StorageSize() const
}
void
-CasContainerStrategy::MakeIndexSnapshot()
+CasContainerStrategy::MakeIndexSnapshot(bool ResetLog)
{
ZEN_MEMSCOPE(GetCasContainerTag());
ZEN_TRACE_CPU("CasContainer::MakeIndexSnapshot");
- uint64_t LogCount = m_CasLog.GetLogCount();
- if (m_LogFlushPosition == LogCount)
+ if (m_LogFlushPosition == m_CasLog.GetLogCount())
{
return;
}
@@ -923,34 +1003,17 @@ CasContainerStrategy::MakeIndexSnapshot()
namespace fs = std::filesystem;
- fs::path IndexPath = cas::impl::GetIndexPath(m_RootDirectory, m_ContainerBaseName);
- fs::path TempIndexPath = cas::impl::GetTempIndexPath(m_RootDirectory, m_ContainerBaseName);
-
- // Move index away, we keep it if something goes wrong
- if (fs::is_regular_file(TempIndexPath))
- {
- std::error_code Ec;
- if (!fs::remove(TempIndexPath, Ec) || Ec)
- {
- ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", TempIndexPath, Ec.message());
- return;
- }
- }
+ const fs::path IndexPath = cas::impl::GetIndexPath(m_RootDirectory, m_ContainerBaseName);
try
{
- if (fs::is_regular_file(IndexPath))
- {
- fs::rename(IndexPath, TempIndexPath);
- }
-
// Write the current state of the location map to a new index state
std::vector<CasDiskIndexEntry> Entries;
- uint64_t IndexLogPosition = 0;
+ // Be defensive regarding log position as it is written to without acquiring m_LocationMapLock
+ const uint64_t IndexLogPosition = ResetLog ? 0 : m_CasLog.GetLogCount();
{
RwLock::SharedLockScope ___(m_LocationMapLock);
- IndexLogPosition = m_CasLog.GetLogCount();
Entries.resize(m_LocationMap.size());
uint64_t EntryIndex = 0;
@@ -960,6 +1023,7 @@ CasContainerStrategy::MakeIndexSnapshot()
IndexEntry.Key = Entry.first;
IndexEntry.Location = m_Locations[Entry.second];
}
+ EntryCount = m_LocationMap.size();
}
TemporaryFile ObjectIndexFile;
@@ -969,7 +1033,7 @@ CasContainerStrategy::MakeIndexSnapshot()
{
throw std::system_error(Ec, fmt::format("Failed to create temp file for index snapshot at '{}'", IndexPath));
}
- CasDiskIndexHeader Header = {.EntryCount = Entries.size(),
+ CasDiskIndexHeader Header = {.EntryCount = EntryCount,
.LogPosition = IndexLogPosition,
.PayloadAlignment = gsl::narrow<uint32_t>(m_PayloadAlignment)};
@@ -981,35 +1045,34 @@ CasContainerStrategy::MakeIndexSnapshot()
ObjectIndexFile.MoveTemporaryIntoPlace(IndexPath, Ec);
if (Ec)
{
- throw std::system_error(Ec, fmt::format("Failed to move temp file '{}' to '{}'", ObjectIndexFile.GetPath(), IndexPath));
+ throw std::system_error(Ec,
+ fmt::format("Snapshot failed to rename new snapshot '{}' to '{}', reason: '{}'",
+ ObjectIndexFile.GetPath(),
+ IndexPath,
+ Ec.message()));
}
- EntryCount = Entries.size();
- m_LogFlushPosition = IndexLogPosition;
- }
- catch (const std::exception& Err)
- {
- ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what());
-
- // Restore any previous snapshot
- if (fs::is_regular_file(TempIndexPath))
+ if (ResetLog)
{
- std::error_code Ec;
- fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless
- fs::rename(TempIndexPath, IndexPath, Ec);
- if (Ec)
+ const std::filesystem::path LogPath = cas::impl::GetLogPath(m_RootDirectory, m_ContainerBaseName);
+
+ if (IsFile(LogPath))
{
- ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", TempIndexPath, Ec.message());
+ m_CasLog.Close();
+ if (!RemoveFile(LogPath, Ec) || Ec)
+ {
+ // This is non-critical, it only means that we will replay the events of the log over the snapshot - inefficent but in
+ // the end it will be the same result
+ ZEN_WARN("Snapshot failed to clean log file '{}', reason: '{}'", LogPath, IndexPath, Ec.message());
+ }
+ m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite);
}
}
+ m_LogFlushPosition = IndexLogPosition;
}
- if (fs::is_regular_file(TempIndexPath))
+ catch (const std::exception& Err)
{
- std::error_code Ec;
- if (!fs::remove(TempIndexPath, Ec) || Ec)
- {
- ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", TempIndexPath, Ec.message());
- }
+ ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what());
}
}
@@ -1092,7 +1155,7 @@ CasContainerStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t Ski
if (!TCasLogFile<CasDiskIndexEntry>::IsValid(LogPath))
{
ZEN_WARN("removing invalid cas log at '{}'", LogPath);
- std::filesystem::remove(LogPath);
+ RemoveFile(LogPath);
return 0;
}
@@ -1115,7 +1178,7 @@ CasContainerStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t Ski
ZEN_WARN("reading full log at '{}', reason: Log position from index snapshot is out of range", LogPath);
SkipEntryCount = 0;
}
- LogEntryCount = EntryCount - SkipEntryCount;
+ LogEntryCount = SkipEntryCount;
CasLog.Replay(
[&](const CasDiskIndexEntry& Record) {
LogEntryCount++;
@@ -1134,7 +1197,6 @@ CasContainerStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t Ski
m_Locations.push_back(Record.Location);
},
SkipEntryCount);
-
return LogEntryCount;
}
return 0;
@@ -1155,7 +1217,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
if (IsNewStore)
{
- std::filesystem::remove_all(BasePath);
+ DeleteDirectories(BasePath);
}
CreateDirectories(BasePath);
@@ -1165,19 +1227,19 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
std::filesystem::path LogPath = cas::impl::GetLogPath(m_RootDirectory, m_ContainerBaseName);
std::filesystem::path IndexPath = cas::impl::GetIndexPath(m_RootDirectory, m_ContainerBaseName);
- if (std::filesystem::is_regular_file(IndexPath))
+ if (IsFile(IndexPath))
{
uint32_t IndexVersion = 0;
m_LogFlushPosition = ReadIndexFile(IndexPath, IndexVersion);
if (IndexVersion == 0)
{
ZEN_WARN("removing invalid index file at '{}'", IndexPath);
- std::filesystem::remove(IndexPath);
+ RemoveFile(IndexPath);
}
}
uint64_t LogEntryCount = 0;
- if (std::filesystem::is_regular_file(LogPath))
+ if (IsFile(LogPath))
{
if (TCasLogFile<CasDiskIndexEntry>::IsValid(LogPath))
{
@@ -1186,12 +1248,10 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
else
{
ZEN_WARN("removing invalid cas log at '{}'", LogPath);
- std::filesystem::remove(LogPath);
+ RemoveFile(LogPath);
}
}
- m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite);
-
BlockStore::BlockIndexSet KnownBlocks;
for (const auto& Entry : m_LocationMap)
@@ -1201,11 +1261,41 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
KnownBlocks.insert(BlockIndex);
}
- m_BlockStore.SyncExistingBlocksOnDisk(KnownBlocks);
+ BlockStore::BlockIndexSet MissingBlocks = m_BlockStore.SyncExistingBlocksOnDisk(KnownBlocks);
+
+ m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite);
+
+ bool RemovedEntries = false;
+ if (!MissingBlocks.empty())
+ {
+ std::vector<CasDiskIndexEntry> MissingEntries;
+ for (auto& It : m_LocationMap)
+ {
+ const uint32_t BlockIndex = m_Locations[It.second].GetBlockIndex();
+ if (MissingBlocks.contains(BlockIndex))
+ {
+ MissingEntries.push_back({.Key = It.first, .Location = m_Locations[It.second], .Flags = CasDiskIndexEntry::kTombstone});
+ }
+ }
+ ZEN_ASSERT(!MissingEntries.empty());
+
+ for (const CasDiskIndexEntry& Entry : MissingEntries)
+ {
+ m_LocationMap.erase(Entry.Key);
+ }
+ m_CasLog.Append(MissingEntries);
+ m_CasLog.Flush();
+
+ {
+ RwLock::ExclusiveLockScope IndexLock(m_LocationMapLock);
+ CompactIndex(IndexLock);
+ }
+ RemovedEntries = true;
+ }
- if (IsNewStore || (LogEntryCount > 0))
+ if (IsNewStore || (LogEntryCount > 0) || RemovedEntries)
{
- MakeIndexSnapshot();
+ MakeIndexSnapshot(/*ResetLog*/ true);
}
// TODO: should validate integrity of container files here
@@ -1573,6 +1663,423 @@ TEST_CASE("compactcas.threadedinsert")
}
}
+// Exercises CasContainerStrategy persistence across restarts: chunks inserted in one
+// session must survive a reopen (index snapshot + log replay), garbage collection must
+// drop only unreferenced chunks, and the store must stay consistent after each reopen.
+TEST_CASE("compactcas.restart")
+{
+ uint64_t ExpectedSize = 0;
+
+ // Inserts ChunkCount unique semi-random chunks in parallel 512-chunk batches,
+ // compressing each blob before insertion; records every hash in Hashes and
+ // accumulates raw (uncompressed) sizes into ExpectedSize (guarded by InsertLock).
+ auto GenerateChunks = [&](CasContainerStrategy& Cas, size_t ChunkCount, uint64_t ChunkSize, std::vector<IoHash>& Hashes) {
+ // NOTE(review): hardware_concurrency() may return 0; "- 1u" then wraps to
+ // UINT_MAX and Max() cannot clamp it — confirm, or use Max(hc, 2u) - 1u.
+ WorkerThreadPool ThreadPool(Max(std::thread::hardware_concurrency() - 1u, 2u), "put");
+
+ Latch WorkLatch(1);
+ tsl::robin_set<IoHash, IoHash::Hasher> ChunkHashesLookup;
+ ChunkHashesLookup.reserve(ChunkCount);
+ RwLock InsertLock;
+ for (size_t Offset = 0; Offset < ChunkCount;)
+ {
+ size_t BatchCount = Min<size_t>(ChunkCount - Offset, 512u);
+ WorkLatch.AddCount(1);
+ ThreadPool.ScheduleWork(
+ [&WorkLatch, &InsertLock, &ChunkHashesLookup, &ExpectedSize, &Hashes, &Cas, Offset, BatchCount, ChunkSize]() {
+ auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
+
+ std::vector<IoBuffer> BatchBlobs;
+ std::vector<IoHash> BatchHashes;
+ BatchBlobs.reserve(BatchCount);
+ BatchHashes.reserve(BatchCount);
+
+ // Keep generating until the batch holds BatchCount chunks whose hashes
+ // have not been produced by any other worker (duplicates are retried).
+ while (BatchBlobs.size() < BatchCount)
+ {
+ IoBuffer Chunk =
+ CreateSemiRandomBlob(ChunkSize + ((BatchHashes.size() % 100) + (BatchHashes.size() % 7) * 315u + Offset % 377));
+ IoHash Hash = IoHash::HashBuffer(Chunk);
+ {
+ RwLock::ExclusiveLockScope __(InsertLock);
+ if (ChunkHashesLookup.contains(Hash))
+ {
+ continue;
+ }
+ ChunkHashesLookup.insert(Hash);
+ ExpectedSize += Chunk.Size();
+ }
+
+ // Chunks are stored compressed; the hash refers to the raw payload.
+ BatchBlobs.emplace_back(CompressedBuffer::Compress(SharedBuffer(Chunk)).GetCompressed().Flatten().AsIoBuffer());
+ BatchHashes.push_back(Hash);
+ }
+
+ Cas.InsertChunks(BatchBlobs, BatchHashes);
+ {
+ RwLock::ExclusiveLockScope __(InsertLock);
+ Hashes.insert(Hashes.end(), BatchHashes.begin(), BatchHashes.end());
+ }
+ });
+ Offset += BatchCount;
+ }
+ WorkLatch.CountDown();
+ WorkLatch.Wait();
+ };
+
+ // All sessions below share one on-disk store rooted at CasPath.
+ ScopedTemporaryDirectory TempDir;
+ std::filesystem::path CasPath = TempDir.Path();
+ CreateDirectories(CasPath);
+
+ // NOTE(review): looks like leftover generator scaffolding — this opens the store
+ // with IsNewStore=false against a freshly created, empty directory and discards
+ // it immediately. Confirm this first open is intentional.
+ bool Generate = false;
+ if (!Generate)
+ {
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(CasPath, "test", 65536 * 128, 8, false);
+ }
+
+ const uint64_t kChunkSize = 1048 + 395;
+ const size_t kChunkCount = 7167;
+
+ std::vector<IoHash> Hashes;
+ Hashes.reserve(kChunkCount);
+
+ // Checks presence/absence of every hash; when a chunk should exist, round-trips
+ // the stored compressed payload and verifies its decoded hash matches.
+ auto ValidateChunks = [&](CasContainerStrategy& Cas, std::span<const IoHash> Hashes, bool ShouldExist) {
+ for (const IoHash& Hash : Hashes)
+ {
+ if (ShouldExist)
+ {
+ CHECK(Cas.HaveChunk(Hash));
+ IoBuffer Buffer = Cas.FindChunk(Hash);
+ CHECK(Buffer);
+ IoHash ValidateHash;
+ uint64_t ValidateRawSize;
+ CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Buffer), ValidateHash, ValidateRawSize);
+ CHECK(Compressed);
+ CHECK(ValidateHash == Hash);
+ }
+ else
+ {
+ CHECK(!Cas.HaveChunk(Hash));
+ IoBuffer Buffer = Cas.FindChunk(Hash);
+ CHECK(!Buffer);
+ }
+ }
+ };
+
+ // Session 1: create a fresh store, fill it, validate before and after Flush().
+ {
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(CasPath, "test", 65536 * 128, 8, true);
+ GenerateChunks(Cas, kChunkCount, kChunkSize, Hashes);
+ ValidateChunks(Cas, Hashes, true);
+ Cas.Flush();
+ ValidateChunks(Cas, Hashes, true);
+ }
+
+ // Session 2: reopen the existing store, verify the old chunks survived, add more.
+ {
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(CasPath, "test", 65536 * 128, 8, false);
+ ValidateChunks(Cas, Hashes, true);
+ GenerateChunks(Cas, kChunkCount, kChunkSize / 4, Hashes);
+ ValidateChunks(Cas, Hashes, true);
+ }
+
+ // Minimal GcReferenceChecker that treats m_HashesToKeep as the only live references.
+ class GcRefChecker : public GcReferenceChecker
+ {
+ public:
+ explicit GcRefChecker(std::vector<IoHash>&& HashesToKeep) : m_HashesToKeep(std::move(HashesToKeep)) {}
+ ~GcRefChecker() {}
+ std::string GetGcName(GcCtx& Ctx) override
+ {
+ ZEN_UNUSED(Ctx);
+ return "test";
+ }
+ void PreCache(GcCtx& Ctx) override { FilterReferences(Ctx, "test", m_HashesToKeep); }
+ void UpdateLockedState(GcCtx& Ctx) override { ZEN_UNUSED(Ctx); }
+ std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override
+ {
+ ZEN_UNUSED(Ctx);
+ return KeepUnusedReferences(m_HashesToKeep, IoCids);
+ }
+
+ private:
+ std::vector<IoHash> m_HashesToKeep;
+ };
+
+ // Test GcReferencer that registers itself with the GcManager for its lifetime and
+ // hands ownership of the kept-hash set to a GcRefChecker during collection.
+ class GcRef : public GcReferencer
+ {
+ public:
+ GcRef(GcManager& Gc, std::span<const IoHash> HashesToKeep) : m_Gc(Gc)
+ {
+ m_HashesToKeep.insert(m_HashesToKeep.begin(), HashesToKeep.begin(), HashesToKeep.end());
+ m_Gc.AddGcReferencer(*this);
+ }
+ ~GcRef() { m_Gc.RemoveGcReferencer(*this); }
+ std::string GetGcName(GcCtx& Ctx) override
+ {
+ ZEN_UNUSED(Ctx);
+ return "test";
+ }
+ GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override
+ {
+ ZEN_UNUSED(Ctx, Stats);
+ return nullptr;
+ }
+ std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override
+ {
+ ZEN_UNUSED(Ctx);
+ // NOTE(review): m_HashesToKeep is moved out here — presumably
+ // CreateReferenceCheckers is invoked at most once per collection; confirm.
+ return {new GcRefChecker(std::move(m_HashesToKeep))};
+ }
+ std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) override
+ {
+ ZEN_UNUSED(Ctx);
+ return {};
+ }
+
+ private:
+ GcManager& m_Gc;
+ std::vector<IoHash> m_HashesToKeep;
+ };
+
+ // Session 3: add chunks and close without an explicit Flush() so the next open
+ // exercises log replay over the last snapshot.
+ {
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(CasPath, "test", 65536 * 128, 8, false);
+ GenerateChunks(Cas, kChunkCount, kChunkSize / 5, Hashes);
+ }
+
+ // Session 4: reopen, then garbage-collect everything except every 5th hash and
+ // verify kept chunks remain while dropped ones are gone.
+ {
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(CasPath, "test", 65536 * 128, 8, false);
+ ValidateChunks(Cas, Hashes, true);
+ GenerateChunks(Cas, kChunkCount, kChunkSize / 2, Hashes);
+ ValidateChunks(Cas, Hashes, true);
+ if (true)
+ {
+ std::vector<IoHash> DropHashes;
+ std::vector<IoHash> KeepHashes;
+ for (size_t Index = 0; Index < Hashes.size(); Index++)
+ {
+ if (Index % 5 == 0)
+ {
+ KeepHashes.push_back(Hashes[Index]);
+ }
+ else
+ {
+ DropHashes.push_back(Hashes[Index]);
+ }
+ }
+ // std::span<const IoHash> KeepHashes(Hashes);
+ // ZEN_ASSERT(ExpectedGcCount < Hashes.size());
+ // KeepHashes = KeepHashes.subspan(ExpectedGcCount);
+ GcRef Ref(Gc, KeepHashes);
+ Gc.CollectGarbage(GcSettings{.CollectSmallObjects = true, .IsDeleteMode = true});
+ ValidateChunks(Cas, KeepHashes, true);
+ ValidateChunks(Cas, DropHashes, false);
+ Hashes = KeepHashes;
+ }
+ GenerateChunks(Cas, kChunkCount, kChunkSize / 3, Hashes);
+ }
+
+ // Sessions 5 and 6: reopen after GC (plus the post-GC inserts) and re-validate,
+ // once around an explicit Flush() and once without.
+ {
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(CasPath, "test", 65536 * 128, 8, false);
+ ValidateChunks(Cas, Hashes, true);
+ Cas.Flush();
+ ValidateChunks(Cas, Hashes, true);
+ }
+
+ {
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ Cas.Initialize(CasPath, "test", 65536 * 128, 8, false);
+ ValidateChunks(Cas, Hashes, true);
+ }
+}
+
+// Exercises CasContainerStrategy::IterateChunks: the empty request, a full sweep over
+// every inserted chunk, and two concurrent partial sweeps that mix in hashes which are
+// not present in the store (the callback must never be invoked for those).
+TEST_CASE("compactcas.iteratechunks")
+{
+ // NOTE(review): WorkCompleted is never read or written in this test — dead local?
+ std::atomic<size_t> WorkCompleted = 0;
+ // NOTE(review): hardware_concurrency() may return 0; "- 1u" then wraps to UINT_MAX
+ // and Max() cannot clamp it — confirm, or use Max(hc, 2u) - 1u.
+ WorkerThreadPool ThreadPool(Max(std::thread::hardware_concurrency() - 1u, 2u), "put");
+
+ const uint64_t kChunkSize = 1048 + 395;
+ const size_t kChunkCount = 63840;
+
+ // Repeat the whole scenario four times with different random seeds (N).
+ for (uint32_t N = 0; N < 4; N++)
+ {
+ GcManager Gc;
+ CasContainerStrategy Cas(Gc);
+ ScopedTemporaryDirectory TempDir;
+ Cas.Initialize(TempDir.Path(), "test", 65536 * 128, 8, true);
+
+ // An empty hash list must succeed without invoking the callback.
+ CHECK(Cas.IterateChunks(
+ {},
+ [](size_t Index, const IoBuffer& Payload) {
+ ZEN_UNUSED(Index, Payload);
+ return true;
+ },
+ &ThreadPool,
+ 2048u));
+
+ uint64_t ExpectedSize = 0;
+
+ std::vector<IoHash> Hashes;
+ Hashes.reserve(kChunkCount);
+
+ // Insert kChunkCount unique random chunks in parallel 512-chunk batches;
+ // ExpectedSize accumulates their sizes for the full-sweep check below.
+ {
+ Latch WorkLatch(1);
+ tsl::robin_set<IoHash, IoHash::Hasher> ChunkHashesLookup;
+ ChunkHashesLookup.reserve(kChunkCount);
+ RwLock InsertLock;
+ for (size_t Offset = 0; Offset < kChunkCount;)
+ {
+ size_t BatchCount = Min<size_t>(kChunkCount - Offset, 512u);
+ WorkLatch.AddCount(1);
+ ThreadPool.ScheduleWork(
+ [N, &WorkLatch, &InsertLock, &ChunkHashesLookup, &ExpectedSize, &Hashes, &Cas, Offset, BatchCount]() {
+ auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
+
+ std::vector<IoBuffer> BatchBlobs;
+ std::vector<IoHash> BatchHashes;
+ BatchBlobs.reserve(BatchCount);
+ BatchHashes.reserve(BatchCount);
+
+ // Retry generation until the batch holds BatchCount globally unique chunks.
+ while (BatchBlobs.size() < BatchCount)
+ {
+ IoBuffer Chunk = CreateRandomBlob(
+ N + kChunkSize + ((BatchHashes.size() % 100) + (BatchHashes.size() % 7) * 315u + Offset % 377));
+ IoHash Hash = IoHash::HashBuffer(Chunk);
+ {
+ RwLock::ExclusiveLockScope __(InsertLock);
+ if (ChunkHashesLookup.contains(Hash))
+ {
+ continue;
+ }
+ ChunkHashesLookup.insert(Hash);
+ ExpectedSize += Chunk.Size();
+ }
+
+ BatchBlobs.emplace_back(std::move(Chunk));
+ BatchHashes.push_back(Hash);
+ }
+
+ Cas.InsertChunks(BatchBlobs, BatchHashes);
+ {
+ RwLock::ExclusiveLockScope __(InsertLock);
+ Hashes.insert(Hashes.end(), BatchHashes.begin(), BatchHashes.end());
+ }
+ });
+ Offset += BatchCount;
+ }
+ WorkLatch.CountDown();
+ WorkLatch.Wait();
+ }
+
+ WorkerThreadPool BatchWorkerPool(Max(std::thread::hardware_concurrency() - 1u, 2u), "fetch");
+ // Full sweep: every chunk must be delivered exactly once, with a payload that
+ // hashes back to its key; total delivered bytes must equal ExpectedSize.
+ {
+ std::vector<std::atomic<bool>> FetchedFlags(Hashes.size());
+ std::atomic<uint64_t> FetchedSize = 0;
+ CHECK(Cas.IterateChunks(
+ Hashes,
+ [&Hashes, &FetchedFlags, &FetchedSize](size_t Index, const IoBuffer& Payload) {
+ CHECK(FetchedFlags[Index].load() == false);
+ FetchedFlags[Index].store(true);
+ const IoHash& Hash = Hashes[Index];
+ CHECK(Hash == IoHash::HashBuffer(Payload));
+ FetchedSize += Payload.GetSize();
+ return true;
+ },
+ &BatchWorkerPool,
+ 2048u));
+ for (const auto& Flag : FetchedFlags)
+ {
+ CHECK(Flag.load());
+ }
+ CHECK(FetchedSize == ExpectedSize);
+ }
+
+ // Two overlapping partial sweeps run concurrently; each selects a different
+ // subset of hashes (shifted by I) and sprinkles in 9 hashes that are not in
+ // the store.
+ Latch WorkLatch(1);
+ for (size_t I = 0; I < 2; I++)
+ {
+ WorkLatch.AddCount(1);
+ ThreadPool.ScheduleWork([&Cas, &Hashes, &BatchWorkerPool, &WorkLatch, I]() {
+ auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
+ std::vector<IoHash> PartialHashes;
+ PartialHashes.reserve(Hashes.size() / 4);
+ for (size_t Index = 0; Index < Hashes.size(); Index++)
+ {
+ size_t TestIndex = Index + I;
+ if ((TestIndex % 7 == 1) || (TestIndex % 13 == 1) || (TestIndex % 17 == 1))
+ {
+ PartialHashes.push_back(Hashes[Index]);
+ }
+ }
+ std::reverse(PartialHashes.begin(), PartialHashes.end());
+
+ std::vector<IoHash> NoFoundHashes;
+ std::vector<size_t> NoFindIndexes;
+
+ NoFoundHashes.reserve(9);
+ for (size_t J = 0; J < 9; J++)
+ {
+ std::string Data = fmt::format("oh no, we don't exist {}", J + 1);
+ NoFoundHashes.push_back(IoHash::HashBuffer(Data.data(), Data.length()));
+ }
+
+ NoFindIndexes.reserve(9);
+
+ // Sprinkle in chunks that are not found!
+ auto It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 0, NoFoundHashes[0]);
+ NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It));
+ It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 0 + 1, NoFoundHashes[1]);
+ NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It));
+ It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 1, NoFoundHashes[2]);
+ NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It));
+ It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 1 + 1, NoFoundHashes[3]);
+ NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It));
+ It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 2, NoFoundHashes[4]);
+ NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It));
+ It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 3, NoFoundHashes[5]);
+ NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It));
+ It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 3 + 1, NoFoundHashes[6]);
+ NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It));
+ It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 4, NoFoundHashes[7]);
+ NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It));
+ It = PartialHashes.insert(PartialHashes.end(), NoFoundHashes[8]);
+ NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It));
+
+ // NOTE(review): PartialHashes already contains the 9 missing hashes at this
+ // point, so adding NoFoundHashes.size() again over-allocates by 9 entries —
+ // harmless, but confirm the intended size.
+ std::vector<std::atomic<bool>> FoundFlags(PartialHashes.size() + NoFoundHashes.size());
+ std::vector<std::atomic<uint32_t>> FetchedCounts(PartialHashes.size() + NoFoundHashes.size());
+
+ // The callback must fire at most once per index and never for a missing hash.
+ CHECK(Cas.IterateChunks(
+ PartialHashes,
+ [&PartialHashes, &FoundFlags, &FetchedCounts, &NoFindIndexes](size_t Index, const IoBuffer& Payload) {
+ CHECK_EQ(NoFindIndexes.end(), std::find(NoFindIndexes.begin(), NoFindIndexes.end(), Index));
+ uint32_t PreviousCount = FetchedCounts[Index].fetch_add(1);
+ CHECK(PreviousCount == 0);
+ FoundFlags[Index] = !!Payload;
+ const IoHash& Hash = PartialHashes[Index];
+ CHECK(Hash == IoHash::HashBuffer(Payload));
+ return true;
+ },
+ &BatchWorkerPool,
+ 2048u));
+
+ // Every present hash must have been found; every missing one untouched.
+ for (size_t FoundIndex = 0; FoundIndex < PartialHashes.size(); FoundIndex++)
+ {
+ CHECK(FetchedCounts[FoundIndex].load() <= 1);
+ if (std::find(NoFindIndexes.begin(), NoFindIndexes.end(), FoundIndex) == NoFindIndexes.end())
+ {
+ CHECK(FoundFlags[FoundIndex]);
+ }
+ else
+ {
+ CHECK(!FoundFlags[FoundIndex]);
+ }
+ }
+ });
+ }
+ WorkLatch.CountDown();
+ WorkLatch.Wait();
+ }
+}
+
#endif
void
diff --git a/src/zenstore/compactcas.h b/src/zenstore/compactcas.h
index 07e620086..15e4cbf81 100644
--- a/src/zenstore/compactcas.h
+++ b/src/zenstore/compactcas.h
@@ -52,11 +52,11 @@ struct CasContainerStrategy final : public GcStorage, public GcReferenceStore
~CasContainerStrategy();
CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash);
- std::vector<CasStore::InsertResult> InsertChunks(std::span<IoBuffer> Chunks, std::span<IoHash> ChunkHashes);
+ std::vector<CasStore::InsertResult> InsertChunks(std::span<const IoBuffer> Chunks, std::span<const IoHash> ChunkHashes);
IoBuffer FindChunk(const IoHash& ChunkHash);
bool HaveChunk(const IoHash& ChunkHash);
void FilterChunks(HashKeySet& InOutChunks);
- bool IterateChunks(std::span<IoHash> ChunkHashes,
+ bool IterateChunks(std::span<const IoHash> ChunkHashes,
const std::function<bool(size_t Index, const IoBuffer& Payload)>& AsyncCallback,
WorkerThreadPool* OptionalWorkerPool,
uint64_t LargeSizeLimit);
@@ -77,7 +77,7 @@ struct CasContainerStrategy final : public GcStorage, public GcReferenceStore
private:
CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash);
- void MakeIndexSnapshot();
+ void MakeIndexSnapshot(bool ResetLog);
uint64_t ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& OutVersion);
uint64_t ReadLog(const std::filesystem::path& LogPath, uint64_t SkipEntryCount);
void OpenContainer(bool IsNewStore);
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index 14123528c..68644be2d 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -20,6 +20,7 @@
#include <zencore/workthreadpool.h>
#include <zenstore/gc.h>
#include <zenstore/scrubcontext.h>
+#include <zenutil/parallelwork.h>
#if ZEN_WITH_TESTS
# include <zencore/compactbinarybuilder.h>
@@ -176,16 +177,16 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN
if (IsNewStore)
{
- std::filesystem::remove(LogPath);
- std::filesystem::remove(IndexPath);
+ RemoveFile(LogPath);
+ RemoveFile(IndexPath);
- if (std::filesystem::is_directory(m_RootDirectory))
+ if (IsDir(m_RootDirectory))
{
// We need to explicitly only delete sharded root folders as the cas manifest, tinyobject and smallobject cas folders may reside
// in this folder as well
struct Visitor : public FileSystemTraversal::TreeVisitor
{
- virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t) override
+ virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t, uint64_t) override
{
// We don't care about files
}
@@ -211,24 +212,24 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN
Traversal.TraverseFileSystem(m_RootDirectory, CasVisitor);
for (const std::filesystem::path& SharededRoot : CasVisitor.ShardedRoots)
{
- std::filesystem::remove_all(SharededRoot);
+ DeleteDirectories(SharededRoot);
}
}
}
- if (std::filesystem::is_regular_file(IndexPath))
+ if (IsFile(IndexPath))
{
uint32_t IndexVersion = 0;
m_LogFlushPosition = ReadIndexFile(IndexPath, IndexVersion);
if (IndexVersion == 0)
{
ZEN_WARN("removing invalid index file at '{}'", IndexPath);
- std::filesystem::remove(IndexPath);
+ RemoveFile(IndexPath);
}
}
uint64_t LogEntryCount = 0;
- if (std::filesystem::is_regular_file(LogPath))
+ if (IsFile(LogPath))
{
if (TCasLogFile<FileCasIndexEntry>::IsValid(LogPath))
{
@@ -237,7 +238,7 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN
else
{
ZEN_WARN("removing invalid cas log at '{}'", LogPath);
- std::filesystem::remove(LogPath);
+ RemoveFile(LogPath);
}
}
@@ -251,7 +252,7 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN
if (IsNewStore || LogEntryCount > 0)
{
- MakeIndexSnapshot();
+ MakeIndexSnapshot(/*ResetLog*/ true);
}
}
@@ -327,7 +328,7 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash, CasStore::
{
std::filesystem::path TempPath(ChunkPath.parent_path() / Oid::NewOid().ToString());
std::error_code Ec;
- std::filesystem::rename(ChunkPath, TempPath, Ec);
+ RenameFile(ChunkPath, TempPath, Ec);
if (Ec)
{
throw std::system_error(Ec, fmt::format("unable to move existing CAS file {} to {}", ChunkPath, TempPath));
@@ -452,7 +453,7 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash, CasStore::
{
PayloadFile.Close();
std::error_code DummyEc;
- std::filesystem::remove(ChunkPath, DummyEc);
+ RemoveFile(ChunkPath, DummyEc);
throw;
}
bool IsNew = UpdateIndex(ChunkHash, Chunk.Size());
@@ -503,7 +504,7 @@ FileCasStrategy::SafeOpenChunk(const IoHash& ChunkHash, uint64 ExpectedSize)
{
std::error_code Ec;
std::filesystem::path TempPath(ChunkPath.parent_path() / Oid::NewOid().ToString());
- std::filesystem::rename(ChunkPath, TempPath, Ec);
+ RenameFile(ChunkPath, TempPath, Ec);
if (!Ec)
{
Chunk.SetDeleteOnClose(true);
@@ -563,8 +564,24 @@ FileCasStrategy::HaveChunk(const IoHash& ChunkHash)
{
ZEN_ASSERT(m_IsInitialized);
- RwLock::SharedLockScope _(m_Lock);
- return m_Index.contains(ChunkHash);
+ {
+ RwLock::SharedLockScope _(m_Lock);
+ if (auto It = m_Index.find(ChunkHash); It == m_Index.end())
+ {
+ return false;
+ }
+ }
+
+ ShardingHelper Name(m_RootDirectory, ChunkHash);
+ const std::filesystem::path ChunkPath = Name.ShardedPath.ToPath();
+ RwLock::SharedLockScope ShardLock(LockForHash(ChunkHash));
+
+ if (IsFile(ChunkPath))
+ {
+ return true;
+ }
+
+ return false;
}
void
@@ -574,7 +591,7 @@ FileCasStrategy::DeleteChunk(const IoHash& ChunkHash, std::error_code& Ec)
ShardingHelper Name(m_RootDirectory, ChunkHash);
const std::filesystem::path ChunkPath = Name.ShardedPath.ToPath();
- uint64_t FileSize = static_cast<uint64_t>(std::filesystem::file_size(ChunkPath, Ec));
+ uint64_t FileSize = static_cast<uint64_t>(FileSizeFromPath(ChunkPath, Ec));
if (Ec)
{
ZEN_WARN("get file size FAILED, file cas '{}'", ChunkPath);
@@ -582,9 +599,9 @@ FileCasStrategy::DeleteChunk(const IoHash& ChunkHash, std::error_code& Ec)
}
ZEN_DEBUG("deleting CAS payload file '{}' {}", ChunkPath, NiceBytes(FileSize));
- std::filesystem::remove(ChunkPath, Ec);
+ RemoveFile(ChunkPath, Ec);
- if (!Ec || !std::filesystem::exists(ChunkPath))
+ if (!Ec || !IsFile(ChunkPath))
{
{
RwLock::ExclusiveLockScope _(m_Lock);
@@ -632,10 +649,11 @@ FileCasStrategy::IterateChunks(std::span<IoHash> ChunkHashes,
}
}
}
- std::atomic_bool Continue = true;
+ std::atomic<bool> AsyncContinue = true;
if (!FoundChunkIndexes.empty())
{
- auto ProcessOne = [this, &ChunkHashes, &Continue, &AsyncCallback](size_t ChunkIndex, uint64_t ExpectedSize) {
+ auto ProcessOne = [this, &ChunkHashes, &AsyncCallback](size_t ChunkIndex, uint64_t ExpectedSize) {
+ ZEN_ASSERT(ChunkIndex < ChunkHashes.size());
const IoHash& ChunkHash = ChunkHashes[ChunkIndex];
IoBuffer Payload = SafeOpenChunk(ChunkHash, ExpectedSize);
if (!AsyncCallback(ChunkIndex, std::move(Payload)))
@@ -645,49 +663,70 @@ FileCasStrategy::IterateChunks(std::span<IoHash> ChunkHashes,
return true;
};
- Latch WorkLatch(1);
- for (size_t Index = 0; Index < FoundChunkIndexes.size(); Index++)
+ std::atomic<bool> AbortFlag;
+ std::atomic<bool> PauseFlag;
+ ParallelWork Work(AbortFlag, PauseFlag);
+ try
{
- size_t ChunkIndex = FoundChunkIndexes[Index];
- uint64_t ExpectedSize = FoundChunkExpectedSizes[Index];
- if (!Continue)
+ for (size_t Index = 0; Index < FoundChunkIndexes.size(); Index++)
{
- break;
- }
- if (OptionalWorkerPool)
- {
- WorkLatch.AddCount(1);
- OptionalWorkerPool->ScheduleWork([this, &WorkLatch, &ProcessOne, &ChunkHashes, ChunkIndex, ExpectedSize, &Continue]() {
- auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); });
- if (!Continue)
- {
- return;
- }
- try
- {
- if (!ProcessOne(ChunkIndex, ExpectedSize))
- {
- Continue = false;
- }
- }
- catch (const std::exception& Ex)
+ if (AbortFlag)
+ {
+ AsyncContinue.store(false);
+ }
+ if (!AsyncContinue)
+ {
+ break;
+ }
+ size_t ChunkIndex = FoundChunkIndexes[Index];
+ uint64_t ExpectedSize = FoundChunkExpectedSizes[Index];
+ if (OptionalWorkerPool)
+ {
+ Work.ScheduleWork(
+ *OptionalWorkerPool,
+ [this, &ProcessOne, &ChunkHashes, ChunkIndex, ExpectedSize, &AsyncContinue](std::atomic<bool>& AbortFlag) {
+ if (AbortFlag)
+ {
+ AsyncContinue.store(false);
+ }
+ if (!AsyncContinue)
+ {
+ return;
+ }
+ try
+ {
+ if (!ProcessOne(ChunkIndex, ExpectedSize))
+ {
+ AsyncContinue.store(false);
+ }
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_WARN("Failed iterating chunks for cas root path {}, chunk {}. Reason: '{}'",
+ m_RootDirectory,
+ ChunkHashes[ChunkIndex],
+ Ex.what());
+ AsyncContinue.store(false);
+ }
+ });
+ }
+ else
+ {
+ if (!ProcessOne(ChunkIndex, ExpectedSize))
{
- ZEN_WARN("Failed iterating chunks for cas root path {}, chunk {}. Reason: '{}'",
- m_RootDirectory,
- ChunkHashes[ChunkIndex],
- Ex.what());
+ AsyncContinue.store(false);
}
- });
- }
- else
- {
- Continue = Continue && ProcessOne(ChunkIndex, ExpectedSize);
+ }
}
}
- WorkLatch.CountDown();
- WorkLatch.Wait();
+ catch (const std::exception& Ex)
+ {
+ AbortFlag.store(true);
+ ZEN_WARN("Failed iterating chunks in {}. Reason: '{}'", this->m_RootDirectory, Ex.what());
+ }
+ Work.Wait();
}
- return Continue;
+ return AsyncContinue.load();
}
void
@@ -727,7 +766,7 @@ FileCasStrategy::Flush()
ZEN_TRACE_CPU("FileCas::Flush");
m_CasLog.Flush();
- MakeIndexSnapshot();
+ MakeIndexSnapshot(/*ResetLog*/ false);
}
void
@@ -912,15 +951,14 @@ FileCasStrategy::ValidateEntry(const FileCasIndexEntry& Entry, std::string& OutR
}
void
-FileCasStrategy::MakeIndexSnapshot()
+FileCasStrategy::MakeIndexSnapshot(bool ResetLog)
{
ZEN_MEMSCOPE(GetFileCasTag());
ZEN_TRACE_CPU("FileCas::MakeIndexSnapshot");
using namespace filecas::impl;
- uint64_t LogCount = m_CasLog.GetLogCount();
- if (m_LogFlushPosition == LogCount)
+ if (m_LogFlushPosition == m_CasLog.GetLogCount())
{
return;
}
@@ -937,34 +975,17 @@ FileCasStrategy::MakeIndexSnapshot()
namespace fs = std::filesystem;
- fs::path IndexPath = GetIndexPath(m_RootDirectory);
- fs::path STmpIndexPath = GetTempIndexPath(m_RootDirectory);
-
- // Move index away, we keep it if something goes wrong
- if (fs::is_regular_file(STmpIndexPath))
- {
- std::error_code Ec;
- if (!fs::remove(STmpIndexPath, Ec) || Ec)
- {
- ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", STmpIndexPath, Ec.message());
- return;
- }
- }
+ const fs::path IndexPath = GetIndexPath(m_RootDirectory);
try
{
- if (fs::is_regular_file(IndexPath))
- {
- fs::rename(IndexPath, STmpIndexPath);
- }
-
// Write the current state of the location map to a new index state
std::vector<FileCasIndexEntry> Entries;
- uint64_t IndexLogPosition = 0;
+ // Be defensive regarding log position as it is written to without acquiring m_LocationMapLock
+ const uint64_t IndexLogPosition = ResetLog ? 0 : m_CasLog.GetLogCount();
{
RwLock::SharedLockScope __(m_Lock);
- IndexLogPosition = m_CasLog.GetLogCount();
Entries.resize(m_Index.size());
uint64_t EntryIndex = 0;
@@ -974,6 +995,7 @@ FileCasStrategy::MakeIndexSnapshot()
IndexEntry.Key = Entry.first;
IndexEntry.Size = Entry.second.Size;
}
+ EntryCount = m_Index.size();
}
TemporaryFile ObjectIndexFile;
@@ -983,47 +1005,47 @@ FileCasStrategy::MakeIndexSnapshot()
{
throw std::system_error(Ec, fmt::format("Failed to create temp file for index snapshot at '{}'", IndexPath));
}
- filecas::impl::FileCasIndexHeader Header = {.EntryCount = Entries.size(), .LogPosition = IndexLogPosition};
+ filecas::impl::FileCasIndexHeader Header = {.EntryCount = EntryCount, .LogPosition = IndexLogPosition};
Header.Checksum = filecas::impl::FileCasIndexHeader::ComputeChecksum(Header);
ObjectIndexFile.Write(&Header, sizeof(filecas::impl::FileCasIndexHeader), 0);
- ObjectIndexFile.Write(Entries.data(), Entries.size() * sizeof(FileCasIndexEntry), sizeof(filecas::impl::FileCasIndexHeader));
+ ObjectIndexFile.Write(Entries.data(), EntryCount * sizeof(FileCasIndexEntry), sizeof(filecas::impl::FileCasIndexHeader));
ObjectIndexFile.Flush();
ObjectIndexFile.MoveTemporaryIntoPlace(IndexPath, Ec);
if (Ec)
{
- throw std::system_error(Ec, fmt::format("Failed to move temp file '{}' to '{}'", ObjectIndexFile.GetPath(), IndexPath));
+ throw std::system_error(Ec,
+ fmt::format("Snapshot failed to rename new snapshot '{}' to '{}', reason: '{}'",
+ ObjectIndexFile.GetPath(),
+ IndexPath,
+ Ec.message()));
}
- EntryCount = Entries.size();
- m_LogFlushPosition = IndexLogPosition;
- }
- catch (const std::exception& Err)
- {
- ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what());
-
- // Restore any previous snapshot
- if (fs::is_regular_file(STmpIndexPath))
+ if (ResetLog)
{
- std::error_code Ec;
- fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless
- fs::rename(STmpIndexPath, IndexPath, Ec);
- if (Ec)
+ const std::filesystem::path LogPath = GetLogPath(m_RootDirectory);
+
+ if (IsFile(LogPath))
{
- ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", STmpIndexPath, Ec.message());
+ m_CasLog.Close();
+ if (!RemoveFile(LogPath, Ec) || Ec)
+ {
+ // This is non-critical, it only means that we will replay the events of the log over the snapshot - inefficient but in
+ // the end it will be the same result
+ ZEN_WARN("Snapshot failed to clean log file '{}', reason: '{}'", LogPath, Ec.message());
+ }
+ m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite);
}
}
+ m_LogFlushPosition = IndexLogPosition;
}
- if (fs::is_regular_file(STmpIndexPath))
+ catch (const std::exception& Err)
{
- std::error_code Ec;
- if (!fs::remove(STmpIndexPath, Ec) || Ec)
- {
- ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", STmpIndexPath, Ec.message());
- }
+ ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what());
}
}
+
uint64_t
FileCasStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& OutVersion)
{
@@ -1032,7 +1054,7 @@ FileCasStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t&
using namespace filecas::impl;
std::vector<FileCasIndexEntry> Entries;
- if (std::filesystem::is_regular_file(IndexPath))
+ if (IsFile(IndexPath))
{
Stopwatch Timer;
const auto _ = MakeGuard([&] {
@@ -1077,7 +1099,7 @@ FileCasStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t&
return 0;
}
- if (std::filesystem::is_directory(m_RootDirectory))
+ if (IsDir(m_RootDirectory))
{
ZEN_INFO("missing index for file cas, scanning for cas files in {}", m_RootDirectory);
TCasLogFile<FileCasIndexEntry> CasLog;
@@ -1116,7 +1138,7 @@ FileCasStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t SkipEntr
using namespace filecas::impl;
- if (std::filesystem::is_regular_file(LogPath))
+ if (IsFile(LogPath))
{
uint64_t LogEntryCount = 0;
Stopwatch Timer;
@@ -1174,7 +1196,7 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir)
struct Visitor : public FileSystemTraversal::TreeVisitor
{
Visitor(const std::filesystem::path& RootDir, std::vector<FileCasIndexEntry>& Entries) : RootDirectory(RootDir), Entries(Entries) {}
- virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override
+ virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override
{
std::filesystem::path RelPath = std::filesystem::relative(Parent, RootDirectory);
@@ -1274,12 +1296,12 @@ public:
ChunkPath);
}
std::error_code Ec;
- uint64_t SizeOnDisk = std::filesystem::file_size(ChunkPath, Ec);
+ uint64_t SizeOnDisk = FileSizeFromPath(ChunkPath, Ec);
if (Ec)
{
SizeOnDisk = 0;
}
- bool Existed = std::filesystem::remove(ChunkPath, Ec);
+ bool Existed = RemoveFile(ChunkPath, Ec);
if (Ec)
{
// Target file may be open for read, attempt to move it to a temp file and mark it delete on close
@@ -1290,7 +1312,7 @@ public:
if (OldChunk)
{
std::filesystem::path TempPath(ChunkPath.parent_path() / Oid::NewOid().ToString());
- std::filesystem::rename(ChunkPath, TempPath, Ec);
+ RenameFile(ChunkPath, TempPath, Ec);
if (!Ec)
{
OldChunk.SetDeleteOnClose(true);
@@ -1317,7 +1339,7 @@ public:
else
{
std::error_code Ec;
- bool Existed = std::filesystem::is_regular_file(ChunkPath, Ec);
+ bool Existed = IsFile(ChunkPath, Ec);
if (Ec)
{
if (Ctx.Settings.Verbose)
@@ -1516,7 +1538,7 @@ TEST_CASE("cas.chunk.moveoverwrite")
Payload1.SetDeleteOnClose(true);
CasStore::InsertResult Result = FileCas.InsertChunk(Payload1, CompressedPayload1.DecodeRawHash());
CHECK_EQ(Result.New, true);
- CHECK(!std::filesystem::exists(Payload1Path));
+ CHECK(!IsFile(Payload1Path));
}
{
std::filesystem::path Payload1BPath{TempDir.Path() / "payload_1"};
@@ -1526,9 +1548,9 @@ TEST_CASE("cas.chunk.moveoverwrite")
CasStore::InsertResult Result = FileCas.InsertChunk(Payload1B, CompressedPayload1.DecodeRawHash());
CHECK_EQ(Result.New, false);
- CHECK(std::filesystem::exists(Payload1BPath));
+ CHECK(IsFile(Payload1BPath));
Payload1B = {};
- CHECK(!std::filesystem::exists(Payload1BPath));
+ CHECK(!IsFile(Payload1BPath));
}
IoBuffer FetchedPayload = FileCas.FindChunk(CompressedPayload1.DecodeRawHash());
@@ -1554,7 +1576,7 @@ TEST_CASE("cas.chunk.moveoverwrite")
}
Payload2 = {};
- CHECK(!std::filesystem::exists(Payload2Path));
+ CHECK(!IsFile(Payload2Path));
{
IoHash RawHash;
@@ -1598,9 +1620,9 @@ TEST_CASE("cas.chunk.copyoverwrite")
CasStore::InsertResult Result =
FileCas.InsertChunk(Payload1, CompressedPayload1.DecodeRawHash(), CasStore::InsertMode::kCopyOnly);
CHECK_EQ(Result.New, true);
- CHECK(std::filesystem::exists(Payload1Path));
+ CHECK(IsFile(Payload1Path));
Payload1 = {};
- CHECK(!std::filesystem::exists(Payload1Path));
+ CHECK(!IsFile(Payload1Path));
}
{
std::filesystem::path Payload1BPath{TempDir.Path() / "payload_1"};
@@ -1611,9 +1633,9 @@ TEST_CASE("cas.chunk.copyoverwrite")
CasStore::InsertResult Result =
FileCas.InsertChunk(Payload1B, CompressedPayload1.DecodeRawHash(), CasStore::InsertMode::kCopyOnly);
CHECK_EQ(Result.New, false);
- CHECK(std::filesystem::exists(Payload1BPath));
+ CHECK(IsFile(Payload1BPath));
Payload1B = {};
- CHECK(!std::filesystem::exists(Payload1BPath));
+ CHECK(!IsFile(Payload1BPath));
}
IoBuffer FetchedPayload = FileCas.FindChunk(CompressedPayload1.DecodeRawHash());
@@ -1640,7 +1662,7 @@ TEST_CASE("cas.chunk.copyoverwrite")
}
Payload2 = {};
- CHECK(!std::filesystem::exists(Payload2Path));
+ CHECK(!IsFile(Payload2Path));
{
IoHash RawHash;
diff --git a/src/zenstore/filecas.h b/src/zenstore/filecas.h
index 21d8c3b9e..e93356927 100644
--- a/src/zenstore/filecas.h
+++ b/src/zenstore/filecas.h
@@ -50,7 +50,7 @@ struct FileCasStrategy final : public GcStorage, public GcReferenceStore
virtual GcReferencePruner* CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) override;
private:
- void MakeIndexSnapshot();
+ void MakeIndexSnapshot(bool ResetLog);
uint64_t ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& OutVersion);
uint64_t ReadLog(const std::filesystem::path& LogPath, uint64_t LogPosition);
LoggerRef Log() { return m_Log; }
diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp
index 7ac10d613..a15a2e084 100644
--- a/src/zenstore/gc.cpp
+++ b/src/zenstore/gc.cpp
@@ -62,11 +62,11 @@ namespace {
{
if (Size == 0)
{
- std::filesystem::remove(Path);
+ RemoveFile(Path);
return std::error_code{};
}
CreateDirectories(Path.parent_path());
- if (std::filesystem::is_regular_file(Path) && std::filesystem::file_size(Path) == Size)
+ if (IsFile(Path) && FileSizeFromPath(Path) == Size)
{
return std::error_code();
}
@@ -709,7 +709,7 @@ GcManager::CollectGarbage(const GcSettings& Settings)
RwLock StoreCompactorsLock;
std::unordered_map<std::unique_ptr<GcReferenceValidator>, size_t> ReferenceValidators;
RwLock ReferenceValidatorsLock;
- WorkerThreadPool& PreCachePhaseThreadPool =
+ WorkerThreadPool& ParallelWorkThreadPool =
Settings.SingleThread ? GetSyncWorkerPool() : GetSmallWorkerPool(EWorkloadType::Background);
if (!m_GcReferencers.empty())
@@ -721,7 +721,6 @@ GcManager::CollectGarbage(const GcSettings& Settings)
ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size());
ZEN_TRACE_CPU("GcV2::RemoveExpiredData");
- Latch WorkLeft(1);
{
// First remove any cache keys that may own references
SCOPED_TIMER(Result.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) {
@@ -733,39 +732,45 @@ GcManager::CollectGarbage(const GcSettings& Settings)
{
if (CheckGCCancel())
{
- WorkLeft.CountDown();
- WorkLeft.Wait();
return Sum(Result, true);
}
GcReferencer* Owner = m_GcReferencers[Index];
std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index];
- WorkLeft.AddCount(1);
- PreCachePhaseThreadPool.ScheduleWork([this, &Ctx, &WorkLeft, Owner, Stats, &StoreCompactorsLock, &StoreCompactors]() {
- ZEN_MEMSCOPE(GetGcTag());
-
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- try
+ try
+ {
+ Stats->first = Owner->GetGcName(Ctx);
+ SCOPED_TIMER(Stats->second.RemoveExpiredDataStats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ std::unique_ptr<GcStoreCompactor> StoreCompactor(
+ Owner->RemoveExpiredData(Ctx, Stats->second.RemoveExpiredDataStats));
+ if (StoreCompactor)
{
- Stats->first = Owner->GetGcName(Ctx);
- SCOPED_TIMER(Stats->second.RemoveExpiredDataStats.ElapsedMS =
- std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- std::unique_ptr<GcStoreCompactor> StoreCompactor(
- Owner->RemoveExpiredData(Ctx, Stats->second.RemoveExpiredDataStats));
- if (StoreCompactor)
- {
- RwLock::ExclusiveLockScope __(StoreCompactorsLock);
- StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->second.CompactStoreStats);
- }
+ RwLock::ExclusiveLockScope __(StoreCompactorsLock);
+ StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->second.CompactStoreStats);
}
- catch (const std::exception& Ex)
+ }
+ catch (const std::system_error& Ex)
+ {
+ if (IsOOD(Ex) || IsOOM(Ex))
+ {
+ ZEN_WARN("GCV2: Failed removing expired data for {}. Reason: '{}'", Owner->GetGcName(Ctx), Ex.what());
+ }
+ else
{
ZEN_ERROR("GCV2: Failed removing expired data for {}. Reason: '{}'", Owner->GetGcName(Ctx), Ex.what());
- SetCancelGC(true);
}
- });
+ SetCancelGC(true);
+ }
+ catch (const std::bad_alloc& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed removing expired data for {}. Reason: '{}'", Owner->GetGcName(Ctx), Ex.what());
+ SetCancelGC(true);
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed removing expired data for {}. Reason: '{}'", Owner->GetGcName(Ctx), Ex.what());
+ SetCancelGC(true);
+ }
}
- WorkLeft.CountDown();
- WorkLeft.Wait();
}
}
@@ -810,7 +815,7 @@ GcManager::CollectGarbage(const GcSettings& Settings)
GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index];
std::pair<std::string, GcReferenceStoreStats>* Stats = &Result.ReferenceStoreStats[Index];
WorkLeft.AddCount(1);
- PreCachePhaseThreadPool.ScheduleWork(
+ ParallelWorkThreadPool.ScheduleWork(
[this, &Ctx, ReferenceStore, Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() {
ZEN_MEMSCOPE(GetGcTag());
@@ -832,6 +837,29 @@ GcManager::CollectGarbage(const GcSettings& Settings)
ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner));
}
}
+ catch (const std::system_error& Ex)
+ {
+ if (IsOOD(Ex) || IsOOM(Ex))
+ {
+ ZEN_WARN("GCV2: Failed creating reference pruners for {}. Reason: '{}'",
+ ReferenceStore->GetGcName(Ctx),
+ Ex.what());
+ }
+ else
+ {
+ ZEN_ERROR("GCV2: Failed creating reference pruners for {}. Reason: '{}'",
+ ReferenceStore->GetGcName(Ctx),
+ Ex.what());
+ }
+ SetCancelGC(true);
+ }
+ catch (const std::bad_alloc& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed creating reference pruners for {}. Reason: '{}'",
+ ReferenceStore->GetGcName(Ctx),
+ Ex.what());
+ SetCancelGC(true);
+ }
catch (const std::exception& Ex)
{
ZEN_ERROR("GCV2: Failed creating reference pruners for {}. Reason: '{}'",
@@ -885,41 +913,70 @@ GcManager::CollectGarbage(const GcSettings& Settings)
GcReferencer* Referencer = m_GcReferencers[Index];
std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index];
WorkLeft.AddCount(1);
- PreCachePhaseThreadPool.ScheduleWork(
+ ParallelWorkThreadPool.ScheduleWork(
[this, &Ctx, &WorkLeft, Referencer, Index, Stats, &ReferenceCheckersLock, &ReferenceCheckers]() {
ZEN_MEMSCOPE(GetGcTag());
auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- // The Referencer will create a reference checker that guarantees that the references do not change
- // as long as it lives
- std::vector<GcReferenceChecker*> Checkers;
- try
+ if (!CheckGCCancel())
{
+ // The Referencer will create a reference checker that guarantees that the references do not
+ // change as long as it lives
+ std::vector<GcReferenceChecker*> Checkers;
+ auto __ = MakeGuard([&Checkers]() {
+ while (!Checkers.empty())
+ {
+ delete Checkers.back();
+ Checkers.pop_back();
+ }
+ });
+ try
{
- SCOPED_TIMER(Stats->second.CreateReferenceCheckersMS =
- std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- Checkers = Referencer->CreateReferenceCheckers(Ctx);
+ {
+ SCOPED_TIMER(Stats->second.CreateReferenceCheckersMS =
+ std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Checkers = Referencer->CreateReferenceCheckers(Ctx);
+ }
+ if (!Checkers.empty())
+ {
+ RwLock::ExclusiveLockScope __(ReferenceCheckersLock);
+ for (auto& Checker : Checkers)
+ {
+ ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker),
+ Index);
+ Checker = nullptr;
+ }
+ }
}
- if (!Checkers.empty())
+ catch (const std::system_error& Ex)
{
- RwLock::ExclusiveLockScope __(ReferenceCheckersLock);
- for (auto& Checker : Checkers)
+ if (IsOOD(Ex) || IsOOM(Ex))
+ {
+ ZEN_WARN("GCV2: Failed creating reference checkers for {}. Reason: '{}'",
+ Referencer->GetGcName(Ctx),
+ Ex.what());
+ }
+ else
{
- ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index);
- Checker = nullptr;
+ ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'",
+ Referencer->GetGcName(Ctx),
+ Ex.what());
}
+ SetCancelGC(true);
}
- }
- catch (const std::exception& Ex)
- {
- ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'",
- Referencer->GetGcName(Ctx),
- Ex.what());
- SetCancelGC(true);
- while (!Checkers.empty())
+ catch (const std::bad_alloc& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'",
+ Referencer->GetGcName(Ctx),
+ Ex.what());
+ SetCancelGC(true);
+ }
+ catch (const std::exception& Ex)
{
- delete Checkers.back();
- Checkers.pop_back();
+ ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'",
+ Referencer->GetGcName(Ctx),
+ Ex.what());
+ SetCancelGC(true);
}
}
});
@@ -962,19 +1019,26 @@ GcManager::CollectGarbage(const GcSettings& Settings)
GcReferencer* Referencer = m_GcReferencers[Index];
std::pair<std::string, GcReferencerStats>* ReferemcerStats = &Result.ReferencerStats[Index];
WorkLeft.AddCount(1);
- PreCachePhaseThreadPool.ScheduleWork([this,
- &Ctx,
- &WorkLeft,
- Referencer,
- Index,
- Result = &Result,
- ReferemcerStats,
- &ReferenceValidatorsLock,
- &ReferenceValidators]() {
+ ParallelWorkThreadPool.ScheduleWork([this,
+ &Ctx,
+ &WorkLeft,
+ Referencer,
+ Index,
+ Result = &Result,
+ ReferemcerStats,
+ &ReferenceValidatorsLock,
+ &ReferenceValidators]() {
ZEN_MEMSCOPE(GetGcTag());
auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
std::vector<GcReferenceValidator*> Validators;
+ auto __ = MakeGuard([&Validators]() {
+ while (!Validators.empty())
+ {
+ delete Validators.back();
+ Validators.pop_back();
+ }
+ });
try
{
{
@@ -995,17 +1059,35 @@ GcManager::CollectGarbage(const GcSettings& Settings)
}
}
}
+ catch (const std::system_error& Ex)
+ {
+ if (IsOOD(Ex) || IsOOM(Ex))
+ {
+ ZEN_WARN("GCV2: Failed creating reference validators for {}. Reason: '{}'",
+ Referencer->GetGcName(Ctx),
+ Ex.what());
+ }
+ else
+ {
+ ZEN_ERROR("GCV2: Failed creating reference validators for {}. Reason: '{}'",
+ Referencer->GetGcName(Ctx),
+ Ex.what());
+ }
+ SetCancelGC(true);
+ }
+ catch (const std::bad_alloc& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed creating reference validators for {}. Reason: '{}'",
+ Referencer->GetGcName(Ctx),
+ Ex.what());
+ SetCancelGC(true);
+ }
catch (const std::exception& Ex)
{
ZEN_ERROR("GCV2: Failed creating reference validators for {}. Reason: '{}'",
Referencer->GetGcName(Ctx),
Ex.what());
SetCancelGC(true);
- while (!Validators.empty())
- {
- delete Validators.back();
- Validators.pop_back();
- }
}
});
}
@@ -1023,8 +1105,6 @@ GcManager::CollectGarbage(const GcSettings& Settings)
ZEN_INFO("GCV2: Precaching state for {} reference checkers", ReferenceCheckers.size());
ZEN_TRACE_CPU("GcV2::PreCache");
- Latch WorkLeft(1);
-
{
SCOPED_TIMER(Result.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());
if (Ctx.Settings.Verbose) {
@@ -1036,33 +1116,40 @@ GcManager::CollectGarbage(const GcSettings& Settings)
{
if (CheckGCCancel())
{
- WorkLeft.CountDown();
- WorkLeft.Wait();
return Sum(Result, true);
}
GcReferenceChecker* Checker = It.first.get();
size_t Index = It.second;
std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index];
- WorkLeft.AddCount(1);
- PreCachePhaseThreadPool.ScheduleWork([this, &Ctx, Checker, Index, Stats, &WorkLeft]() {
- ZEN_MEMSCOPE(GetGcTag());
-
- auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); });
- try
+ try
+ {
+ SCOPED_TIMER(Stats->second.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
+ Checker->PreCache(Ctx);
+ }
+ catch (const std::system_error& Ex)
+ {
+ if (IsOOD(Ex) || IsOOM(Ex))
{
- SCOPED_TIMER(Stats->second.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
- Checker->PreCache(Ctx);
+ ZEN_WARN("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what());
}
- catch (const std::exception& Ex)
+ else
{
ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what());
- SetCancelGC(true);
}
- });
+ SetCancelGC(true);
+ }
+ catch (const std::bad_alloc& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what());
+ SetCancelGC(true);
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what());
+ SetCancelGC(true);
+ }
}
- WorkLeft.CountDown();
- WorkLeft.Wait();
}
}
@@ -1081,7 +1168,7 @@ GcManager::CollectGarbage(const GcSettings& Settings)
ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size());
{
ZEN_TRACE_CPU("GcV2::LockReferencers");
- // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until
+ // From this point we have blocked all writes to all References (DiskBucket/ProjectStore/BuildStore) until
// we delete the ReferenceLockers
Latch WorkLeft(1);
{
@@ -1108,7 +1195,7 @@ GcManager::CollectGarbage(const GcSettings& Settings)
ZEN_TRACE_CPU("GcV2::UpdateLockedState");
// Locking all references checkers so we have a steady state of which references are used
- // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until
+ // From this point we have blocked all writes to all References (DiskBucket/ProjectStore/BuildStore) until
// we delete the ReferenceCheckers
Latch WorkLeft(1);
@@ -1142,6 +1229,29 @@ GcManager::CollectGarbage(const GcSettings& Settings)
std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
Checker->UpdateLockedState(Ctx);
}
+ catch (const std::system_error& Ex)
+ {
+ if (IsOOD(Ex) || IsOOM(Ex))
+ {
+ ZEN_WARN("GCV2: Failed Updating locked state for {}. Reason: '{}'",
+ Checker->GetGcName(Ctx),
+ Ex.what());
+ }
+ else
+ {
+ ZEN_ERROR("GCV2: Failed Updating locked state for {}. Reason: '{}'",
+ Checker->GetGcName(Ctx),
+ Ex.what());
+ }
+ SetCancelGC(true);
+ }
+ catch (const std::bad_alloc& Ex)
+ {
+ ZEN_WARN("GCV2: Failed Updating locked state for {}. Reason: '{}'",
+ Checker->GetGcName(Ctx),
+ Ex.what());
+ SetCancelGC(true);
+ }
catch (const std::exception& Ex)
{
ZEN_ERROR("GCV2: Failed Updating locked state for {}. Reason: '{}'",
@@ -1231,6 +1341,29 @@ GcManager::CollectGarbage(const GcSettings& Settings)
StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->CompactStoreStats);
}
}
+ catch (const std::system_error& Ex)
+ {
+ if (IsOOD(Ex) || IsOOM(Ex))
+ {
+ ZEN_WARN("GCV2: Failed removing unused data for {}. Reason: '{}'",
+ Pruner->GetGcName(Ctx),
+ Ex.what());
+ }
+ else
+ {
+ ZEN_ERROR("GCV2: Failed removing unused data for {}. Reason: '{}'",
+ Pruner->GetGcName(Ctx),
+ Ex.what());
+ }
+ SetCancelGC(true);
+ }
+ catch (const std::bad_alloc& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed removing unused data for {}. Reason: '{}'",
+ Pruner->GetGcName(Ctx),
+ Ex.what());
+ SetCancelGC(true);
+ }
catch (const std::exception& Ex)
{
ZEN_ERROR("GCV2: Failed removing unused data for {}. Reason: '{}'",
@@ -1262,12 +1395,12 @@ GcManager::CollectGarbage(const GcSettings& Settings)
ZEN_TRACE_CPU("GcV2::CompactStores");
auto ClaimDiskReserve = [&]() -> uint64_t {
- if (!std::filesystem::is_regular_file(Settings.DiskReservePath))
+ if (!IsFile(Settings.DiskReservePath))
{
return 0;
}
- uint64_t ReclaimedSize = std::filesystem::file_size(Settings.DiskReservePath);
- if (std::filesystem::remove(Settings.DiskReservePath))
+ uint64_t ReclaimedSize = FileSizeFromPath(Settings.DiskReservePath);
+ if (RemoveFile(Settings.DiskReservePath))
{
return ReclaimedSize;
}
@@ -1294,6 +1427,23 @@ GcManager::CollectGarbage(const GcSettings& Settings)
SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
Compactor->CompactStore(Ctx, Stats, ClaimDiskReserve);
}
+ catch (const std::system_error& Ex)
+ {
+ if (IsOOD(Ex) || IsOOM(Ex))
+ {
+ ZEN_WARN("GCV2: Failed compacting store {}. Reason: '{}'", Compactor->GetGcName(Ctx), Ex.what());
+ }
+ else
+ {
+ ZEN_ERROR("GCV2: Failed compacting store {}. Reason: '{}'", Compactor->GetGcName(Ctx), Ex.what());
+ }
+ SetCancelGC(true);
+ }
+ catch (const std::bad_alloc& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed compacting store {}. Reason: '{}'", Compactor->GetGcName(Ctx), Ex.what());
+ SetCancelGC(true);
+ }
catch (const std::exception& Ex)
{
ZEN_ERROR("GCV2: Failed compacting store {}. Reason: '{}'", Compactor->GetGcName(Ctx), Ex.what());
@@ -1335,6 +1485,23 @@ GcManager::CollectGarbage(const GcSettings& Settings)
SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()););
ReferenceValidator->Validate(Ctx, Stats);
}
+ catch (const std::system_error& Ex)
+ {
+ if (IsOOD(Ex) || IsOOM(Ex))
+ {
+ ZEN_WARN("GCV2: Failed validating referencer {}. Reason: '{}'", ReferenceValidator->GetGcName(Ctx), Ex.what());
+ }
+ else
+ {
+ ZEN_ERROR("GCV2: Failed validating referencer {}. Reason: '{}'", ReferenceValidator->GetGcName(Ctx), Ex.what());
+ }
+ SetCancelGC(true);
+ }
+ catch (const std::bad_alloc& Ex)
+ {
+ ZEN_ERROR("GCV2: Failed validating referencer {}. Reason: '{}'", ReferenceValidator->GetGcName(Ctx), Ex.what());
+ SetCancelGC(true);
+ }
catch (const std::exception& Ex)
{
ZEN_ERROR("GCV2: Failed validating referencer {}. Reason: '{}'", ReferenceValidator->GetGcName(Ctx), Ex.what());
@@ -1557,7 +1724,7 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config)
m_Config.LightweightInterval = m_Config.MonitorInterval;
}
- std::filesystem::create_directories(Config.RootDirectory);
+ CreateDirectories(Config.RootDirectory);
std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize);
if (Ec)
@@ -1739,6 +1906,7 @@ GcScheduler::AppendGCLog(std::string_view Id, GcClock::TimePoint StartTime, cons
{
Writer << "CacheExpireTime"sv << ToDateTime(Settings.CacheExpireTime);
Writer << "ProjectStoreExpireTime"sv << ToDateTime(Settings.ProjectStoreExpireTime);
+ Writer << "BuildStoreExpireTime"sv << ToDateTime(Settings.BuildStoreExpireTime);
Writer << "CollectSmallObjects"sv << Settings.CollectSmallObjects;
Writer << "IsDeleteMode"sv << Settings.IsDeleteMode;
Writer << "SkipCidDelete"sv << Settings.SkipCidDelete;
@@ -1849,7 +2017,7 @@ GcScheduler::GetState() const
if (Result.Config.DiskReserveSize != 0)
{
Ec.clear();
- Result.HasDiskReserve = std::filesystem::is_regular_file(Result.Config.RootDirectory / "reserve.gc", Ec) && !Ec;
+ Result.HasDiskReserve = IsFile(Result.Config.RootDirectory / "reserve.gc", Ec) && !Ec;
}
if (Result.Status != GcSchedulerStatus::kRunning)
@@ -1900,17 +2068,46 @@ GcScheduler::SchedulerThread()
ZEN_MEMSCOPE(GetGcTag());
SetCurrentThreadName("GcScheduler");
- std::chrono::seconds WaitTime{0};
-
- bool SilenceErrors = false;
+ std::chrono::seconds WaitTime{0};
+ const std::chrono::seconds ShortWaitTime{5};
+ bool SilenceErrors = false;
for (;;)
{
- bool Timeout = false;
+ (void)CheckDiskSpace();
+
+ std::chrono::seconds WaitedTime{0};
+ bool Timeout = false;
{
ZEN_ASSERT(WaitTime.count() >= 0);
std::unique_lock Lock(m_GcMutex);
- Timeout = std::cv_status::timeout == m_GcSignal.wait_for(Lock, WaitTime);
+ while (!Timeout)
+ {
+ std::chrono::seconds ShortWait = Min(WaitTime, ShortWaitTime);
+ bool ShortTimeout = std::cv_status::timeout == m_GcSignal.wait_for(Lock, ShortWait);
+ if (ShortTimeout)
+ {
+ if (WaitTime > ShortWaitTime)
+ {
+ DiskSpace Space = CheckDiskSpace();
+ if (!AreDiskWritesAllowed())
+ {
+ ZEN_INFO("Triggering GC due to low disk space ({}) on {}", NiceBytes(Space.Free), m_Config.RootDirectory);
+ Timeout = true;
+ }
+ WaitTime -= ShortWaitTime;
+ }
+ else
+ {
+ Timeout = true;
+ }
+ }
+ else
+ {
+ // We got a signal
+ break;
+ }
+ }
}
if (Status() == GcSchedulerStatus::kStopped)
@@ -1940,7 +2137,9 @@ GcScheduler::SchedulerThread()
std::chrono::seconds LightweightGcInterval = m_Config.LightweightInterval;
std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration;
std::chrono::seconds MaxProjectStoreDuration = m_Config.MaxProjectStoreDuration;
+ std::chrono::seconds MaxBuildStoreDuration = m_Config.MaxBuildStoreDuration;
uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit;
+ uint64_t MinimumFreeDiskSpaceToAllowWrites = m_Config.MinimumFreeDiskSpaceToAllowWrites;
bool SkipCid = false;
GcVersion UseGCVersion = m_Config.UseGCVersion;
uint32_t CompactBlockUsageThresholdPercent = m_Config.CompactBlockUsageThresholdPercent;
@@ -1955,8 +2154,9 @@ GcScheduler::SchedulerThread()
uint8_t NextAttachmentPassIndex =
ComputeAttachmentRange(m_AttachmentPassIndex, m_Config.AttachmentPassCount, AttachmentRangeMin, AttachmentRangeMax);
- bool DiskSpaceGCTriggered = false;
- bool TimeBasedGCTriggered = false;
+ bool LowDiskSpaceGCTriggered = false;
+ bool HighDiskSpaceUsageGCTriggered = false;
+ bool TimeBasedGCTriggered = false;
GcClock::TimePoint Now = GcClock::Now();
@@ -1975,6 +2175,10 @@ GcScheduler::SchedulerThread()
{
MaxProjectStoreDuration = TriggerParams.MaxProjectStoreDuration;
}
+ if (TriggerParams.MaxBuildStoreDuration != std::chrono::seconds::max())
+ {
+ MaxBuildStoreDuration = TriggerParams.MaxBuildStoreDuration;
+ }
if (TriggerParams.DiskSizeSoftLimit != 0)
{
DiskSizeSoftLimit = TriggerParams.DiskSizeSoftLimit;
@@ -2046,6 +2250,8 @@ GcScheduler::SchedulerThread()
MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration;
GcClock::TimePoint ProjectStoreExpireTime =
MaxProjectStoreDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxProjectStoreDuration;
+ GcClock::TimePoint BuildStoreExpireTime =
+ MaxBuildStoreDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxBuildStoreDuration;
const GcStorageSize TotalSize = m_GcManager.TotalStorageSize();
@@ -2087,12 +2293,32 @@ GcScheduler::SchedulerThread()
}
}
- uint64_t GcDiskSpaceGoal = 0;
+ uint64_t MaximumDiskUseGcSpaceGoal = 0;
+ uint64_t MinimumFreeDiskGcSpaceGoal = 0;
+
if (DiskSizeSoftLimit != 0 && TotalSize.DiskSize > DiskSizeSoftLimit)
{
- GcDiskSpaceGoal = TotalSize.DiskSize - DiskSizeSoftLimit;
+ MaximumDiskUseGcSpaceGoal = TotalSize.DiskSize - DiskSizeSoftLimit;
+ HighDiskSpaceUsageGCTriggered = true;
+ }
+
+ if (MinimumFreeDiskSpaceToAllowWrites != 0 && Space.Free < MinimumFreeDiskSpaceToAllowWrites)
+ {
+ MinimumFreeDiskGcSpaceGoal = MinimumFreeDiskSpaceToAllowWrites - Space.Free;
+ if (MinimumFreeDiskGcSpaceGoal > MaximumDiskUseGcSpaceGoal)
+ {
+ LowDiskSpaceGCTriggered = true;
+ EnableValidation = false;
+ }
+ }
+
+ if (MaximumDiskUseGcSpaceGoal > 0 || MinimumFreeDiskGcSpaceGoal > 0)
+ {
+ const uint64_t GcDiskSpaceRemoveGoal = Max(MaximumDiskUseGcSpaceGoal, MinimumFreeDiskGcSpaceGoal);
+
std::unique_lock Lock(m_GcMutex);
- GcClock::Tick AgeTick = m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceGoal, Now.time_since_epoch().count());
+ GcClock::Tick AgeTick =
+ m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceRemoveGoal, Now.time_since_epoch().count());
GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick);
if (SizeBasedExpireTime > CacheExpireTime)
{
@@ -2102,6 +2328,10 @@ GcScheduler::SchedulerThread()
{
ProjectStoreExpireTime = SizeBasedExpireTime;
}
+ if (SizeBasedExpireTime > BuildStoreExpireTime)
+ {
+ BuildStoreExpireTime = SizeBasedExpireTime;
+ }
}
std::chrono::seconds RemainingTimeUntilGc =
@@ -2130,29 +2360,33 @@ GcScheduler::SchedulerThread()
RemainingTimeUntilLightweightGc = RemainingTimeUntilGc;
}
- if (GcDiskSpaceGoal > 0)
- {
- DiskSpaceGCTriggered = true;
- }
- else if (RemainingTimeUntilGc.count() == 0)
- {
- TimeBasedGCTriggered = true;
- }
- else if (RemainingTimeUntilLightweightGc.count() == 0)
+ if (MaximumDiskUseGcSpaceGoal == 0 && MinimumFreeDiskGcSpaceGoal == 0)
{
- TimeBasedGCTriggered = true;
- SkipCid = true;
+ if (RemainingTimeUntilGc.count() == 0)
+ {
+ TimeBasedGCTriggered = true;
+ }
+ else if (RemainingTimeUntilLightweightGc.count() == 0)
+ {
+ TimeBasedGCTriggered = true;
+ SkipCid = true;
+ }
}
std::string NextTriggerStatus;
- if (GcInterval.count() != 0 || LightweightGcInterval.count() != 0 || DiskSizeSoftLimit != 0)
{
ExtendableStringBuilder<256> Sb;
- if (DiskSpaceGCTriggered)
+ if (LowDiskSpaceGCTriggered)
+ {
+ Sb.Append(fmt::format(" Free disk space is below {}, trying to reclaim {}.",
+ NiceBytes(MinimumFreeDiskSpaceToAllowWrites),
+ NiceBytes(MinimumFreeDiskGcSpaceGoal)));
+ }
+ else if (HighDiskSpaceUsageGCTriggered)
{
Sb.Append(fmt::format(" Disk space exceeds {}, trying to reclaim {}.",
NiceBytes(DiskSizeSoftLimit),
- NiceBytes(GcDiskSpaceGoal)));
+ NiceBytes(MaximumDiskUseGcSpaceGoal)));
}
else if (TimeBasedGCTriggered)
{
@@ -2182,6 +2416,10 @@ GcScheduler::SchedulerThread()
{
Sb.Append(fmt::format(" Disk usage GC in {}.", NiceBytes(DiskSizeSoftLimit - TotalSize.DiskSize)));
}
+ else if (MinimumFreeDiskSpaceToAllowWrites != 0 && Space.Free > MinimumFreeDiskSpaceToAllowWrites)
+ {
+ Sb.Append(fmt::format(" Disk usage GC in {}.", NiceBytes(Space.Free - MinimumFreeDiskSpaceToAllowWrites)));
+ }
}
NextTriggerStatus = Sb;
}
@@ -2198,7 +2436,7 @@ GcScheduler::SchedulerThread()
NiceBytes(MaxLoad / uint64_t(std::chrono::seconds(m_Config.MonitorInterval).count())),
NextTriggerStatus);
- if (!DiskSpaceGCTriggered && !TimeBasedGCTriggered)
+ if (!HighDiskSpaceUsageGCTriggered && !LowDiskSpaceGCTriggered && !TimeBasedGCTriggered)
{
WaitTime = m_Config.MonitorInterval;
if (RemainingTimeUntilGc < WaitTime)
@@ -2227,6 +2465,7 @@ GcScheduler::SchedulerThread()
bool GcSuccess = CollectGarbage(CacheExpireTime,
ProjectStoreExpireTime,
+ BuildStoreExpireTime,
DoDelete,
CollectSmallObjects,
SkipCid,
@@ -2333,6 +2572,7 @@ GcScheduler::ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds Time
bool
GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
const GcClock::TimePoint& ProjectStoreExpireTime,
+ const GcClock::TimePoint& BuildStoreExpireTime,
bool Delete,
bool CollectSmallObjects,
bool SkipCid,
@@ -2375,12 +2615,12 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
{
// We are low on disk, check if we can release our extra storage reserve, if we can't bail from doing GC
auto ClaimDiskReserve = [&]() -> uint64_t {
- if (!std::filesystem::is_regular_file(DiskReservePath))
+ if (!IsFile(DiskReservePath))
{
return 0;
}
- uint64_t ReclaimedSize = std::filesystem::file_size(DiskReservePath);
- if (std::filesystem::remove(DiskReservePath))
+ uint64_t ReclaimedSize = FileSizeFromPath(DiskReservePath);
+ if (RemoveFile(DiskReservePath))
{
return ReclaimedSize;
}
@@ -2416,6 +2656,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
const GcSettings Settings = {.CacheExpireTime = CacheExpireTime,
.ProjectStoreExpireTime = ProjectStoreExpireTime,
+ .BuildStoreExpireTime = BuildStoreExpireTime,
.CollectSmallObjects = CollectSmallObjects,
.IsDeleteMode = Delete,
.SkipCidDelete = SkipCid,
@@ -2447,6 +2688,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
}
SB.Append(fmt::format(" Cache cutoff time: {}\n", Settings.CacheExpireTime));
SB.Append(fmt::format(" Project store cutoff time: {}\n", Settings.ProjectStoreExpireTime));
+ SB.Append(fmt::format(" Build store cutoff time: {}\n", Settings.BuildStoreExpireTime));
};
{
@@ -2522,7 +2764,11 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
ZEN_INFO("GCV2: {}", SB.ToView());
- AppendGCLog(GcId, GcStartTime, Settings, Result);
+ CheckDiskSpace();
+ if (!m_AreDiskWritesBlocked.load())
+ {
+ AppendGCLog(GcId, GcStartTime, Settings, Result);
+ }
if (SkipCid)
{
@@ -2552,6 +2798,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
if (Delete)
{
GcClock::TimePoint KeepRangeStart = Min(CacheExpireTime, ProjectStoreExpireTime);
+ KeepRangeStart = Min(KeepRangeStart, BuildStoreExpireTime);
m_LastGcExpireTime = KeepRangeStart;
std::unique_lock Lock(m_GcMutex);
m_DiskUsageWindow.KeepRange(KeepRangeStart.time_since_epoch().count(), GcClock::Duration::max().count());
@@ -2563,65 +2810,69 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
m_LastFullGCDiff = Diff;
}
- for (uint32_t RetryCount = 0; RetryCount < 3; RetryCount++)
+ CheckDiskSpace();
+ if (!m_AreDiskWritesBlocked.load())
{
- if (RetryCount > 0)
+ for (uint32_t RetryCount = 0; RetryCount < 3; RetryCount++)
{
- ZEN_INFO("Writing GC state failed {} time(s), pausing and trying again", RetryCount);
- Sleep(250);
- }
- try
- {
- const fs::path Path = m_Config.RootDirectory / "gc_state";
- ZEN_DEBUG("saving scheduler state to '{}'", Path);
- CbObjectWriter SchedulerState;
- SchedulerState << "LastGcTime"sv << static_cast<int64_t>(m_LastGcTime.time_since_epoch().count());
- SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count());
- SchedulerState << "AttachmentPassIndex"sv << m_AttachmentPassIndex;
-
- SaveCompactBinaryObject(Path, SchedulerState.Save());
if (RetryCount > 0)
{
- ZEN_INFO("Writing GC state succeeded after {} attempts", RetryCount + 1);
- }
- break;
- }
- catch (const std::system_error& SystemError)
- {
- if (IsOOM(SystemError.code()))
- {
- ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", SystemError.what());
+ ZEN_INFO("Writing GC state failed {} time(s), pausing and trying again", RetryCount);
+ Sleep(250);
}
- else if (IsOOD(SystemError.code()))
- {
- ZEN_WARN("writing gc scheduler state ran out of disk space: '{}'", SystemError.what());
- }
- if (RetryCount == 0)
+ try
{
- ZEN_ERROR("writing gc scheduler state failed with system error exception: '{}' ({})",
- SystemError.what(),
- SystemError.code().value());
+ const fs::path Path = m_Config.RootDirectory / "gc_state";
+ ZEN_DEBUG("saving scheduler state to '{}'", Path);
+ CbObjectWriter SchedulerState;
+ SchedulerState << "LastGcTime"sv << static_cast<int64_t>(m_LastGcTime.time_since_epoch().count());
+ SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count());
+ SchedulerState << "AttachmentPassIndex"sv << m_AttachmentPassIndex;
+
+ SaveCompactBinaryObject(Path, SchedulerState.Save());
+ if (RetryCount > 0)
+ {
+ ZEN_INFO("Writing GC state succeeded after {} attempts", RetryCount + 1);
+ }
+ break;
}
- else
+ catch (const std::system_error& SystemError)
{
- ZEN_WARN("writing gc scheduler state failed with system error exception: '{}' ({})",
- SystemError.what(),
- SystemError.code().value());
+ if (IsOOM(SystemError.code()))
+ {
+ ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", SystemError.what());
+ }
+ else if (IsOOD(SystemError.code()))
+ {
+ ZEN_WARN("writing gc scheduler state ran out of disk space: '{}'", SystemError.what());
+ }
+ else if (RetryCount == 0)
+ {
+ ZEN_ERROR("writing gc scheduler state failed with system error exception: '{}' ({})",
+ SystemError.what(),
+ SystemError.code().value());
+ }
+ else
+ {
+ ZEN_WARN("writing gc scheduler state failed with system error exception: '{}' ({})",
+ SystemError.what(),
+ SystemError.code().value());
+ }
}
- }
- catch (const std::bad_alloc& BadAlloc)
- {
- ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", BadAlloc.what());
- }
- catch (const std::exception& Ex)
- {
- if (RetryCount == 0)
+ catch (const std::bad_alloc& BadAlloc)
{
- ZEN_ERROR("writing gc scheduler state failed with: '{}'", Ex.what());
+ ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", BadAlloc.what());
}
- else
+ catch (const std::exception& Ex)
{
- ZEN_WARN("writing gc scheduler state failed with: '{}'", Ex.what());
+ if (RetryCount == 0)
+ {
+ ZEN_ERROR("writing gc scheduler state failed with: '{}'", Ex.what());
+ }
+ else
+ {
+ ZEN_WARN("writing gc scheduler state failed with: '{}'", Ex.what());
+ }
}
}
}
diff --git a/src/zenstore/include/zenstore/accesstime.h b/src/zenstore/include/zenstore/accesstime.h
new file mode 100644
index 000000000..e53937b52
--- /dev/null
+++ b/src/zenstore/include/zenstore/accesstime.h
@@ -0,0 +1,53 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenstore/gc.h>
+
+#include <gsl/gsl-lite.hpp>
+
+namespace zen {
+
+// This stores the access time as seconds since epoch internally in a 32-bit value, giving us a range of 136 years since epoch
+struct AccessTime
+{
+ explicit AccessTime(GcClock::Tick Tick) noexcept : SecondsSinceEpoch(ToSecondsSinceEpoch(Tick)) {}
+ AccessTime& operator=(GcClock::Tick Tick) noexcept
+ {
+ SecondsSinceEpoch.store(ToSecondsSinceEpoch(Tick), std::memory_order_relaxed);
+ return *this;
+ }
+ operator GcClock::Tick() const noexcept
+ {
+ return std::chrono::duration_cast<GcClock::Duration>(std::chrono::seconds(SecondsSinceEpoch.load(std::memory_order_relaxed)))
+ .count();
+ }
+
+ AccessTime(AccessTime&& Rhs) noexcept : SecondsSinceEpoch(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed)) {}
+ AccessTime(const AccessTime& Rhs) noexcept : SecondsSinceEpoch(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed)) {}
+ AccessTime& operator=(AccessTime&& Rhs) noexcept
+ {
+ SecondsSinceEpoch.store(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed), std::memory_order_relaxed);
+ return *this;
+ }
+ AccessTime& operator=(const AccessTime& Rhs) noexcept
+ {
+ SecondsSinceEpoch.store(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed), std::memory_order_relaxed);
+ return *this;
+ }
+
+ void SetSecondsSinceEpoch(uint32_t InSecondsSinceEpoch) { SecondsSinceEpoch.store(InSecondsSinceEpoch); }
+
+ uint32_t GetSecondsSinceEpoch() const { return SecondsSinceEpoch.load(); }
+
+private:
+ AccessTime(uint32_t InSecondsSinceEpoch) noexcept : SecondsSinceEpoch(InSecondsSinceEpoch) {}
+
+ static uint32_t ToSecondsSinceEpoch(GcClock::Tick Tick)
+ {
+ return gsl::narrow<uint32_t>(std::chrono::duration_cast<std::chrono::seconds>(GcClock::Duration(Tick)).count());
+ }
+ std::atomic_uint32_t SecondsSinceEpoch;
+};
+
+} // namespace zen
diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h
index 97357e5cb..fce05766f 100644
--- a/src/zenstore/include/zenstore/blockstore.h
+++ b/src/zenstore/include/zenstore/blockstore.h
@@ -94,7 +94,7 @@ struct BlockStoreFile : public RefCounted
IoBuffer GetChunk(uint64_t Offset, uint64_t Size);
void Read(void* Data, uint64_t Size, uint64_t FileOffset);
void Write(const void* Data, uint64_t Size, uint64_t FileOffset);
- void Flush();
+ void Flush(uint64_t FinalSize = (uint64_t)-1);
BasicFile& GetBasicFile();
void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun);
bool IsOpen() const;
@@ -107,7 +107,7 @@ private:
const std::filesystem::path m_Path;
IoBuffer m_IoBuffer;
BasicFile m_File;
- uint64_t m_CachedFileSize = 0;
+ std::atomic<uint64_t> m_CachedFileSize = 0;
};
class BlockStoreCompactState;
@@ -127,7 +127,8 @@ public:
typedef std::vector<std::pair<size_t, BlockStoreLocation>> MovedChunksArray;
typedef std::vector<size_t> ChunkIndexArray;
- typedef std::function<bool(const MovedChunksArray& MovedChunks, uint64_t FreedDiskSpace)> CompactCallback;
+ typedef std::function<bool(const MovedChunksArray& MovedChunks, const ChunkIndexArray& ScrubbedChunks, uint64_t FreedDiskSpace)>
+ CompactCallback;
typedef std::function<uint64_t()> ClaimDiskReserveCallback;
typedef std::function<bool(size_t ChunkIndex, const void* Data, uint64_t Size)> IterateChunksSmallSizeCallback;
typedef std::function<bool(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback;
@@ -146,18 +147,19 @@ public:
typedef tsl::robin_set<uint32_t> BlockIndexSet;
- // Ask the store to create empty blocks for all locations that does not have a block
// Remove any block that is not referenced
- void SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks);
- BlockEntryCountMap GetBlocksToCompact(const BlockUsageMap& BlockUsage, uint32_t BlockUsageThresholdPercent);
+ // Return a list of blocks that are not present
+ [[nodiscard]] BlockIndexSet SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks);
+ BlockEntryCountMap GetBlocksToCompact(const BlockUsageMap& BlockUsage, uint32_t BlockUsageThresholdPercent);
void Close();
void WriteChunk(const void* Data, uint64_t Size, uint32_t Alignment, const WriteChunkCallback& Callback);
typedef std::function<void(std::span<BlockStoreLocation> Locations)> WriteChunksCallback;
- void WriteChunks(std::span<IoBuffer> Datas, uint32_t Alignment, const WriteChunksCallback& Callback);
+ void WriteChunks(std::span<const IoBuffer> Datas, uint32_t Alignment, const WriteChunksCallback& Callback);
+ bool HasChunk(const BlockStoreLocation& Location) const;
IoBuffer TryGetChunk(const BlockStoreLocation& Location) const;
void Flush(bool ForceNewBlock);
@@ -172,7 +174,7 @@ public:
void CompactBlocks(
const BlockStoreCompactState& CompactState,
uint32_t PayloadAlignment,
- const CompactCallback& ChangeCallback = [](const MovedChunksArray&, uint64_t) { return true; },
+ const CompactCallback& ChangeCallback = [](const MovedChunksArray&, const ChunkIndexArray&, uint64_t) { return true; },
const ClaimDiskReserveCallback& DiskReserveCallback = []() { return 0; },
std::string_view LogPrefix = {});
diff --git a/src/zenstore/include/zenstore/buildstore/buildstore.h b/src/zenstore/include/zenstore/buildstore/buildstore.h
new file mode 100644
index 000000000..adf48dc26
--- /dev/null
+++ b/src/zenstore/include/zenstore/buildstore/buildstore.h
@@ -0,0 +1,229 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zenstore/blockstore.h>
+
+#include <zencore/iohash.h>
+#include <zenstore/accesstime.h>
+#include <zenstore/caslog.h>
+#include <zenstore/gc.h>
+#include "../compactcas.h"
+#include "../filecas.h"
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_map.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+namespace zen {
+
+struct BuildStoreConfig
+{
+ std::filesystem::path RootDirectory;
+ uint32_t SmallBlobBlockStoreMaxBlockSize = 256 * 1024 * 1024;
+ uint64_t SmallBlobBlockStoreMaxBlockEmbedSize = 1 * 1024 * 1024;
+ uint32_t SmallBlobBlockStoreAlignement = 16;
+ uint32_t MetadataBlockStoreMaxBlockSize = 64 * 1024 * 1024;
+ uint32_t MetadataBlockStoreAlignement = 8;
+    uint64_t MaxDiskSpaceLimit = 1ull * 1024u * 1024u * 1024u * 1024u; // 1TB
+};
+
+class BuildStore : public GcReferencer, public GcReferenceLocker, public GcStorage
+{
+public:
+ explicit BuildStore(const BuildStoreConfig& Config, GcManager& Gc);
+ virtual ~BuildStore();
+
+ void PutBlob(const IoHash& BlobHashes, const IoBuffer& Payload);
+ IoBuffer GetBlob(const IoHash& BlobHashes);
+
+ struct BlobExistsResult
+ {
+ bool HasBody = 0;
+ bool HasMetadata = 0;
+ };
+
+ std::vector<BlobExistsResult> BlobsExists(std::span<const IoHash> BlobHashes);
+
+ void PutMetadatas(std::span<const IoHash> BlobHashes, std::span<const IoBuffer> MetaDatas);
+ std::vector<IoBuffer> GetMetadatas(std::span<const IoHash> BlobHashes, WorkerThreadPool* OptionalWorkerPool);
+
+ void Flush();
+
+ struct StorageStats
+ {
+ uint64_t EntryCount = 0;
+ uint64_t LargeBlobCount = 0;
+ uint64_t LargeBlobBytes = 0;
+ uint64_t SmallBlobCount = 0;
+ uint64_t SmallBlobBytes = 0;
+ uint64_t MetadataCount = 0;
+ uint64_t MetadataByteCount = 0;
+ };
+
+ StorageStats GetStorageStats() const;
+
+#if ZEN_WITH_TESTS
+ std::optional<AccessTime> GetLastAccessTime(const IoHash& Key) const;
+ bool SetLastAccessTime(const IoHash& Key, const AccessTime& Time);
+#endif // ZEN_WITH_TESTS
+
+private:
+ LoggerRef Log() { return m_Log; }
+
+ void CompactState();
+
+ uint64_t ReadPayloadLog(const RwLock::ExclusiveLockScope&, const std::filesystem::path& LogPath, uint64_t SkipEntryCount);
+ uint64_t ReadMetadataLog(const RwLock::ExclusiveLockScope&, const std::filesystem::path& LogPath, uint64_t SkipEntryCount);
+ void WriteAccessTimes(const RwLock::ExclusiveLockScope&, const std::filesystem::path& AccessTimesPath);
+ void ReadAccessTimes(const RwLock::ExclusiveLockScope&, const std::filesystem::path& AccessTimesPath);
+
+ //////// GcReferencer
+ virtual std::string GetGcName(GcCtx& Ctx) override;
+ virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override;
+ virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override;
+ virtual std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) override;
+
+ //////// GcReferenceLocker
+ virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) override;
+
+ //////// GcStorage
+ virtual void ScrubStorage(ScrubContext& ScrubCtx) override;
+ virtual GcStorageSize StorageSize() const override;
+
+#pragma pack(push)
+#pragma pack(1)
+ struct PayloadEntry
+ {
+ PayloadEntry() {}
+ PayloadEntry(uint64_t Flags, uint64_t Size)
+ {
+ ZEN_ASSERT((Size & 0x00ffffffffffffffu) == Size);
+ ZEN_ASSERT((Flags & (kTombStone | kStandalone)) == Flags);
+ FlagsAndSize = (Size << 8) | Flags;
+ }
+ static const uint8_t kTombStone = 0x10u; // Represents a deleted key/value
+ static const uint8_t kStandalone = 0x20u; // This payload is stored as a standalone value
+
+ uint64_t FlagsAndSize = 0;
+ uint64_t GetSize() const { return FlagsAndSize >> 8; }
+ uint8_t GetFlags() const { return uint8_t(FlagsAndSize & 0xff); }
+ void AddFlag(uint8_t Flag) { FlagsAndSize |= Flag; }
+ void SetSize(uint64_t Size)
+ {
+ ZEN_ASSERT((Size & 0x00ffffffffffffffu) == Size);
+ FlagsAndSize = (Size << 8) | (FlagsAndSize & 0xff);
+ }
+ void SetFlags(uint8_t Flags) { FlagsAndSize = (FlagsAndSize & 0xffffffffffffff00u) | Flags; }
+ };
+ static_assert(sizeof(PayloadEntry) == 8);
+
+ struct PayloadDiskEntry
+ {
+ PayloadEntry Entry; // 8 bytes
+ IoHash BlobHash; // 20 bytes
+ };
+ static_assert(sizeof(PayloadDiskEntry) == 28);
+
+ struct MetadataEntry
+ {
+ BlockStoreLocation Location; // 12 bytes
+
+ ZenContentType ContentType = ZenContentType::kCOUNT; // 1 byte
+ static const uint8_t kTombStone = 0x10u; // Represents a deleted key/value
+ uint8_t Flags = 0; // 1 byte
+
+ uint8_t Reserved1 = 0;
+ uint8_t Reserved2 = 0;
+ };
+ static_assert(sizeof(MetadataEntry) == 16);
+
+ struct MetadataDiskEntry
+ {
+ MetadataEntry Entry; // 16 bytes
+ IoHash BlobHash; // 20 bytes
+ uint8_t Reserved1 = 0;
+ uint8_t Reserved2 = 0;
+ uint8_t Reserved3 = 0;
+ uint8_t Reserved4 = 0;
+ };
+ static_assert(sizeof(MetadataDiskEntry) == 40);
+
+#pragma pack(pop)
+
+ static bool ValidatePayloadDiskEntry(const PayloadDiskEntry& Entry, std::string& OutReason);
+ static bool ValidateMetadataDiskEntry(const MetadataDiskEntry& Entry, std::string& OutReason);
+
+ struct PayloadIndex
+ {
+ uint32_t Index = std::numeric_limits<uint32_t>::max();
+
+ operator bool() const { return Index != std::numeric_limits<uint32_t>::max(); };
+ PayloadIndex() = default;
+ explicit PayloadIndex(size_t InIndex) : Index(uint32_t(InIndex)) {}
+ operator size_t() const { return Index; };
+ inline auto operator<=>(const PayloadIndex& Other) const = default;
+ };
+
+ struct MetadataIndex
+ {
+ uint32_t Index = std::numeric_limits<uint32_t>::max();
+
+ operator bool() const { return Index != std::numeric_limits<uint32_t>::max(); };
+ MetadataIndex() = default;
+ explicit MetadataIndex(size_t InIndex) : Index(uint32_t(InIndex)) {}
+ operator size_t() const { return Index; };
+ inline auto operator<=>(const MetadataIndex& Other) const = default;
+ };
+
+ struct BlobIndex
+ {
+ uint32_t Index = std::numeric_limits<uint32_t>::max();
+
+ operator bool() const { return Index != std::numeric_limits<uint32_t>::max(); };
+ BlobIndex() = default;
+ explicit BlobIndex(size_t InIndex) : Index(uint32_t(InIndex)) {}
+ operator size_t() const { return Index; };
+ inline auto operator<=>(const BlobIndex& Other) const = default;
+ };
+
+ struct BlobEntry
+ {
+ PayloadIndex Payload;
+ MetadataIndex Metadata;
+ AccessTime LastAccessTime;
+ };
+ static_assert(sizeof(BlobEntry) == 12);
+
+ LoggerRef m_Log;
+ const BuildStoreConfig m_Config;
+ GcManager& m_Gc;
+
+ mutable RwLock m_Lock;
+
+ std::vector<PayloadEntry> m_PayloadEntries;
+ std::vector<MetadataEntry> m_MetadataEntries;
+
+ std::vector<BlobEntry> m_BlobEntries;
+ tsl::robin_map<IoHash, BlobIndex, IoHash::Hasher> m_BlobLookup;
+
+ FileCasStrategy m_LargeBlobStore;
+ CasContainerStrategy m_SmallBlobStore;
+ BlockStore m_MetadataBlockStore;
+
+ TCasLogFile<PayloadDiskEntry> m_PayloadlogFile;
+ TCasLogFile<MetadataDiskEntry> m_MetadatalogFile;
+ uint64_t m_BlobLogFlushPosition = 0;
+ uint64_t m_MetaLogFlushPosition = 0;
+
+ std::unique_ptr<HashSet> m_TrackedCacheKeys;
+ std::atomic<uint64_t> m_LastAccessTimeUpdateCount;
+
+ friend class BuildStoreGcReferenceChecker;
+ friend class BuildStoreGcReferencePruner;
+ friend class BuildStoreGcCompator;
+};
+
+void buildstore_forcelink();
+
+} // namespace zen
diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h
index 4f5c905ee..11d13bede 100644
--- a/src/zenstore/include/zenstore/cache/cachedisklayer.h
+++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h
@@ -5,6 +5,7 @@
#include "cacheshared.h"
#include <zencore/stats.h>
+#include <zenstore/accesstime.h>
#include <zenstore/blockstore.h>
#include <zenstore/caslog.h>
@@ -12,8 +13,9 @@ ZEN_THIRD_PARTY_INCLUDES_START
#include <tsl/robin_map.h>
ZEN_THIRD_PARTY_INCLUDES_END
+#include <EASTL/string.h>
+#include <EASTL/unordered_map.h>
#include <filesystem>
-#include <unordered_map>
namespace zen {
@@ -118,6 +120,9 @@ public:
struct Configuration
{
+ typedef eastl::unordered_map<std::string, BucketConfiguration, std::hash<std::string>, std::equal_to<std::string>>
+ BucketConfigMap_t;
+ BucketConfigMap_t BucketConfigMap;
BucketConfiguration BucketConfig;
uint64_t MemCacheTargetFootprintBytes = 512 * 1024 * 1024;
uint64_t MemCacheTrimIntervalSeconds = 60;
@@ -176,7 +181,7 @@ public:
~ZenCacheDiskLayer();
struct GetBatchHandle;
- GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResult);
+ GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult);
void EndGetBatch(GetBatchHandle* Batch) noexcept;
bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue);
void Get(std::string_view Bucket, const IoHash& HashKey, GetBatchHandle& BatchHandle);
@@ -185,16 +190,16 @@ public:
PutBatchHandle* BeginPutBatch(std::vector<PutResult>& OutResult);
void EndPutBatch(PutBatchHandle* Batch) noexcept;
- PutResult Put(std::string_view Bucket,
- const IoHash& HashKey,
- const ZenCacheValue& Value,
- std::span<IoHash> References,
- bool Overwrite,
- PutBatchHandle* OptionalBatchHandle);
- bool Drop();
- bool DropBucket(std::string_view Bucket);
- void Flush();
- void ScrubStorage(ScrubContext& Ctx);
+ PutResult Put(std::string_view Bucket,
+ const IoHash& HashKey,
+ const ZenCacheValue& Value,
+ std::span<IoHash> References,
+ bool Overwrite,
+ PutBatchHandle* OptionalBatchHandle);
+ std::function<void()> Drop();
+ std::function<void()> DropBucket(std::string_view Bucket);
+ void Flush();
+ void ScrubStorage(ScrubContext& Ctx);
void DiscoverBuckets();
GcStorageSize StorageSize() const;
@@ -224,27 +229,30 @@ public:
*/
struct CacheBucket : public GcReferencer
{
- CacheBucket(GcManager& Gc, std::atomic_uint64_t& OuterCacheMemoryUsage, std::string BucketName, const BucketConfiguration& Config);
+ CacheBucket(GcManager& Gc,
+ std::atomic_uint64_t& OuterCacheMemoryUsage,
+ std::string_view BucketName,
+ const BucketConfiguration& Config);
~CacheBucket();
bool OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true);
struct GetBatchHandle;
- GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResult);
+ GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult);
void EndGetBatch(GetBatchHandle* Batch) noexcept;
bool Get(const IoHash& HashKey, ZenCacheValue& OutValue);
void Get(const IoHash& HashKey, GetBatchHandle& BatchHandle);
struct PutBatchHandle;
- PutBatchHandle* BeginPutBatch(std::vector<ZenCacheDiskLayer::PutResult>& OutResult);
- void EndPutBatch(PutBatchHandle* Batch) noexcept;
+ PutBatchHandle* BeginPutBatch(std::vector<ZenCacheDiskLayer::PutResult>& OutResult);
+ void EndPutBatch(PutBatchHandle* Batch) noexcept;
PutResult Put(const IoHash& HashKey,
const ZenCacheValue& Value,
std::span<IoHash> References,
bool Overwrite,
PutBatchHandle* OptionalBatchHandle);
- uint64_t MemCacheTrim(GcClock::TimePoint ExpireTime);
- bool Drop();
+ uint64_t MemCacheTrim(GcClock::TimePoint ExpireTime);
+ std::function<void()> Drop();
void Flush();
void ScrubStorage(ScrubContext& Ctx);
RwLock::SharedLockScope GetGcReferencerLock();
@@ -413,20 +421,23 @@ public:
void SaveSnapshot(const std::function<uint64_t()>& ClaimDiskReserveFunc = []() { return 0; });
void WriteIndexSnapshot(
RwLock::ExclusiveLockScope&,
- bool FlushLockPosition,
+ uint64_t LogPosition,
+ bool ResetLog,
const std::function<uint64_t()>& ClaimDiskReserveFunc = []() { return 0; })
{
- WriteIndexSnapshotLocked(FlushLockPosition, ClaimDiskReserveFunc);
+ WriteIndexSnapshotLocked(LogPosition, ResetLog, ClaimDiskReserveFunc);
}
void WriteIndexSnapshot(
RwLock::SharedLockScope&,
- bool FlushLockPosition,
+ uint64_t LogPosition,
+ bool ResetLog,
const std::function<uint64_t()>& ClaimDiskReserveFunc = []() { return 0; })
{
- WriteIndexSnapshotLocked(FlushLockPosition, ClaimDiskReserveFunc);
+ WriteIndexSnapshotLocked(LogPosition, ResetLog, ClaimDiskReserveFunc);
}
void WriteIndexSnapshotLocked(
- bool FlushLockPosition,
+ uint64_t LogPosition,
+ bool ResetLog,
const std::function<uint64_t()>& ClaimDiskReserveFunc = []() { return 0; });
void CompactState(RwLock::ExclusiveLockScope& IndexLock,
@@ -498,18 +509,20 @@ private:
bool StartAsyncMemCacheTrim();
void MemCacheTrim();
- GcManager& m_Gc;
- JobQueue& m_JobQueue;
- std::filesystem::path m_RootDir;
- Configuration m_Configuration;
- std::atomic_uint64_t m_TotalMemCachedSize{};
- std::atomic_bool m_IsMemCacheTrimming = false;
- std::atomic<GcClock::Tick> m_NextAllowedTrimTick;
- mutable RwLock m_Lock;
- std::unordered_map<std::string, std::unique_ptr<CacheBucket>> m_Buckets;
- std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets;
- uint32_t m_UpdateCaptureRefCounter = 0;
- std::unique_ptr<std::vector<std::string>> m_CapturedBuckets;
+ typedef eastl::unordered_map<std::string, std::unique_ptr<CacheBucket>, std::hash<std::string>, std::equal_to<std::string>> BucketMap_t;
+
+ GcManager& m_Gc;
+ JobQueue& m_JobQueue;
+ std::filesystem::path m_RootDir;
+ Configuration m_Configuration;
+ std::atomic_uint64_t m_TotalMemCachedSize{};
+ std::atomic_bool m_IsMemCacheTrimming = false;
+ std::atomic<GcClock::Tick> m_NextAllowedTrimTick;
+ mutable RwLock m_Lock;
+ BucketMap_t m_Buckets;
+ std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets;
+ uint32_t m_UpdateCaptureRefCounter = 0;
+ std::unique_ptr<std::vector<std::string>> m_CapturedBuckets;
ZenCacheDiskLayer(const ZenCacheDiskLayer&) = delete;
ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete;
diff --git a/src/zenstore/include/zenstore/cache/cacheshared.h b/src/zenstore/include/zenstore/cache/cacheshared.h
index dc0c341d0..8f40ae727 100644
--- a/src/zenstore/include/zenstore/cache/cacheshared.h
+++ b/src/zenstore/include/zenstore/cache/cacheshared.h
@@ -6,6 +6,8 @@
#include <zencore/iohash.h>
#include <zenstore/gc.h>
+#include <EASTL/fixed_vector.h>
+
#include <gsl/gsl-lite.hpp>
#include <unordered_map>
@@ -32,6 +34,8 @@ struct ZenCacheValue
IoHash RawHash = IoHash::Zero;
};
+typedef eastl::fixed_vector<ZenCacheValue, 16> ZenCacheValueVec_t;
+
struct CacheValueDetails
{
struct ValueDetails
@@ -76,42 +80,4 @@ enum class PutStatus
bool IsKnownBadBucketName(std::string_view BucketName);
bool ValidateIoBuffer(ZenContentType ContentType, IoBuffer Buffer);
-//////////////////////////////////////////////////////////////////////////
-
-// This store the access time as seconds since epoch internally in a 32-bit value giving is a range of 136 years since epoch
-struct AccessTime
-{
- explicit AccessTime(GcClock::Tick Tick) noexcept : SecondsSinceEpoch(ToSeconds(Tick)) {}
- AccessTime& operator=(GcClock::Tick Tick) noexcept
- {
- SecondsSinceEpoch.store(ToSeconds(Tick), std::memory_order_relaxed);
- return *this;
- }
- operator GcClock::Tick() const noexcept
- {
- return std::chrono::duration_cast<GcClock::Duration>(std::chrono::seconds(SecondsSinceEpoch.load(std::memory_order_relaxed)))
- .count();
- }
-
- AccessTime(AccessTime&& Rhs) noexcept : SecondsSinceEpoch(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed)) {}
- AccessTime(const AccessTime& Rhs) noexcept : SecondsSinceEpoch(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed)) {}
- AccessTime& operator=(AccessTime&& Rhs) noexcept
- {
- SecondsSinceEpoch.store(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed), std::memory_order_relaxed);
- return *this;
- }
- AccessTime& operator=(const AccessTime& Rhs) noexcept
- {
- SecondsSinceEpoch.store(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed), std::memory_order_relaxed);
- return *this;
- }
-
-private:
- static uint32_t ToSeconds(GcClock::Tick Tick)
- {
- return gsl::narrow<uint32_t>(std::chrono::duration_cast<std::chrono::seconds>(GcClock::Duration(Tick)).count());
- }
- std::atomic_uint32_t SecondsSinceEpoch;
-};
-
} // namespace zen
diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h
index 581f7861b..b6e8e7565 100644
--- a/src/zenstore/include/zenstore/cache/structuredcachestore.h
+++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h
@@ -88,7 +88,7 @@ public:
void EndPutBatch(PutBatchHandle* Batch) noexcept;
struct GetBatchHandle;
- GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResults);
+ GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResults);
void EndGetBatch(GetBatchHandle* Batch) noexcept;
bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue);
@@ -104,8 +104,8 @@ public:
void EnumerateBucketContents(std::string_view Bucket,
std::function<void(const IoHash& Key, const CacheValueDetails::ValueDetails& Details)>& Fn) const;
- bool Drop();
- void Flush();
+ std::function<void()> Drop();
+ void Flush();
// GcStorage
virtual void ScrubStorage(ScrubContext& ScrubCtx) override;
@@ -225,14 +225,14 @@ public:
class GetBatch
{
public:
- GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, std::vector<ZenCacheValue>& OutResult);
+ GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, ZenCacheValueVec_t& OutResult);
~GetBatch();
private:
ZenCacheStore& m_CacheStore;
ZenCacheNamespace* m_Store = nullptr;
ZenCacheNamespace::GetBatchHandle* m_NamespaceBatchHandle = nullptr;
- std::vector<ZenCacheValue>& Results;
+ ZenCacheValueVec_t& Results;
friend class ZenCacheStore;
};
diff --git a/src/zenstore/include/zenstore/chunkedfile.h b/src/zenstore/include/zenstore/chunkedfile.h
deleted file mode 100644
index c6330bdbd..000000000
--- a/src/zenstore/include/zenstore/chunkedfile.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#include <zencore/iobuffer.h>
-#include <zencore/iohash.h>
-#include <zencore/zencore.h>
-
-#include <functional>
-#include <vector>
-
-namespace zen {
-
-class BasicFile;
-
-struct ChunkedInfo
-{
- uint64_t RawSize = 0;
- IoHash RawHash;
- std::vector<uint32_t> ChunkSequence;
- std::vector<IoHash> ChunkHashes;
-};
-
-struct ChunkSource
-{
- uint64_t Offset; // 8
- uint32_t Size; // 4
-};
-
-struct ChunkedInfoWithSource
-{
- ChunkedInfo Info;
- std::vector<ChunkSource> ChunkSources;
-};
-
-struct ChunkedParams
-{
- bool UseThreshold = true;
- size_t MinSize = (2u * 1024u) - 128u;
- size_t MaxSize = (16u * 1024u);
- size_t AvgSize = (3u * 1024u);
-};
-
-static const ChunkedParams UShaderByteCodeParams = {.UseThreshold = true, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340};
-
-ChunkedInfoWithSource ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params = {});
-void Reconstruct(const ChunkedInfo& Info,
- const std::filesystem::path& TargetPath,
- std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk);
-IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info);
-ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer);
-
-void chunkedfile_forcelink();
-} // namespace zen
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
index 3daae0a93..3223fba39 100644
--- a/src/zenstore/include/zenstore/gc.h
+++ b/src/zenstore/include/zenstore/gc.h
@@ -55,6 +55,7 @@ struct GcSettings
{
GcClock::TimePoint CacheExpireTime = GcClock::Now();
GcClock::TimePoint ProjectStoreExpireTime = GcClock::Now();
+ GcClock::TimePoint BuildStoreExpireTime = GcClock::Now();
bool CollectSmallObjects = false;
bool IsDeleteMode = false;
bool SkipCidDelete = false;
@@ -412,6 +413,7 @@ struct GcSchedulerConfig
std::chrono::seconds Interval{};
std::chrono::seconds MaxCacheDuration{86400};
std::chrono::seconds MaxProjectStoreDuration{604800};
+ std::chrono::seconds MaxBuildStoreDuration{604800};
bool CollectSmallObjects = true;
bool Enabled = true;
uint64_t DiskReserveSize = 1ul << 28;
@@ -496,6 +498,7 @@ public:
bool CollectSmallObjects = false;
std::chrono::seconds MaxCacheDuration = std::chrono::seconds::max();
std::chrono::seconds MaxProjectStoreDuration = std::chrono::seconds::max();
+ std::chrono::seconds MaxBuildStoreDuration = std::chrono::seconds::max();
uint64_t DiskSizeSoftLimit = 0;
bool SkipCid = false;
bool SkipDelete = false;
@@ -528,6 +531,7 @@ private:
void SchedulerThread();
bool CollectGarbage(const GcClock::TimePoint& CacheExpireTime,
const GcClock::TimePoint& ProjectStoreExpireTime,
+ const GcClock::TimePoint& BuildStoreExpireTime,
bool Delete,
bool CollectSmallObjects,
bool SkipCid,
@@ -582,16 +586,3 @@ private:
void gc_forcelink();
} // namespace zen
-
-template<>
-struct fmt::formatter<zen::GcClock::TimePoint> : formatter<string_view>
-{
- template<typename FormatContext>
- auto format(const zen::GcClock::TimePoint& TimePoint, FormatContext& ctx) const
- {
- std::time_t Time = std::chrono::system_clock::to_time_t(TimePoint);
- char TimeString[std::size("yyyy-mm-ddThh:mm:ss")];
- std::strftime(std::data(TimeString), std::size(TimeString), "%FT%T", std::localtime(&Time));
- return fmt::format_to(ctx.out(), "{}", TimeString);
- }
-};
diff --git a/src/zenstore/workspaces.cpp b/src/zenstore/workspaces.cpp
index 02a83d2a6..0ca2adab2 100644
--- a/src/zenstore/workspaces.cpp
+++ b/src/zenstore/workspaces.cpp
@@ -444,7 +444,7 @@ Workspaces::RefreshWorkspaceShares(const Oid& WorkspaceId)
{
const std::filesystem::path& RootPath = Workspace->GetConfig().RootPath;
std::filesystem::path ConfigPath = RootPath / WorkspaceConfigName;
- if (std::filesystem::exists(ConfigPath))
+ if (IsFile(ConfigPath))
{
std::string Error;
std::vector<Workspaces::WorkspaceShareConfiguration> WorkspaceShares = ReadWorkspaceConfig(m_Log, RootPath, Error);
@@ -458,7 +458,7 @@ Workspaces::RefreshWorkspaceShares(const Oid& WorkspaceId)
{
const std::filesystem::path& SharePath = Configuration.SharePath;
- if (std::filesystem::is_directory(RootPath / SharePath))
+ if (IsDir(RootPath / SharePath))
{
DeletedShares.erase(Configuration.Id);
@@ -808,7 +808,7 @@ Workspaces::ReadConfig(const LoggerRef& InLog, const std::filesystem::path& Work
ZEN_DEBUG("Reading workspaces state from {}", WorkspaceStatePath);
const std::filesystem::path ConfigPath = WorkspaceStatePath / WorkspacesConfigName;
- if (std::filesystem::exists(ConfigPath))
+ if (IsFile(ConfigPath))
{
std::vector<Workspaces::WorkspaceConfiguration> Workspaces =
WorkspacesFromJson(IoBufferBuilder::MakeFromFile(ConfigPath), OutError);
@@ -847,7 +847,7 @@ Workspaces::ReadWorkspaceConfig(const LoggerRef& InLog, const std::filesystem::p
ZEN_DEBUG("Reading workspace state from {}", WorkspaceRoot);
std::filesystem::path ConfigPath = WorkspaceRoot / WorkspaceConfigName;
- if (std::filesystem::exists(ConfigPath))
+ if (IsFile(ConfigPath))
{
std::vector<Workspaces::WorkspaceShareConfiguration> WorkspaceShares =
WorkspaceSharesFromJson(IoBufferBuilder::MakeFromFile(ConfigPath), OutError);
@@ -886,7 +886,7 @@ Workspaces::AddWorkspace(const LoggerRef& Log, const std::filesystem::path& Work
{
throw std::invalid_argument(fmt::format("invalid root path '{}' for workspace {}", Configuration.RootPath, Configuration.Id));
}
- if (!std::filesystem::is_directory(Configuration.RootPath))
+ if (!IsDir(Configuration.RootPath))
{
throw std::invalid_argument(
fmt::format("workspace root path '{}' does not exist for workspace '{}'", Configuration.RootPath, Configuration.Id));
@@ -965,7 +965,7 @@ Workspaces::AddWorkspaceShare(const LoggerRef& Log,
throw std::invalid_argument(
fmt::format("workspace share path '{}' is not a sub-path of workspace path '{}'", Configuration.SharePath, WorkspaceRoot));
}
- if (!std::filesystem::is_directory(WorkspaceRoot / Configuration.SharePath))
+ if (!IsDir(WorkspaceRoot / Configuration.SharePath))
{
throw std::invalid_argument(
fmt::format("workspace share path '{}' does not exist in workspace path '{}'", Configuration.SharePath, WorkspaceRoot));
@@ -1244,7 +1244,7 @@ Workspaces::FindWorkspaceShare(const Oid& WorkspaceId, const Oid& ShareId, bool
const Workspaces::WorkspaceConfiguration& WorkspaceConfig = Workspace->GetConfig();
const Workspaces::WorkspaceShareConfiguration& ShareConfig = Share->GetConfig();
std::filesystem::path FullSharePath = WorkspaceConfig.RootPath / ShareConfig.SharePath;
- if (std::filesystem::is_directory(FullSharePath))
+ if (IsDir(FullSharePath))
{
if (ForceRefresh || !Share->IsInitialized())
{
@@ -1306,18 +1306,18 @@ namespace {
std::filesystem::path EmptyFolder(RootPath / "empty_folder");
std::filesystem::path FirstFolder(RootPath / "first_folder");
- std::filesystem::create_directory(FirstFolder);
+ CreateDirectories(FirstFolder);
Result.push_back(std::make_pair(FirstFolder / "first_folder_blob1.bin", CreateRandomBlob(22)));
Result.push_back(std::make_pair(FirstFolder / "first_folder_blob2.bin", CreateRandomBlob(122)));
std::filesystem::path SecondFolder(RootPath / "second_folder");
- std::filesystem::create_directory(SecondFolder);
+ CreateDirectories(SecondFolder);
Result.push_back(std::make_pair(SecondFolder / "second_folder_blob1.bin", CreateRandomBlob(522)));
Result.push_back(std::make_pair(SecondFolder / "second_folder_blob2.bin", CreateRandomBlob(122)));
Result.push_back(std::make_pair(SecondFolder / "second_folder_blob3.bin", CreateRandomBlob(225)));
std::filesystem::path SecondFolderChild(SecondFolder / "child_in_second");
- std::filesystem::create_directory(SecondFolderChild);
+ CreateDirectories(SecondFolderChild);
Result.push_back(std::make_pair(SecondFolderChild / "second_child_folder_blob1.bin", CreateRandomBlob(622)));
for (const auto& It : Result)
@@ -1365,13 +1365,13 @@ TEST_CASE("workspaces.scanfolder")
Structure->IterateEntries([&](const Oid& Id, const FolderStructure::FileEntry& Entry) {
std::filesystem::path AbsPath = RootPath / Entry.RelativePath;
- CHECK(std::filesystem::is_regular_file(AbsPath));
- CHECK(std::filesystem::file_size(AbsPath) == Entry.Size);
+ CHECK(IsFile(AbsPath));
+ CHECK(FileSizeFromPath(AbsPath) == Entry.Size);
const FolderStructure::FileEntry* FindEntry = Structure->FindEntry(Id);
CHECK(FindEntry);
std::filesystem::path Path = RootPath / FindEntry->RelativePath;
CHECK(AbsPath == Path);
- CHECK(std::filesystem::file_size(AbsPath) == FindEntry->Size);
+ CHECK(FileSizeFromPath(AbsPath) == FindEntry->Size);
});
}
diff --git a/src/zenstore/xmake.lua b/src/zenstore/xmake.lua
index f0bd64d2e..031a66829 100644
--- a/src/zenstore/xmake.lua
+++ b/src/zenstore/xmake.lua
@@ -8,3 +8,4 @@ target('zenstore')
add_includedirs("include", {public=true})
add_deps("zencore", "zenutil")
add_packages("vcpkg::robin-map")
+ add_packages("vcpkg::eastl", {public=true});
diff --git a/src/zenstore/zenstore.cpp b/src/zenstore/zenstore.cpp
index c697647d2..654fb3510 100644
--- a/src/zenstore/zenstore.cpp
+++ b/src/zenstore/zenstore.cpp
@@ -5,6 +5,7 @@
#if ZEN_WITH_TESTS
# include <zenstore/blockstore.h>
+# include <zenstore/buildstore/buildstore.h>
# include <zenstore/cache/structuredcachestore.h>
# include <zenstore/workspaces.h>
# include <zenstore/gc.h>
@@ -19,6 +20,7 @@ namespace zen {
void
zenstore_forcelinktests()
{
+ buildstore_forcelink();
CAS_forcelink();
filecas_forcelink();
blockstore_forcelink();