aboutsummaryrefslogtreecommitdiff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2022-03-22 23:25:55 +0100
committer: Dan Engelbrecht <[email protected]> 2022-03-31 11:29:26 +0200
commit: c9ecf2a3014d3a2b0e8efbc236300814f6d55e45 (patch)
tree: 291023601678769756d92871b2be3c54bceff207 /zenstore/compactcas.cpp
parent: Try to recreate gc reserve after successful garbage collect if it is not present (diff)
download: zen-c9ecf2a3014d3a2b0e8efbc236300814f6d55e45.tar.xz
zen-c9ecf2a3014d3a2b0e8efbc236300814f6d55e45.zip
Make garbage collection state copy less complex
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r-- zenstore/compactcas.cpp 130
1 files changed, 64 insertions, 66 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 4e379194a..731f16a38 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -427,16 +427,16 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
for (const auto& Block : m_ChunkBlocks)
{
- uint64_t WindowStart = 0;
- uint64_t WindowEnd = WindowSize;
- auto& SmallObjectFile = *Block.second;
- SmallObjectFile.Open();
- const uint64_t FileSize = SmallObjectFile.FileSize();
+ uint64_t WindowStart = 0;
+ uint64_t WindowEnd = WindowSize;
+ auto& BlockFile = *Block.second;
+ BlockFile.Open();
+ const uint64_t FileSize = BlockFile.FileSize();
do
{
const uint64_t ChunkSize = Min(WindowSize, FileSize - WindowStart);
- SmallObjectFile.Read(BufferBase, ChunkSize, WindowStart);
+ BlockFile.Read(BufferBase, ChunkSize, WindowStart);
for (auto& Entry : m_LocationMap)
{
@@ -475,11 +475,9 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
for (const CasDiskIndexEntry& Entry : BigChunks)
{
IoHashStream Hasher;
- const CasLocation Location = Entry.Location.Get(m_PayloadAlignment);
- auto& SmallObjectFile = *m_ChunkBlocks[Location.BlockIndex];
- SmallObjectFile.StreamByteRange(Location.Offset, Location.Size, [&](const void* Data, uint64_t Size) {
- Hasher.Append(Data, Size);
- });
+ const CasLocation Location = Entry.Location.Get(m_PayloadAlignment);
+ auto& BlockFile = *m_ChunkBlocks[Location.BlockIndex];
+ BlockFile.StreamByteRange(Location.Offset, Location.Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); });
IoHash ComputedHash = Hasher.GetHash();
if (Entry.Key != ComputedHash)
@@ -549,8 +547,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
// path to the next new block.
ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName);
- std::unordered_map<IoHash, CasLocation, IoHash::Hasher> LocationMap;
- size_t BlockCount;
+ std::unordered_map<IoHash, CasDiskLocation, IoHash::Hasher> LocationMap;
+ size_t BlockCount;
{
RwLock::SharedLockScope _i(m_InsertLock);
RwLock::SharedLockScope _l(m_LocationMapLock);
@@ -559,12 +557,11 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
uint32_t WritingBlock = m_WriteBlockIndex.load(std::memory_order_acquire);
for (const auto& Entry : m_LocationMap)
{
- CasLocation Location = Entry.second.Get(m_PayloadAlignment);
- if (IsWriting && Location.BlockIndex == WritingBlock)
+ if (IsWriting && Entry.second.GetBlockIndex() == WritingBlock)
{
continue;
}
- LocationMap.emplace(Entry.first, Location);
+ LocationMap.emplace(Entry.first, Entry.second);
}
BlockCount = m_ChunkBlocks.size();
}
@@ -593,11 +590,12 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
for (const auto& Entry : LocationMap)
{
TotalChunkHashes.push_back(Entry.first);
- if (BlockIndexToChunkMapIndex.contains(Entry.second.BlockIndex))
+ uint32_t BlockIndex = Entry.second.GetBlockIndex();
+ if (BlockIndexToChunkMapIndex.contains(BlockIndex))
{
continue;
}
- BlockIndexToChunkMapIndex[Entry.second.BlockIndex] = KeepChunks.size();
+ BlockIndexToChunkMapIndex[BlockIndex] = KeepChunks.size();
KeepChunks.resize(KeepChunks.size() + 1);
KeepChunks.back().reserve(GuesstimateCountPerBlock);
DeleteChunks.resize(DeleteChunks.size() + 1);
@@ -610,21 +608,21 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
uint64_t NewTotalSize = 0;
GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {
- auto KeyIt = LocationMap.find(ChunkHash);
- const CasLocation& ChunkLocation = KeyIt->second;
- size_t ChunkMapIndex = BlockIndexToChunkMapIndex[ChunkLocation.BlockIndex];
+ auto KeyIt = LocationMap.find(ChunkHash);
+ uint32_t BlockIndex = KeyIt->second.GetBlockIndex();
+ size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex];
if (Keep)
{
auto& ChunkMap = KeepChunks[ChunkMapIndex];
ChunkMap.insert(ChunkHash);
- NewTotalSize += ChunkLocation.Size;
+ NewTotalSize += KeyIt->second.GetSize();
}
else
{
auto& ChunkMap = DeleteChunks[ChunkMapIndex];
ChunkMap.insert(ChunkHash);
DeleteCount++;
- BlocksToReWrite.insert(ChunkLocation.BlockIndex);
+ BlocksToReWrite.insert(BlockIndex);
}
});
@@ -642,10 +640,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
// Move all chunks in blocks that have chunks removed to new blocks
- std::shared_ptr<ChunkBlock> NewBlockFile;
- uint64_t WriteOffset = {};
- uint32_t NewBlockIndex = {};
- std::vector<IoHash> DeletedChunks;
+ std::shared_ptr<ChunkBlock> NewBlockFile;
+ uint64_t WriteOffset = {};
+ uint32_t NewBlockIndex = {};
+ std::vector<IoHash> DeletedChunks;
DeletedChunks.reserve(DeleteCount);
std::vector<IoHash> MovedChunks;
DeletedChunks.reserve(MoveCount);
@@ -653,8 +651,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::unordered_map<IoHash, CasDiskLocation> MovedBlockChunks;
for (auto BlockIndex : BlocksToReWrite)
{
- const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex];
- const auto& KeepMap = KeepChunks[ChunkMapIndex];
+ const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex];
+ const auto& KeepMap = KeepChunks[ChunkMapIndex];
if (KeepMap.empty())
{
std::shared_ptr<ChunkBlock> BlockFile;
@@ -663,10 +661,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
const auto& DeleteMap = DeleteChunks[ChunkMapIndex];
for (const auto& ChunkHash : DeleteMap)
{
- auto KeyIt = m_LocationMap.find(ChunkHash);
- const CasLocation& DeleteChunkLocation = KeyIt->second.Get(m_PayloadAlignment);
+ auto KeyIt = m_LocationMap.find(ChunkHash);
+ uint64_t ChunkSize = KeyIt->second.GetSize();
m_CasLog.Append({.Key = ChunkHash, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone});
- m_TotalSize.fetch_sub(static_cast<uint64_t>(DeleteChunkLocation.Size));
+ m_TotalSize.fetch_sub(ChunkSize);
m_LocationMap.erase(KeyIt);
}
DeletedChunks.insert(DeletedChunks.end(), DeleteMap.begin(), DeleteMap.end());
@@ -694,7 +692,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
for (const auto& ChunkHash : KeepMap)
{
auto KeyIt = LocationMap.find(ChunkHash);
- const CasLocation ChunkLocation = KeyIt->second;
+ const CasLocation ChunkLocation = KeyIt->second.Get(m_PayloadAlignment);
Chunk.resize(ChunkLocation.Size);
OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset);
@@ -792,10 +790,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
for (const auto& ChunkHash : DeleteMap)
{
- auto KeyIt = m_LocationMap.find(ChunkHash);
- const CasLocation& DeleteChunkLocation = KeyIt->second.Get(m_PayloadAlignment);
+ auto KeyIt = m_LocationMap.find(ChunkHash);
+ uint64_t ChunkSize = KeyIt->second.GetSize();
m_CasLog.Append({.Key = ChunkHash, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone});
- m_TotalSize.fetch_sub(static_cast<uint64_t>(DeleteChunkLocation.Size));
+ m_TotalSize.fetch_sub(ChunkSize);
m_LocationMap.erase(KeyIt);
}
m_ChunkBlocks[BlockIndex].reset();
@@ -847,8 +845,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
GCReserveFile.SetFileSize(m_MaxBlockSize);
ZEN_DEBUG("recreated garbage collect reserve '{}' FAILED, {} bytes",
- m_Config.RootDirectory / m_ContainerBaseName,
- NiceBytes(Space.Free));
+ m_Config.RootDirectory / m_ContainerBaseName,
+ NiceBytes(Space.Free));
}
void
@@ -909,12 +907,12 @@ CasContainerStrategy::MakeIndexSnapshot()
}
}
- BasicFile SmallObjectIndex;
- SmallObjectIndex.Open(SidxPath, true);
+ BasicFile ObjectIndexFile;
+ ObjectIndexFile.Open(SidxPath, true);
CasDiskIndexHeader Header = {.PayloadAlignement = gsl::narrow<uint32_t>(m_PayloadAlignment), .EntryCount = Entries.size()};
- SmallObjectIndex.Write(&Header, sizeof(CasDiskIndexEntry), 0);
- SmallObjectIndex.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexEntry));
- SmallObjectIndex.Close();
+ ObjectIndexFile.Write(&Header, sizeof(CasDiskIndexEntry), 0);
+ ObjectIndexFile.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexEntry));
+ ObjectIndexFile.Close();
}
catch (std::exception& Err)
{
@@ -1066,8 +1064,8 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
return;
}
- BasicFile SmallObjectFile;
- SmallObjectFile.Open(LegacySobsPath, false);
+ BasicFile BlockFile;
+ BlockFile.Open(LegacySobsPath, false);
std::unordered_map<IoHash, LegacyCasDiskIndexEntry, IoHash::Hasher> LegacyDiskIndex;
@@ -1095,7 +1093,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
}
}
- SmallObjectFile.SetFileSize(MaxUsedSize);
+ BlockFile.SetFileSize(MaxUsedSize);
uint64_t MaxRequiredChunkCount = MaxUsedSize / m_MaxBlockSize;
if (MaxRequiredChunkCount > CasDiskLocation::MaxBlockIndex)
@@ -1134,7 +1132,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
const auto& Entry = LegacyDiskIndex[ChunkHash];
const LegacyCasDiskLocation& ChunkLocation = Entry.Location;
Chunk.resize(ChunkLocation.GetSize());
- SmallObjectFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset());
+ BlockFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset());
if (!NewBlockFile)
{
auto BlockPath = BuildUcasPath(m_BlocksBasePath, NewBlockIndex);
@@ -1144,7 +1142,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
else if (WriteOffset + Chunk.size() > m_MaxBlockSize)
{
uint64_t ChunkEnd = ChunkLocation.GetOffset() + Chunk.size();
- SmallObjectFile.SetFileSize(ChunkEnd);
+ BlockFile.SetFileSize(ChunkEnd);
NewBlockIndex = NewBlockIndex + 1;
auto BlockPath = BuildUcasPath(m_BlocksBasePath, NewBlockIndex);
NewBlockFile = std::make_unique<ChunkBlock>(BlockPath);
@@ -1158,7 +1156,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
}
m_CasLog.Close();
- SmallObjectFile.Close();
+ BlockFile.Close();
std::filesystem::remove(LegacySobsPath);
ZEN_INFO("migrated store {} to {} to chunks", m_Config.RootDirectory / m_ContainerBaseName, NewBlockIndex + 1);
@@ -1167,20 +1165,20 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
if (std::filesystem::is_regular_file(SidxPath))
{
- BasicFile SmallObjectIndex;
- SmallObjectIndex.Open(SidxPath, false);
- uint64_t Size = SmallObjectIndex.FileSize();
+ BasicFile ObjectIndexFile;
+ ObjectIndexFile.Open(SidxPath, false);
+ uint64_t Size = ObjectIndexFile.FileSize();
if (Size >= sizeof(CasDiskIndexHeader))
{
uint64_t ExpectedEntryCount = (Size - sizeof(sizeof(CasDiskIndexHeader))) / sizeof(CasDiskIndexEntry);
CasDiskIndexHeader Header;
- SmallObjectIndex.Read(&Header, sizeof(Header), 0);
+ ObjectIndexFile.Read(&Header, sizeof(Header), 0);
if (Header.Magic == CasDiskIndexHeader::ExpectedMagic && Header.Version == CasDiskIndexHeader::CurrentVersion &&
Header.PayloadAlignement > 0 && Header.EntryCount == ExpectedEntryCount)
{
std::vector<CasDiskIndexEntry> Entries{Header.EntryCount};
- SmallObjectIndex.Read(Entries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
- SmallObjectIndex.Close();
+ ObjectIndexFile.Read(Entries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
+ ObjectIndexFile.Close();
for (const auto& Entry : Entries)
{
m_LocationMap[Entry.Key] = Entry.Location;
@@ -1207,9 +1205,9 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
std::unordered_set<uint32_t> BlockUsage;
for (const auto& Entry : m_LocationMap)
{
- const CasLocation Location = Entry.second.Get(m_PayloadAlignment);
- m_TotalSize.fetch_add(Location.Size);
- BlockUsage.insert(Location.BlockIndex);
+ const auto& Location = Entry.second;
+ m_TotalSize.fetch_add(Location.GetSize());
+ BlockUsage.insert(Location.GetBlockIndex());
}
if (std::filesystem::is_directory(m_BlocksBasePath))
@@ -1252,8 +1250,8 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
continue;
}
auto BlockPath = BuildUcasPath(m_BlocksBasePath, BlockIndex);
- auto SmallObjectFile = std::make_shared<ChunkBlock>(BlockPath);
- m_ChunkBlocks[BlockIndex] = SmallObjectFile;
+ auto BlockFile = std::make_shared<ChunkBlock>(BlockPath);
+ m_ChunkBlocks[BlockIndex] = BlockFile;
}
}
++FolderOffset;
@@ -1994,22 +1992,22 @@ TEST_CASE("compactcas.legacyconversion")
std::filesystem::path SidxPath = CasConfig.RootDirectory / ("test.uidx");
if (std::filesystem::is_regular_file(SidxPath))
{
- BasicFile SmallObjectIndex;
- SmallObjectIndex.Open(SidxPath, false);
- uint64_t Size = SmallObjectIndex.FileSize();
+ BasicFile ObjectIndexFile;
+ ObjectIndexFile.Open(SidxPath, false);
+ uint64_t Size = ObjectIndexFile.FileSize();
if (Size >= sizeof(CasDiskIndexHeader))
{
uint64_t ExpectedEntryCount = (Size - sizeof(sizeof(CasDiskIndexHeader))) / sizeof(CasDiskIndexEntry);
CasDiskIndexHeader Header;
- SmallObjectIndex.Read(&Header, sizeof(Header), 0);
+ ObjectIndexFile.Read(&Header, sizeof(Header), 0);
if (Header.Magic == CasDiskIndexHeader::ExpectedMagic && Header.Version == CasDiskIndexHeader::CurrentVersion &&
Header.PayloadAlignement > 0 && Header.EntryCount == ExpectedEntryCount)
{
LogEntries.resize(Header.EntryCount);
- SmallObjectIndex.Read(LogEntries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
+ ObjectIndexFile.Read(LogEntries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
}
}
- SmallObjectIndex.Close();
+ ObjectIndexFile.Close();
std::filesystem::remove(SidxPath);
}