aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/compactcas.cpp
diff options
context:
space:
mode:
author Dan Engelbrecht <[email protected]> 2023-10-03 13:31:02 +0200
committer GitHub <[email protected]> 2023-10-03 13:31:02 +0200
commit 68a72b68592c416969bd36f413eb2b2762b9fcff (patch)
tree 9a5fc28eb9040f010c92f86a1745f9418dfc91ca /src/zenstore/compactcas.cpp
parent clean up date formatting (#440) (diff)
download zen-68a72b68592c416969bd36f413eb2b2762b9fcff.tar.xz
zen-68a72b68592c416969bd36f413eb2b2762b9fcff.zip
faster accesstime save restore (#439)
- Improvement: Reduce time a cache bucket is locked for write when flushing/garbage collecting
- Change format for faster read/write and reduced size on disk
- Don't lock index while writing manifest to disk
- Skip garbage collect if we are currently in a Flush operation
- BlockStore::Flush no longer terminates currently writing block
- Garbage collect references to currently writing block but keep the block as new data may be added
- Fix BlockStore::Prune used disk space calculation
- Don't materialize data in filecas when we just need the size
Diffstat (limited to 'src/zenstore/compactcas.cpp')
-rw-r--r-- src/zenstore/compactcas.cpp 145
1 files changed, 66 insertions, 79 deletions
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index 1d1797597..ce2e53527 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -230,7 +230,7 @@ CasContainerStrategy::FilterChunks(HashKeySet& InOutChunks)
void
CasContainerStrategy::Flush()
{
- m_BlockStore.Flush();
+ m_BlockStore.Flush(/*ForceNewBlock*/ false);
m_CasLog.Flush();
MakeIndexSnapshot();
}
@@ -801,7 +801,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
auto BlockIt = BlockSizes.find(DiskLocation.GetBlockIndex());
if (BlockIt == BlockSizes.end())
{
- ZEN_WARN("Unknown block {} for entry {}", DiskLocation.GetBlockIndex(), Entry.first.ToHexString());
+ ZEN_WARN("Unknown block {} for entry {} in '{}'", DiskLocation.GetBlockIndex(), Entry.first.ToHexString(), BasePath);
}
else
{
@@ -810,7 +810,10 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
uint64_t BlockSize = BlockIt->second;
if (BlockLocation.Offset + BlockLocation.Size > BlockSize)
{
- ZEN_WARN("Range is outside of block {} for entry {}", BlockLocation.BlockIndex, Entry.first.ToHexString());
+ ZEN_WARN("Range is outside of block {} for entry {} in '{}'",
+ BlockLocation.BlockIndex,
+ Entry.first.ToHexString(),
+ BasePath);
}
else
{
@@ -1068,7 +1071,6 @@ TEST_CASE("compactcas.gc.removefile")
TEST_CASE("compactcas.gc.compact")
{
- // for (uint32_t i = 0; i < 100; ++i)
{
ScopedTemporaryDirectory TempDir;
@@ -1111,6 +1113,17 @@ TEST_CASE("compactcas.gc.compact")
CHECK(Cas.HaveChunk(ChunkHashes[7]));
CHECK(Cas.HaveChunk(ChunkHashes[8]));
+ auto ValidateChunkExists = [&](size_t Index) {
+ IoBuffer Chunk = Cas.FindChunk(ChunkHashes[Index]);
+ bool Exists = !!Chunk;
+ CHECK(Exists);
+ IoHash Hash = IoHash::HashBuffer(Chunk);
+ if (ChunkHashes[Index] != Hash)
+ {
+ CHECK(fmt::format("{}", ChunkHashes[Index]) == fmt::format("{}", Hash));
+ }
+ };
+
// Keep first and last
{
GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
@@ -1134,8 +1147,8 @@ TEST_CASE("compactcas.gc.compact")
CHECK(!Cas.HaveChunk(ChunkHashes[7]));
CHECK(Cas.HaveChunk(ChunkHashes[8]));
- CHECK(ChunkHashes[0] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[0])));
- CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8])));
+ ValidateChunkExists(0);
+ ValidateChunkExists(8);
Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
@@ -1167,7 +1180,7 @@ TEST_CASE("compactcas.gc.compact")
CHECK(!Cas.HaveChunk(ChunkHashes[7]));
CHECK(Cas.HaveChunk(ChunkHashes[8]));
- CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8])));
+ ValidateChunkExists(8);
Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
@@ -1201,9 +1214,9 @@ TEST_CASE("compactcas.gc.compact")
CHECK(Cas.HaveChunk(ChunkHashes[7]));
CHECK(!Cas.HaveChunk(ChunkHashes[8]));
- CHECK(ChunkHashes[1] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[1])));
- CHECK(ChunkHashes[4] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[4])));
- CHECK(ChunkHashes[7] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[7])));
+ ValidateChunkExists(1);
+ ValidateChunkExists(4);
+ ValidateChunkExists(7);
Cas.InsertChunk(Chunks[0], ChunkHashes[0]);
Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
@@ -1236,9 +1249,9 @@ TEST_CASE("compactcas.gc.compact")
CHECK(Cas.HaveChunk(ChunkHashes[7]));
CHECK(Cas.HaveChunk(ChunkHashes[8]));
- CHECK(ChunkHashes[6] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[6])));
- CHECK(ChunkHashes[7] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[7])));
- CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8])));
+ ValidateChunkExists(6);
+ ValidateChunkExists(7);
+ ValidateChunkExists(8);
Cas.InsertChunk(Chunks[0], ChunkHashes[0]);
Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
@@ -1273,11 +1286,11 @@ TEST_CASE("compactcas.gc.compact")
CHECK(!Cas.HaveChunk(ChunkHashes[7]));
CHECK(Cas.HaveChunk(ChunkHashes[8]));
- CHECK(ChunkHashes[0] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[0])));
- CHECK(ChunkHashes[2] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[2])));
- CHECK(ChunkHashes[4] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[4])));
- CHECK(ChunkHashes[6] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[6])));
- CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8])));
+ ValidateChunkExists(0);
+ ValidateChunkExists(2);
+ ValidateChunkExists(4);
+ ValidateChunkExists(6);
+ ValidateChunkExists(8);
Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
@@ -1286,15 +1299,15 @@ TEST_CASE("compactcas.gc.compact")
}
// Verify that we nicely appended blocks even after all GC operations
- CHECK(ChunkHashes[0] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[0])));
- CHECK(ChunkHashes[1] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[1])));
- CHECK(ChunkHashes[2] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[2])));
- CHECK(ChunkHashes[3] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[3])));
- CHECK(ChunkHashes[4] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[4])));
- CHECK(ChunkHashes[5] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[5])));
- CHECK(ChunkHashes[6] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[6])));
- CHECK(ChunkHashes[7] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[7])));
- CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8])));
+ ValidateChunkExists(0);
+ ValidateChunkExists(1);
+ ValidateChunkExists(2);
+ ValidateChunkExists(3);
+ ValidateChunkExists(4);
+ ValidateChunkExists(5);
+ ValidateChunkExists(6);
+ ValidateChunkExists(7);
+ ValidateChunkExists(8);
}
}
@@ -1497,6 +1510,7 @@ TEST_CASE("compactcas.threadedinsert")
IoBuffer Chunk = CreateRandomChunk(kChunkSize);
IoHash Hash = HashBuffer(Chunk);
NewChunks[Hash] = Chunk;
+ GcChunkHashes.insert(Hash);
}
std::atomic_uint32_t AddedChunkCount;
@@ -1522,42 +1536,40 @@ TEST_CASE("compactcas.threadedinsert")
});
}
- while (AddedChunkCount.load() < NewChunks.size())
+ std::unordered_set<IoHash, IoHash::Hasher> ChunksToDelete;
+ std::vector<IoHash> KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end());
+ size_t C = 0;
+ while (C < KeepHashes.size())
{
- // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope
- for (const auto& Chunk : NewChunks)
+ if (C % 155 == 0)
{
- if (Cas.HaveChunk(Chunk.first))
+ if (C < KeepHashes.size() - 1)
{
- GcChunkHashes.emplace(Chunk.first);
+ ChunksToDelete.insert(KeepHashes[C]);
+ KeepHashes[C] = KeepHashes[KeepHashes.size() - 1];
+ KeepHashes.pop_back();
}
- }
- std::vector<IoHash> KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end());
- size_t C = 0;
- while (C < KeepHashes.size())
- {
- if (C % 155 == 0)
+ if (C + 3 < KeepHashes.size() - 1)
{
- if (C < KeepHashes.size() - 1)
- {
- KeepHashes[C] = KeepHashes[KeepHashes.size() - 1];
- KeepHashes.pop_back();
- }
- if (C + 3 < KeepHashes.size() - 1)
- {
- KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1];
- KeepHashes.pop_back();
- }
+ ChunksToDelete.insert(KeepHashes[C + 3]);
+ KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1];
+ KeepHashes.pop_back();
}
- C++;
}
+ C++;
+ }
+ while (AddedChunkCount.load() < NewChunks.size())
+ {
GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
GcCtx.AddRetainedCids(KeepHashes);
Cas.CollectGarbage(GcCtx);
const HashKeySet& Deleted = GcCtx.DeletedCids();
- Deleted.IterateHashes([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); });
+ Deleted.IterateHashes([&GcChunkHashes, &ChunksToDelete](const IoHash& ChunkHash) {
+ CHECK(ChunksToDelete.contains(ChunkHash));
+ GcChunkHashes.erase(ChunkHash);
+ });
}
while (WorkCompleted < NewChunks.size() + Chunks.size())
@@ -1565,40 +1577,15 @@ TEST_CASE("compactcas.threadedinsert")
Sleep(1);
}
- // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope
- for (const auto& Chunk : NewChunks)
- {
- if (Cas.HaveChunk(Chunk.first))
- {
- GcChunkHashes.emplace(Chunk.first);
- }
- }
- std::vector<IoHash> KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end());
- size_t C = 0;
- while (C < KeepHashes.size())
- {
- if (C % 155 == 0)
- {
- if (C < KeepHashes.size() - 1)
- {
- KeepHashes[C] = KeepHashes[KeepHashes.size() - 1];
- KeepHashes.pop_back();
- }
- if (C + 3 < KeepHashes.size() - 1)
- {
- KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1];
- KeepHashes.pop_back();
- }
- }
- C++;
- }
-
GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24));
GcCtx.CollectSmallObjects(true);
GcCtx.AddRetainedCids(KeepHashes);
Cas.CollectGarbage(GcCtx);
const HashKeySet& Deleted = GcCtx.DeletedCids();
- Deleted.IterateHashes([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); });
+ Deleted.IterateHashes([&GcChunkHashes, &ChunksToDelete](const IoHash& ChunkHash) {
+ CHECK(ChunksToDelete.contains(ChunkHash));
+ GcChunkHashes.erase(ChunkHash);
+ });
}
{
WorkCompleted = 0;