diff options
| author | Dan Engelbrecht <[email protected]> | 2023-10-03 13:31:02 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-10-03 13:31:02 +0200 |
| commit | 68a72b68592c416969bd36f413eb2b2762b9fcff (patch) | |
| tree | 9a5fc28eb9040f010c92f86a1745f9418dfc91ca /src/zenstore/compactcas.cpp | |
| parent | clean up date formatting (#440) (diff) | |
| download | zen-68a72b68592c416969bd36f413eb2b2762b9fcff.tar.xz zen-68a72b68592c416969bd36f413eb2b2762b9fcff.zip | |
faster accesstime save restore (#439)
- Improvement: Reduce time a cache bucket is locked for write when flushing/garbage collecting
- Change format for faster read/write and reduced size on disk
- Don't lock index while writing manifest to disk
- Skip garbage collect if we are currently in a Flush operation
- BlockStore::Flush no longer terminates the block that is currently being written
- Garbage collect references to the block that is currently being written, but keep the block itself since new data may still be added to it
- Fix the used-disk-space calculation in BlockStore::Prune
- Don't materialize data in filecas when we just need the size
Diffstat (limited to 'src/zenstore/compactcas.cpp')
| -rw-r--r-- | src/zenstore/compactcas.cpp | 145 |
1 files changed, 66 insertions, 79 deletions
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 1d1797597..ce2e53527 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -230,7 +230,7 @@ CasContainerStrategy::FilterChunks(HashKeySet& InOutChunks) void CasContainerStrategy::Flush() { - m_BlockStore.Flush(); + m_BlockStore.Flush(/*ForceNewBlock*/ false); m_CasLog.Flush(); MakeIndexSnapshot(); } @@ -801,7 +801,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) auto BlockIt = BlockSizes.find(DiskLocation.GetBlockIndex()); if (BlockIt == BlockSizes.end()) { - ZEN_WARN("Unknown block {} for entry {}", DiskLocation.GetBlockIndex(), Entry.first.ToHexString()); + ZEN_WARN("Unknown block {} for entry {} in '{}'", DiskLocation.GetBlockIndex(), Entry.first.ToHexString(), BasePath); } else { @@ -810,7 +810,10 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) uint64_t BlockSize = BlockIt->second; if (BlockLocation.Offset + BlockLocation.Size > BlockSize) { - ZEN_WARN("Range is outside of block {} for entry {}", BlockLocation.BlockIndex, Entry.first.ToHexString()); + ZEN_WARN("Range is outside of block {} for entry {} in '{}'", + BlockLocation.BlockIndex, + Entry.first.ToHexString(), + BasePath); } else { @@ -1068,7 +1071,6 @@ TEST_CASE("compactcas.gc.removefile") TEST_CASE("compactcas.gc.compact") { - // for (uint32_t i = 0; i < 100; ++i) { ScopedTemporaryDirectory TempDir; @@ -1111,6 +1113,17 @@ TEST_CASE("compactcas.gc.compact") CHECK(Cas.HaveChunk(ChunkHashes[7])); CHECK(Cas.HaveChunk(ChunkHashes[8])); + auto ValidateChunkExists = [&](size_t Index) { + IoBuffer Chunk = Cas.FindChunk(ChunkHashes[Index]); + bool Exists = !!Chunk; + CHECK(Exists); + IoHash Hash = IoHash::HashBuffer(Chunk); + if (ChunkHashes[Index] != Hash) + { + CHECK(fmt::format("{}", ChunkHashes[Index]) == fmt::format("{}", Hash)); + } + }; + // Keep first and last { GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); @@ -1134,8 +1147,8 @@ 
TEST_CASE("compactcas.gc.compact") CHECK(!Cas.HaveChunk(ChunkHashes[7])); CHECK(Cas.HaveChunk(ChunkHashes[8])); - CHECK(ChunkHashes[0] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[0]))); - CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8]))); + ValidateChunkExists(0); + ValidateChunkExists(8); Cas.InsertChunk(Chunks[1], ChunkHashes[1]); Cas.InsertChunk(Chunks[2], ChunkHashes[2]); @@ -1167,7 +1180,7 @@ TEST_CASE("compactcas.gc.compact") CHECK(!Cas.HaveChunk(ChunkHashes[7])); CHECK(Cas.HaveChunk(ChunkHashes[8])); - CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8]))); + ValidateChunkExists(8); Cas.InsertChunk(Chunks[1], ChunkHashes[1]); Cas.InsertChunk(Chunks[2], ChunkHashes[2]); @@ -1201,9 +1214,9 @@ TEST_CASE("compactcas.gc.compact") CHECK(Cas.HaveChunk(ChunkHashes[7])); CHECK(!Cas.HaveChunk(ChunkHashes[8])); - CHECK(ChunkHashes[1] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[1]))); - CHECK(ChunkHashes[4] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[4]))); - CHECK(ChunkHashes[7] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[7]))); + ValidateChunkExists(1); + ValidateChunkExists(4); + ValidateChunkExists(7); Cas.InsertChunk(Chunks[0], ChunkHashes[0]); Cas.InsertChunk(Chunks[2], ChunkHashes[2]); @@ -1236,9 +1249,9 @@ TEST_CASE("compactcas.gc.compact") CHECK(Cas.HaveChunk(ChunkHashes[7])); CHECK(Cas.HaveChunk(ChunkHashes[8])); - CHECK(ChunkHashes[6] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[6]))); - CHECK(ChunkHashes[7] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[7]))); - CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8]))); + ValidateChunkExists(6); + ValidateChunkExists(7); + ValidateChunkExists(8); Cas.InsertChunk(Chunks[0], ChunkHashes[0]); Cas.InsertChunk(Chunks[1], ChunkHashes[1]); @@ -1273,11 +1286,11 @@ TEST_CASE("compactcas.gc.compact") CHECK(!Cas.HaveChunk(ChunkHashes[7])); CHECK(Cas.HaveChunk(ChunkHashes[8])); - CHECK(ChunkHashes[0] == 
IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[0]))); - CHECK(ChunkHashes[2] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[2]))); - CHECK(ChunkHashes[4] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[4]))); - CHECK(ChunkHashes[6] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[6]))); - CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8]))); + ValidateChunkExists(0); + ValidateChunkExists(2); + ValidateChunkExists(4); + ValidateChunkExists(6); + ValidateChunkExists(8); Cas.InsertChunk(Chunks[1], ChunkHashes[1]); Cas.InsertChunk(Chunks[3], ChunkHashes[3]); @@ -1286,15 +1299,15 @@ TEST_CASE("compactcas.gc.compact") } // Verify that we nicely appended blocks even after all GC operations - CHECK(ChunkHashes[0] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[0]))); - CHECK(ChunkHashes[1] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[1]))); - CHECK(ChunkHashes[2] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[2]))); - CHECK(ChunkHashes[3] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[3]))); - CHECK(ChunkHashes[4] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[4]))); - CHECK(ChunkHashes[5] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[5]))); - CHECK(ChunkHashes[6] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[6]))); - CHECK(ChunkHashes[7] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[7]))); - CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8]))); + ValidateChunkExists(0); + ValidateChunkExists(1); + ValidateChunkExists(2); + ValidateChunkExists(3); + ValidateChunkExists(4); + ValidateChunkExists(5); + ValidateChunkExists(6); + ValidateChunkExists(7); + ValidateChunkExists(8); } } @@ -1497,6 +1510,7 @@ TEST_CASE("compactcas.threadedinsert") IoBuffer Chunk = CreateRandomChunk(kChunkSize); IoHash Hash = HashBuffer(Chunk); NewChunks[Hash] = Chunk; + GcChunkHashes.insert(Hash); } std::atomic_uint32_t AddedChunkCount; @@ -1522,42 +1536,40 @@ TEST_CASE("compactcas.threadedinsert") }); } - while (AddedChunkCount.load() < 
NewChunks.size()) + std::unordered_set<IoHash, IoHash::Hasher> ChunksToDelete; + std::vector<IoHash> KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end()); + size_t C = 0; + while (C < KeepHashes.size()) { - // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope - for (const auto& Chunk : NewChunks) + if (C % 155 == 0) { - if (Cas.HaveChunk(Chunk.first)) + if (C < KeepHashes.size() - 1) { - GcChunkHashes.emplace(Chunk.first); + ChunksToDelete.insert(KeepHashes[C]); + KeepHashes[C] = KeepHashes[KeepHashes.size() - 1]; + KeepHashes.pop_back(); } - } - std::vector<IoHash> KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end()); - size_t C = 0; - while (C < KeepHashes.size()) - { - if (C % 155 == 0) + if (C + 3 < KeepHashes.size() - 1) { - if (C < KeepHashes.size() - 1) - { - KeepHashes[C] = KeepHashes[KeepHashes.size() - 1]; - KeepHashes.pop_back(); - } - if (C + 3 < KeepHashes.size() - 1) - { - KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1]; - KeepHashes.pop_back(); - } + ChunksToDelete.insert(KeepHashes[C + 3]); + KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1]; + KeepHashes.pop_back(); } - C++; } + C++; + } + while (AddedChunkCount.load() < NewChunks.size()) + { GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); GcCtx.CollectSmallObjects(true); GcCtx.AddRetainedCids(KeepHashes); Cas.CollectGarbage(GcCtx); const HashKeySet& Deleted = GcCtx.DeletedCids(); - Deleted.IterateHashes([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); + Deleted.IterateHashes([&GcChunkHashes, &ChunksToDelete](const IoHash& ChunkHash) { + CHECK(ChunksToDelete.contains(ChunkHash)); + GcChunkHashes.erase(ChunkHash); + }); } while (WorkCompleted < NewChunks.size() + Chunks.size()) @@ -1565,40 +1577,15 @@ TEST_CASE("compactcas.threadedinsert") Sleep(1); } - // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope - for 
(const auto& Chunk : NewChunks) - { - if (Cas.HaveChunk(Chunk.first)) - { - GcChunkHashes.emplace(Chunk.first); - } - } - std::vector<IoHash> KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end()); - size_t C = 0; - while (C < KeepHashes.size()) - { - if (C % 155 == 0) - { - if (C < KeepHashes.size() - 1) - { - KeepHashes[C] = KeepHashes[KeepHashes.size() - 1]; - KeepHashes.pop_back(); - } - if (C + 3 < KeepHashes.size() - 1) - { - KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1]; - KeepHashes.pop_back(); - } - } - C++; - } - GcContext GcCtx(GcClock::Now() - std::chrono::hours(24), GcClock::Now() - std::chrono::hours(24)); GcCtx.CollectSmallObjects(true); GcCtx.AddRetainedCids(KeepHashes); Cas.CollectGarbage(GcCtx); const HashKeySet& Deleted = GcCtx.DeletedCids(); - Deleted.IterateHashes([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); + Deleted.IterateHashes([&GcChunkHashes, &ChunksToDelete](const IoHash& ChunkHash) { + CHECK(ChunksToDelete.contains(ChunkHash)); + GcChunkHashes.erase(ChunkHash); + }); } { WorkCompleted = 0; |