diff options
| author | Dan Engelbrecht <[email protected]> | 2022-03-11 22:15:39 +0100 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2022-03-31 11:28:31 +0200 |
| commit | 7b9f4bd105c7db6cb94d1129fe7db5e0a323c50d (patch) | |
| tree | b0078b524adab6b874440db392916f2ffe10baee /zenstore/compactcas.cpp | |
| parent | More tests (diff) | |
| download | zen-7b9f4bd105c7db6cb94d1129fe7db5e0a323c50d.tar.xz zen-7b9f4bd105c7db6cb94d1129fe7db5e0a323c50d.zip | |
Simplified logic of last chunk move
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 173 |
1 file changed, 74 insertions, 99 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 020fd6dbc..806cea69f 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -75,12 +75,6 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const const uint64_t InsertOffset = m_CurrentInsertOffset; m_SmallObjectFile.Write(ChunkData, ChunkSize, InsertOffset); - auto VerifyChunkHash = IoHash::HashBuffer(IoBuffer(IoBuffer::Wrap, ChunkData, ChunkSize)); - if (VerifyChunkHash != ChunkHash) - { - ZEN_ASSERT(false); - } - m_CurrentInsertOffset = (m_CurrentInsertOffset + ChunkSize + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); RwLock::ExclusiveLockScope __(m_LocationMapLock); @@ -262,11 +256,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName); - std::vector<CasDiskLocation> ChunkLocations; // Sorted by position - std::vector<IoHash> ChunkHashes; // Same sort order as ChunkLocations - std::unordered_set<IoHash, IoHash::Hasher> ChunksToKeep; - const uint64_t ChunkCount = m_LocationMap.size(); - uint64_t TotalSize{}; + const uint64_t TotalChunkCount = m_LocationMap.size(); + uint64_t TotalSize = m_TotalSize.load(); RwLock::ExclusiveLockScope _i(m_InsertLock); RwLock::ExclusiveLockScope _l(m_LocationMapLock); @@ -279,66 +270,83 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) return; } - ChunkLocations.reserve(m_LocationMap.size()); - ChunkHashes.reserve(m_LocationMap.size()); - + std::vector<IoHash> TotalChunkHashes; + TotalChunkHashes.reserve(m_LocationMap.size()); for (auto& Entry : m_LocationMap) { - ChunkHashes.push_back(Entry.first); - TotalSize += Entry.second.GetSize(); + TotalChunkHashes.push_back(Entry.first); } - GcCtx.FilterCas(ChunkHashes, [&ChunksToKeep](const IoHash& Hash, bool Keep) { + std::vector<IoHash> DeletedChunks; + std::vector<IoHash> ChunkHashes; // Same sort order as ChunkLocations + ChunkHashes.reserve(m_LocationMap.size()); + + 
const bool CollectSmallObjects = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); + + GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { if (Keep) { - ChunksToKeep.insert(Hash); + ChunkHashes.push_back(ChunkHash); + } + else + { + DeletedChunks.push_back(ChunkHash); } }); - if (ChunksToKeep.size() == m_LocationMap.size()) + if (ChunkHashes.size() == TotalChunkCount) { ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete", - ChunkCount, + TotalChunkCount, NiceBytes(TotalSize), m_Config.RootDirectory / m_ContainerBaseName); return; } + const uint64_t ChunkCount = ChunkHashes.size(); + std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) { auto LhsKeyIt = m_LocationMap.find(Lhs); auto RhsKeyIt = m_LocationMap.find(Rhs); return LhsKeyIt->second.GetOffset() < RhsKeyIt->second.GetOffset(); }); + uint64_t NewTotalSize = 0; + std::vector<CasDiskLocation> ChunkLocations; // Sorted by position + ChunkLocations.reserve(ChunkHashes.size()); for (auto Entry : ChunkHashes) { - auto KeyIt = m_LocationMap.find(Entry); - ChunkLocations.push_back(KeyIt->second); + auto KeyIt = m_LocationMap.find(Entry); + const auto& ChunkLocation = KeyIt->second; + ChunkLocations.push_back(ChunkLocation); + NewTotalSize += ChunkLocation.GetSize(); } - const uint64_t NewChunkCount = ChunksToKeep.size(); - uint64_t NewTotalSize = 0; - - for (const IoHash& Key : ChunksToKeep) - { - const CasDiskLocation& Loc = m_LocationMap[Key]; - NewTotalSize += Loc.GetSize(); - } - - const bool CollectSmallObjects = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); - if (!CollectSmallObjects) { ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}", m_Config.RootDirectory / m_ContainerBaseName, - ChunkCount - NewChunkCount, + TotalChunkCount - ChunkCount, NiceBytes(TotalSize - NewTotalSize), - ChunkCount, + TotalChunkCount, NiceBytes(TotalSize)); return; } - std::vector<IoHash> DeletedChunks; + for 
(auto ChunkHash : DeletedChunks) + { + auto KeyIt = m_LocationMap.find(ChunkHash); + const auto& ChunkLocation = KeyIt->second; + uint64_t NextChunkOffset = ChunkLocation.GetOffset() + ChunkLocation.GetSize(); + m_CasLog.Append({.Key = ChunkHash, .Location = ChunkLocation, .Flags = CasDiskIndexEntry::kTombstone}); + m_LocationMap.erase(ChunkHash); + if (m_CurrentInsertOffset == NextChunkOffset) + { + m_CurrentInsertOffset = ChunkLocation.GetOffset(); + } + m_TotalSize.fetch_sub(static_cast<uint64_t>(ChunkLocation.GetSize())); + } + std::vector<IoHash> MovedChunks; uint64_t WriteOffset{}; @@ -347,24 +355,9 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { IoHash ChunkHash = ChunkHashes[ChunkIndex]; const auto& ChunkLocation = ChunkLocations[ChunkIndex]; - bool KeepChunk = ChunksToKeep.end() != ChunksToKeep.find(ChunkHash); - - uint64_t NextWriteOffset = ChunkLocation.GetOffset() + ChunkLocation.GetSize(); - NextWriteOffset = (NextWriteOffset + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); - if (!KeepChunk) - { - DeletedChunks.push_back(ChunkHash); - m_CasLog.Append({.Key = ChunkHash, .Location = ChunkLocation, .Flags = CasDiskIndexEntry::kTombstone}); - m_LocationMap.erase(ChunkHash); - if (m_CurrentInsertOffset == NextWriteOffset) - { - m_CurrentInsertOffset = WriteOffset; - } - m_TotalSize.fetch_sub(static_cast<uint64_t>(ChunkLocation.GetSize())); - ChunkIndex++; - continue; - } + uint64_t NextChunkOffset = + (ChunkLocation.GetOffset() + ChunkLocation.GetSize() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); uint64_t FreeChunkSize = ChunkLocation.GetOffset() - WriteOffset; @@ -373,17 +366,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { // We should move as many keep chunk at the end as we can possibly fit uint64_t LastKeepChunkIndex = ChunkHashes.size() - 1; - while (LastKeepChunkIndex > ChunkIndex) - { - IoHash LastChunkHash = ChunkHashes[LastKeepChunkIndex]; - bool LastKeepChunk = ChunksToKeep.end() != 
ChunksToKeep.find(LastChunkHash); - if (LastKeepChunk) - { - break; - } - LastKeepChunkIndex--; - } - if (LastKeepChunkIndex == ChunkIndex) { break; @@ -400,11 +382,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) std::vector<uint8_t> Chunk; Chunk.resize(LastChunkLocation.GetSize()); m_SmallObjectFile.Read(Chunk.data(), Chunk.size(), LastChunkLocation.GetOffset()); - auto VerifyChunkHash = IoHash::HashBuffer(IoBuffer(IoBuffer::Wrap, Chunk.data(), Chunk.size())); - if (VerifyChunkHash != LastChunkHash) - { - ZEN_ASSERT(false); - } CasDiskLocation NewChunkLocation(WriteOffset, LastChunkLocation.GetSize()); m_SmallObjectFile.Write(Chunk.data(), Chunk.size(), NewChunkLocation.GetOffset()); @@ -413,17 +390,17 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) m_LocationMap[LastChunkHash] = NewChunkLocation; ChunkHashes.pop_back(); - uint64_t OldNextChunkWritePos = LastChunkLocation.GetOffset() + Chunk.size(); - OldNextChunkWritePos = (OldNextChunkWritePos + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); - - if (m_CurrentInsertOffset == OldNextChunkWritePos) - { - m_CurrentInsertOffset = LastChunkLocation.GetOffset(); - } + uint64_t LastChunkNextChunkOffset = + (LastChunkLocation.GetOffset() + Chunk.size() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); WriteOffset = (WriteOffset + NewChunkLocation.GetSize() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); FreeChunkSize = ChunkLocation.GetOffset() - WriteOffset; MovedChunks.push_back(LastChunkHash); + + if (m_CurrentInsertOffset == LastChunkNextChunkOffset) + { + m_CurrentInsertOffset = WriteOffset; + } } // TODO: We could keep some wiggle room here, don't move chunk if we only move it a very small amount @@ -432,11 +409,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) std::vector<uint8_t> Chunk; Chunk.resize(ChunkLocation.GetSize()); m_SmallObjectFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset()); - auto VerifyChunkHash = 
IoHash::HashBuffer(IoBuffer(IoBuffer::Wrap, Chunk.data(), Chunk.size())); - if (VerifyChunkHash != ChunkHash) - { - ZEN_ASSERT(false); - } CasDiskLocation NewChunkLocation(WriteOffset, ChunkLocation.GetSize()); m_SmallObjectFile.Write(Chunk.data(), Chunk.size(), NewChunkLocation.GetOffset()); @@ -444,34 +416,37 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) m_CasLog.Append(IndexEntry); m_LocationMap[ChunkHash] = NewChunkLocation; - uint64_t ChunkEnd = ChunkLocation.GetOffset() + ChunkLocation.GetSize(); - uint64_t NextChunkWritePos = (ChunkEnd + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); - // Update insert location if this is the last chunk in the file - - if (m_CurrentInsertOffset == NextChunkWritePos) - { - uint64_t NewChunkEnd = NewChunkLocation.GetOffset() + NewChunkLocation.GetSize(); - m_CurrentInsertOffset = (NewChunkEnd + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); - } - MovedChunks.push_back(ChunkHash); - WriteOffset = (WriteOffset + ChunkLocation.GetSize() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); + WriteOffset = (NewChunkLocation.GetOffset() + ChunkLocation.GetSize() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); } else { - WriteOffset = NextWriteOffset; + WriteOffset = NextChunkOffset; } + + // Update insert location if this is the last chunk in the file + if (m_CurrentInsertOffset == NextChunkOffset) + { + m_CurrentInsertOffset = WriteOffset; + } + ChunkIndex++; } - uint64_t CurrentSize = m_SmallObjectFile.FileSize(); - if (CurrentSize > m_CurrentInsertOffset) + if (ChunkCount == 0) { - ZEN_INFO("new write position '{}', from {} to {}", - m_Config.RootDirectory / m_ContainerBaseName, - NiceBytes(CurrentSize), - NiceBytes(m_CurrentInsertOffset)); + m_CurrentInsertOffset = 0; } + + uint64_t CurrentSize = m_SmallObjectFile.FileSize(); + ZEN_INFO("garbage collection complete '{}', space {} to {}, moved {} and delete {} chunks", + m_Config.RootDirectory / m_ContainerBaseName, + NiceBytes(CurrentSize), + 
NiceBytes(m_CurrentInsertOffset), + MovedChunks.size(), + DeletedChunks.size()); + + // TODO: Should we truncate the file or just keep the size of the file and reuse the space? } void |