diff options
| author | Dan Engelbrecht <[email protected]> | 2022-03-11 12:17:46 +0100 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2022-03-31 11:28:31 +0200 |
| commit | 769dc100be423f43df52e24b78b210a969520478 (patch) | |
| tree | f2eec935b3fa51d97899945e0ff5ea484c69800c /zenstore/compactcas.cpp | |
| parent | Fix gc shutdown stalling if shutting down while gc is running (diff) | |
| download | zen-769dc100be423f43df52e24b78b210a969520478.tar.xz zen-769dc100be423f43df52e24b78b210a969520478.zip | |
WIP
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 203 |
1 file changed, 94 insertions, 109 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 3bf0c70df..a2142b497 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -261,43 +261,52 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName); - RwLock::ExclusiveLockScope _(m_LocationMapLock); + std::vector<CasDiskLocation> ChunkLocations; // Sorted by position + std::vector<IoHash> ChunkHashes; // Same sort order as ChunkLocations + std::unordered_set<IoHash, IoHash::Hasher> ChunksToKeep; + const uint64_t ChunkCount = m_LocationMap.size(); + uint64_t TotalSize{}; + { + RwLock::ExclusiveLockScope _(m_LocationMapLock); - Flush(); + Flush(); - std::vector<IoHash> Candidates; - std::vector<IoHash> ChunksToKeep; - std::vector<IoHash> ChunksToDelete; - const uint64_t ChunkCount = m_LocationMap.size(); - uint64_t TotalSize{}; + ChunkLocations.reserve(m_LocationMap.size()); + ChunkHashes.reserve(m_LocationMap.size()); - Candidates.reserve(m_LocationMap.size()); + for (auto& Entry : m_LocationMap) + { + ChunkHashes.push_back(Entry.first); + TotalSize += Entry.second.GetSize(); + } - for (auto& Entry : m_LocationMap) - { - Candidates.push_back(Entry.first); - TotalSize += Entry.second.GetSize(); - } + GcCtx.FilterCas(ChunkHashes, [&ChunksToKeep](const IoHash& Hash, bool Keep) { + if (Keep) + { + ChunksToKeep.insert(Hash); + } + }); - ChunksToKeep.reserve(Candidates.size()); - GcCtx.FilterCas(Candidates, [&ChunksToKeep, &ChunksToDelete](const IoHash& Hash, bool Keep) { - if (Keep) + if (m_LocationMap.empty() || ChunksToKeep.size() == m_LocationMap.size()) { - ChunksToKeep.push_back(Hash); + ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete", + ChunkCount, + NiceBytes(TotalSize), + m_Config.RootDirectory / m_ContainerBaseName); + return; } - else + + std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) { + auto LhsKeyIt = 
m_LocationMap.find(Lhs); + auto RhsKeyIt = m_LocationMap.find(Rhs); + return LhsKeyIt->second.GetOffset() < RhsKeyIt->second.GetOffset(); + }); + + for (auto Entry : ChunkHashes) { - ChunksToDelete.push_back(Hash); + auto KeyIt = m_LocationMap.find(Entry); + ChunkLocations.push_back(KeyIt->second); } - }); - - if (m_LocationMap.empty() || ChunksToKeep.size() == m_LocationMap.size()) - { - ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete", - ChunkCount, - NiceBytes(TotalSize), - m_Config.RootDirectory / m_ContainerBaseName); - return; } const uint64_t NewChunkCount = ChunksToKeep.size(); @@ -309,109 +318,84 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) NewTotalSize += Loc.GetSize(); } - std::error_code Error; - DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Error); - if (Error) - { - ZEN_ERROR("get disk space FAILED, reason '{}'", Error.message()); - return; - } - - if (Space.Free < NewTotalSize + (64 << 20)) - { - ZEN_INFO("garbage collect from '{}' FAILED, required disk space {}, free {}", - m_Config.RootDirectory / m_ContainerBaseName, - NiceBytes(NewTotalSize), - NiceBytes(Space.Free)); - return; - } - const bool CollectSmallObjects = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); if (!CollectSmallObjects) { ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}", - m_Config.RootDirectory / m_ContainerBaseName, - ChunkCount - NewChunkCount, - NiceBytes(TotalSize - NewTotalSize), - ChunkCount, - NiceBytes(TotalSize)); + m_Config.RootDirectory / m_ContainerBaseName, + ChunkCount - NewChunkCount, + NiceBytes(TotalSize - NewTotalSize), + ChunkCount, + NiceBytes(TotalSize)); return; } - fs::path TmpSobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".gc.ucas"); - fs::path TmpSlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".gc.ulog"); - + std::vector<IoHash> DeletedChunks; + std::vector<IoHash> MovedChunks; + uint64_t WriteOffset{}; + for (uint64_t ChunkIndex 
= 0; ChunkIndex < ChunkCount; ChunkIndex++) { - ZEN_DEBUG("creating temporary container cas '{}'...", TmpSobsPath); - - TCasLogFile<CasDiskIndexEntry> TmpLog; - BasicFile TmpObjectFile; - bool IsNew = true; - - TmpLog.Open(TmpSlogPath, IsNew); - TmpObjectFile.Open(TmpSobsPath, IsNew); - - std::vector<uint8_t> Chunk; - uint64_t NextInsertOffset{}; - - for (const IoHash& Key : ChunksToKeep) + IoHash ChunkHash = ChunkHashes[ChunkIndex]; + const auto& ChunkLocation = ChunkLocations[ChunkIndex]; + uint64_t ChunkEnd = ChunkLocation.GetOffset() + ChunkLocation.GetSize(); + uint64_t NextChunkWritePos = (ChunkEnd + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); + bool KeepChunk = ChunksToKeep.end() != ChunksToKeep.find(ChunkHash); + if (KeepChunk && WriteOffset == ChunkLocation.GetOffset()) { - const auto Entry = m_LocationMap.find(Key); - const auto& Loc = Entry->second; - - Chunk.resize(Loc.GetSize()); - m_SmallObjectFile.Read(Chunk.data(), Chunk.size(), Loc.GetOffset()); + WriteOffset = ChunkLocation.GetOffset() + ChunkLocation.GetSize(); + continue; + } - const uint64_t InsertOffset = NextInsertOffset; - TmpObjectFile.Write(Chunk.data(), Chunk.size(), InsertOffset); - TmpLog.Append({.Key = Key, .Location = {InsertOffset, Chunk.size()}}); + RwLock::ExclusiveLockScope _(m_LocationMapLock); - NextInsertOffset = (NextInsertOffset + Chunk.size() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); + // Verify if we should still keep the chunk + std::vector<IoHash> Chunks; + GcCtx.FilterCas(ChunkHashes, [&KeepChunk](const IoHash& /*Hash*/, bool Keep) { KeepChunk = Keep; }); + if (!KeepChunk) + { + DeletedChunks.push_back(ChunkHash); + m_CasLog.Append({.Key = ChunkHash, .Location = ChunkLocation, .Flags = CasDiskIndexEntry::kTombstone}); + m_LocationMap.erase(ChunkHash); + if (m_CurrentInsertOffset == NextChunkWritePos) + { + m_CurrentInsertOffset = WriteOffset; + } + m_TotalSize.fetch_sub(static_cast<uint64_t>(ChunkLocation.GetSize())); + continue; } - } - try - { - 
CloseContainer(); + // Move the chunk + std::vector<uint8_t> Chunk; + m_SmallObjectFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset()); + CasDiskLocation NewChunkLocation(WriteOffset, ChunkLocation.GetSize()); + m_SmallObjectFile.Write(Chunk.data(), Chunk.size(), NewChunkLocation.GetOffset()); - fs::path SobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ucas"); - fs::path SidxPath = m_Config.RootDirectory / (m_ContainerBaseName + ".uidx"); - fs::path SlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ulog"); + CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = NewChunkLocation}; + m_CasLog.Append(IndexEntry); - fs::remove(SobsPath); - fs::remove(SidxPath); - fs::remove(SlogPath); + MovedChunks.push_back(ChunkHash); + m_LocationMap[ChunkHash] = NewChunkLocation; - fs::rename(TmpSobsPath, SobsPath); - fs::rename(TmpSlogPath, SlogPath); + WriteOffset = (WriteOffset + ChunkLocation.GetSize() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); + // Update insert location if this is the last chunk in the file + if (m_CurrentInsertOffset == NextChunkWritePos) { - // Create a new empty index file - BasicFile SidxFile; - SidxFile.Open(SidxPath, true); + uint64_t NewChunkEnd = NewChunkLocation.GetOffset() + NewChunkLocation.GetSize(); + m_CurrentInsertOffset = (NewChunkEnd + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); } - - OpenContainer(false /* IsNewStore */); - - GcCtx.DeletedCas(ChunksToDelete); - - ZEN_INFO("garbage collect from '{}' DONE, collected #{} {} chunks of total #{} {}", - m_Config.RootDirectory / m_ContainerBaseName, - ChunkCount - NewChunkCount, - NiceBytes(TotalSize - NewTotalSize), - ChunkCount, - NiceBytes(TotalSize)); } - catch (std::exception& Err) { - ZEN_ERROR("garbage collection FAILED, reason '{}'", Err.what()); - - // Something went wrong, try create a new container - OpenContainer(true /* IsNewStore */); - - GcCtx.DeletedCas(ChunksToDelete); - GcCtx.DeletedCas(ChunksToKeep); + 
RwLock::ExclusiveLockScope _(m_LocationMapLock); + uint64_t CurrentSize = m_SmallObjectFile.FileSize(); + if (CurrentSize > m_CurrentInsertOffset) + { + ZEN_INFO("truncate '{}' DISABLED, from {} to {}", + m_Config.RootDirectory / m_ContainerBaseName, + NiceBytes(CurrentSize), + NiceBytes(m_CurrentInsertOffset)); + } } } @@ -458,6 +442,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) if (Record.Flags & CasDiskIndexEntry::kTombstone) { m_TotalSize.fetch_sub(Record.Location.GetSize()); + m_LocationMap.erase(Record.Key); } else { |