diff options
| author | Dan Engelbrecht <[email protected]> | 2022-03-14 23:50:38 +0100 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2022-03-31 11:28:31 +0200 |
| commit | 481e3cfad99792ac8b2de733ae3a97db08ceb666 (patch) | |
| tree | e70dc226cf4c638acc86c91277164f0230775d4a | |
| parent | Split chunkbundler into size-limited blocks (diff) | |
| download | zen-481e3cfad99792ac8b2de733ae3a97db08ceb666.tar.xz zen-481e3cfad99792ac8b2de733ae3a97db08ceb666.zip | |
block files
| -rw-r--r-- | zenstore/chunkbundler.cpp | 88 | ||||
| -rw-r--r-- | zenstore/chunkbundler.h | 12 |
2 files changed, 78 insertions, 22 deletions
diff --git a/zenstore/chunkbundler.cpp b/zenstore/chunkbundler.cpp index 87b71df09..2c1924303 100644 --- a/zenstore/chunkbundler.cpp +++ b/zenstore/chunkbundler.cpp @@ -167,6 +167,7 @@ ChunkBundler::InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& uint64_t CurrentBlockSize = m_CurrentBlock.lock()->FileSize(); if (CurrentBlockSize + m_CurrentInsertOffset > m_MaxBlockSize) { + RwLock::ExclusiveLockScope __(m_LocationMapLock); m_CurrentFileIndex++; std::filesystem::path path = m_RootDirectory / "ucas" / (std::to_string(m_CurrentFileIndex) + ".ucas"); auto SmallObjectFile = std::make_shared<BasicFile>(); @@ -221,12 +222,7 @@ ChunkBundler::HaveChunk(const IoHash& ChunkHash) { RwLock::SharedLockScope _(m_LocationMapLock); - if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) - { - return true; - } - - return false; + return m_LocationMap.contains(ChunkHash); } void @@ -245,6 +241,7 @@ ChunkBundler::FilterChunks(CasChunkSet& InOutChunks) void ChunkBundler::Flush() { + RwLock::ExclusiveLockScope _l(m_InsertLock); m_OpLog.Flush(); m_CurrentBlock.lock()->Flush(); } @@ -386,13 +383,15 @@ ChunkBundler::CollectGarbage(GcContext& GcCtx) // added betwen each move of a block. ZEN_INFO("collecting garbage from '{}'", m_RootDirectory / m_ContainerBaseName); + std::unordered_map<uint16_t, std::unordered_map<IoHash, CompactDiskLocation, IoHash::Hasher>> KeepChunksPerBlock; std::vector<IoHash> DeletedChunks; - std::unordered_set<int> BlocksToReWrite; + std::unordered_set<uint16_t> BlocksToReWrite; { RwLock::ExclusiveLockScope _i(m_InsertLock); RwLock::ExclusiveLockScope _l(m_LocationMapLock); - Flush(); + m_OpLog.Flush(); + m_CurrentBlock.lock()->Flush(); BlocksToReWrite.reserve(m_OpenBlocks.size()); @@ -407,27 +406,31 @@ ChunkBundler::CollectGarbage(GcContext& GcCtx) std::vector<IoHash> TotalChunkHashes; TotalChunkHashes.reserve(m_LocationMap.size()); - for (auto& Entry : m_LocationMap) + for (const auto& Entry : m_LocationMap) { TotalChunkHashes.push_back(Entry.first); } - std::vector<IoHash> ChunkHashes; // Same sort order as ChunkLocations - ChunkHashes.reserve(m_LocationMap.size()); + //std::vector<IoHash> ChunkHashes; // Same sort order as ChunkLocations + //ChunkHashes.reserve(m_LocationMap.size()); const bool CollectSmallObjects = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); + uint64_t NewTotalSize = 0; GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { if (Keep) { - ChunkHashes.push_back(ChunkHash); + auto KeyIt = m_LocationMap.find(ChunkHash); + const auto& ChunkLocation = KeyIt->second; + KeepChunksPerBlock[ChunkLocation.BlockIndex][ChunkHash] = ChunkLocation; + NewTotalSize += ChunkLocation.Size; } else { DeletedChunks.push_back(ChunkHash); } }); - + /* if (ChunkHashes.size() == TotalChunkCount) { ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete", @@ -455,19 +458,19 @@ ChunkBundler::CollectGarbage(GcContext& GcCtx) ChunkLocations.push_back(ChunkLocation); NewTotalSize += ChunkLocation.Size; } - + */ if (!CollectSmallObjects) { ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}", m_RootDirectory / m_ContainerBaseName, - TotalChunkCount - ChunkCount, + DeletedChunks.size(), NiceBytes(TotalSize - NewTotalSize), TotalChunkCount, NiceBytes(TotalSize)); return; } - for (auto ChunkHash : DeletedChunks) + for (const auto& ChunkHash : DeletedChunks) { auto KeyIt = m_LocationMap.find(ChunkHash); const auto& ChunkLocation = KeyIt->second; @@ -491,6 +494,59 @@ ChunkBundler::CollectGarbage(GcContext& GcCtx) { // Rewrite all BlocksToReWrite + for (auto BlockIndex : BlocksToReWrite) + { + std::shared_ptr<BasicFile> BlockFile; + { + RwLock::ExclusiveLockScope _i(m_InsertLock); + BlockFile = m_OpenBlocks[BlockIndex]; + } + std::filesystem::path BlockPath = m_RootDirectory / "ucas" / (std::to_string(BlockIndex) + ".ucas"); + auto& KeepChunksForBlock = KeepChunksPerBlock[BlockIndex]; + if (KeepChunksForBlock.empty()) + { + RwLock::ExclusiveLockScope _i(m_InsertLock); + BlockFile = m_OpenBlocks[BlockIndex]; + BlockFile->Close(); // TODO: We can't know that someone isn't holding a IoBuffer for this block at this point! + m_OpenBlocks.erase(BlockIndex); + fs::remove(BlockPath); + } + else + { + std::filesystem::path TmpBlockPath = m_RootDirectory / "ucas" / (std::to_string(BlockIndex) + ".gc.ucas"); + auto TmpBlock = std::make_shared<BasicFile>(); + TmpBlock->Open(TmpBlockPath, true); + std::vector<uint8_t> Chunk; + uint64_t WriteOffset = 0; + + for (auto& Entry : KeepChunksForBlock) + { + const CompactDiskLocation& ChunkLocation = Entry.second; + Chunk.resize(ChunkLocation.Size); + BlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); + CompactDiskLocation NewChunkLocation(ChunkLocation.BlockIndex, + gsl::narrow<uint32_t>(WriteOffset), + gsl::narrow<uint32_t>(Chunk.size())); + TmpBlock->Write(Chunk.data(), Chunk.size(), NewChunkLocation.Offset); + Entry.second = NewChunkLocation; + WriteOffset = AlignPositon(WriteOffset + Chunk.size(), m_PayloadAlignment); + } + TmpBlock->Close(); + + RwLock::ExclusiveLockScope _i(m_InsertLock); + RwLock::ExclusiveLockScope _l(m_LocationMapLock); + for (const auto& Entry : KeepChunksForBlock) + { + m_LocationMap[Entry.first] = Entry.second; + m_OpLog.Append({.Key = Entry.first, .Location = Entry.second}); + } + BlockFile->Close(); // TODO: We can't know that someone isn't holding a IoBuffer for this block at this point! + fs::remove(BlockPath); + fs::rename(TmpBlockPath, BlockPath); + BlockFile->Open(BlockPath, false); + } + } + #if 0 // We can break here if we only want to remove items without compacting of space diff --git a/zenstore/chunkbundler.h b/zenstore/chunkbundler.h index d84ee9627..4b24ec958 100644 --- a/zenstore/chunkbundler.h +++ b/zenstore/chunkbundler.h @@ -85,18 +85,18 @@ private: std::string m_ContainerBaseName; RwLock m_LocationMapLock; - RwLock m_InsertLock; // used to serialize inserts std::unordered_map<IoHash, CompactDiskLocation, IoHash::Hasher> m_LocationMap; + std::unordered_map<uint16_t, std::shared_ptr<BasicFile>> m_OpenBlocks; + uint16_t m_CurrentFileIndex = 0; + + RwLock m_InsertLock; // used to serialize inserts + std::weak_ptr<BasicFile> m_CurrentBlock; std::atomic_uint32_t m_CurrentInsertOffset{}; + std::atomic_uint64_t m_CurrentIndexOffset{}; std::atomic_uint64_t m_TotalSize{}; void MakeIndexSnapshot(); - - // Reserve one block of 1Gb - std::unordered_map<uint16_t, std::shared_ptr<BasicFile>> m_OpenBlocks; - std::weak_ptr<BasicFile> m_CurrentBlock; - uint16_t m_CurrentFileIndex = 0; }; ////////////////////////////////////////////////////////////////////////// |