diff options
| author | Dan Engelbrecht <[email protected]> | 2022-03-17 23:16:57 +0100 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2022-03-31 11:28:32 +0200 |
| commit | db802259323abf66bc49186408461db1df3447dd (patch) | |
| tree | 2d36faf094215a6347b6853f130b7fea7f49adaa /zenstore/compactcas.cpp | |
| parent | WIP - bit assignment for CasDiskLocation (diff) | |
| download | zen-db802259323abf66bc49186408461db1df3447dd.tar.xz zen-db802259323abf66bc49186408461db1df3447dd.zip | |
Use bitpacking for Cas block location
Diffstat (limited to 'zenstore/compactcas.cpp')
| -rw-r--r-- | zenstore/compactcas.cpp | 129 |
1 file changed, 82 insertions, 47 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 27f0d0e29..aaefa9c6c 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -42,7 +42,7 @@ namespace { std::filesystem::path BuildUcasPath(const std::filesystem::path& RootDirectory, const std::string_view ContainerBaseName, - const uint16_t BlockIndex) + const uint32_t BlockIndex) { return RootDirectory / (std::string(ContainerBaseName) + "." + (std::to_string(BlockIndex) + ".ucas")); } @@ -74,7 +74,7 @@ namespace { struct CasContainerStrategy::ChunkBlock { - ChunkBlock(const std::filesystem::path& RootDirectory, const std::string_view ContainerBaseName, uint16_t BlockIndex); + ChunkBlock(const std::filesystem::path& RootDirectory, const std::string_view ContainerBaseName, uint32_t BlockIndex); ~ChunkBlock(); const std::filesystem::path GetPath() const; void Open(); @@ -95,7 +95,7 @@ private: CasContainerStrategy::ChunkBlock::ChunkBlock(const std::filesystem::path& RootDirectory, const std::string_view ContainerBaseName, - uint16_t BlockIndex) + uint32_t BlockIndex) : m_Path(BuildUcasPath(RootDirectory, ContainerBaseName, BlockIndex)) { } @@ -237,7 +237,7 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const if ((m_CurrentInsertOffset + ChunkSize) > m_MaxBlockSize) { RwLock::ExclusiveLockScope __(m_LocationMapLock); - uint16_t NewBlockIndex = m_CurrentBlockIndex + 1; + uint32_t NewBlockIndex = m_CurrentBlockIndex + 1; while (m_OpenBlocks.contains(NewBlockIndex)) { NewBlockIndex++; @@ -253,15 +253,15 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const m_CurrentBlock = SmallObjectFile; m_CurrentInsertOffset = 0; } - const uint32_t InsertOffset = m_CurrentInsertOffset; + const uint64_t InsertOffset = m_CurrentInsertOffset; m_CurrentBlock.lock()->Write(ChunkData, ChunkSize, InsertOffset); - m_CurrentInsertOffset = static_cast<uint32_t>(AlignPositon(InsertOffset + ChunkSize, m_PayloadAlignment)); + m_CurrentInsertOffset = 
AlignPositon(InsertOffset + ChunkSize, m_PayloadAlignment); - const CasDiskLocation Location{m_CurrentBlockIndex, InsertOffset, static_cast<uint32_t>(ChunkSize)}; - CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = Location}; + const CasLocation Location(m_CurrentBlockIndex, InsertOffset, ChunkSize); + CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = CasDiskLocation(Location)}; RwLock::ExclusiveLockScope __(m_LocationMapLock); - m_LocationMap[ChunkHash] = Location; + m_LocationMap[ChunkHash] = CasDiskLocation(Location); m_TotalSize.fetch_add(static_cast<uint64_t>(ChunkSize)); m_CasLog.Append(IndexEntry); @@ -281,7 +281,7 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash) if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) { - const CasDiskLocation& Location = KeyIt->second; + CasLocation Location = *KeyIt->second; if (auto BlockIt = m_OpenBlocks.find(Location.BlockIndex); BlockIt != m_OpenBlocks.end()) { @@ -356,11 +356,12 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) for (auto& Entry : m_LocationMap) { - const uint64_t EntryOffset = Entry.second.Offset; + const CasLocation Location = *Entry.second; + const uint64_t EntryOffset = Location.Offset; if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd)) { - const uint64_t EntryEnd = EntryOffset + Entry.second.Size; + const uint64_t EntryEnd = EntryOffset + Location.Size; if (EntryEnd >= WindowEnd) { @@ -370,8 +371,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) } const IoHash ComputedHash = - IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Entry.second.Offset - WindowStart, - Entry.second.Size); + IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Location.Offset - WindowStart, Location.Size); if (Entry.first != ComputedHash) { @@ -391,9 +391,10 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) for (const CasDiskIndexEntry& Entry : BigChunks) { - IoHashStream Hasher; - auto& SmallObjectFile = 
*m_OpenBlocks[Entry.Location.BlockIndex]; - SmallObjectFile.StreamByteRange(Entry.Location.Offset, Entry.Location.Size, [&](const void* Data, uint64_t Size) { + IoHashStream Hasher; + const CasLocation Location = *Entry.Location; + auto& SmallObjectFile = *m_OpenBlocks[Location.BlockIndex]; + SmallObjectFile.StreamByteRange(Location.Offset, Location.Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); IoHash ComputedHash = Hasher.GetHash(); @@ -469,7 +470,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) std::unordered_map<uint64_t, size_t> BlockIndexToKeepChunksMap; std::vector<std::unordered_map<IoHash, CasDiskLocation, IoHash::Hasher>> KeepChunks; std::vector<IoHash> DeletedChunks; - std::unordered_set<uint16_t> BlocksToReWrite; + std::unordered_set<uint32_t> BlocksToReWrite; { RwLock::ExclusiveLockScope _i(m_InsertLock); RwLock::ExclusiveLockScope _l(m_LocationMapLock); @@ -493,11 +494,12 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) for (const auto& Entry : m_LocationMap) { TotalChunkHashes.push_back(Entry.first); - if (BlockIndexToKeepChunksMap.contains(Entry.second.BlockIndex)) + const CasLocation Location = *Entry.second; + if (BlockIndexToKeepChunksMap.contains(Location.BlockIndex)) { continue; } - BlockIndexToKeepChunksMap[Entry.second.BlockIndex] = KeepChunks.size(); + BlockIndexToKeepChunksMap[Location.BlockIndex] = KeepChunks.size(); KeepChunks.resize(KeepChunks.size() + 1); } @@ -507,10 +509,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { if (Keep) { - auto KeyIt = m_LocationMap.find(ChunkHash); - const auto& ChunkLocation = KeyIt->second; - auto& ChunkMap = KeepChunks[BlockIndexToKeepChunksMap[ChunkLocation.BlockIndex]]; - ChunkMap[ChunkHash] = ChunkLocation; + auto KeyIt = m_LocationMap.find(ChunkHash); + const CasLocation ChunkLocation = *KeyIt->second; + auto& ChunkMap = 
KeepChunks[BlockIndexToKeepChunksMap[ChunkLocation.BlockIndex]]; + ChunkMap[ChunkHash] = KeyIt->second; NewTotalSize += ChunkLocation.Size; } else @@ -532,10 +534,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) for (const auto& ChunkHash : DeletedChunks) { - auto KeyIt = m_LocationMap.find(ChunkHash); - const auto& ChunkLocation = KeyIt->second; + auto KeyIt = m_LocationMap.find(ChunkHash); + const CasLocation& ChunkLocation = *KeyIt->second; BlocksToReWrite.insert(ChunkLocation.BlockIndex); - m_CasLog.Append({.Key = ChunkHash, .Location = ChunkLocation, .Flags = CasDiskIndexEntry::kTombstone}); + m_CasLog.Append({.Key = ChunkHash, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); m_LocationMap.erase(ChunkHash); m_TotalSize.fetch_sub(static_cast<uint64_t>(ChunkLocation.Size)); } @@ -544,7 +546,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) // a small amount of bytes to gain. if (BlocksToReWrite.contains(m_CurrentBlockIndex)) { - uint16_t NewBlockIndex = m_CurrentBlockIndex + 1; + uint32_t NewBlockIndex = m_CurrentBlockIndex + 1; while (m_OpenBlocks.contains(NewBlockIndex)) { NewBlockIndex++; @@ -552,7 +554,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { ZEN_ERROR("unable to allocate a new block in {}, count limit {} exeeded", m_ContainerBaseName, - static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1); + static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1); return; } } @@ -569,7 +571,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) std::shared_ptr<ChunkBlock> NewBlockFile; uint64_t WriteOffset = {}; - uint16_t NewBlockIndex = {}; + uint32_t NewBlockIndex = {}; std::unordered_map<IoHash, CasDiskLocation> MovedBlocks; for (auto BlockIndex : BlocksToReWrite) @@ -604,7 +606,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) std::vector<uint8_t> Chunk; for (auto& Entry : ChunkMap) { - const CasDiskLocation& ChunkLocation = Entry.second; + const CasLocation ChunkLocation 
= *Entry.second; Chunk.resize(ChunkLocation.Size); BlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); @@ -630,7 +632,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { ZEN_ERROR("unable to allocate a new block in {}, count limit {} exeeded", m_ContainerBaseName, - static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1); + static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1); return; } } @@ -665,10 +667,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) } NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); - CasDiskLocation NewChunkLocation(NewBlockIndex, gsl::narrow<uint32_t>(WriteOffset), gsl::narrow<uint32_t>(Chunk.size())); - Entry.second = {.BlockIndex = NewBlockIndex, - .Offset = gsl::narrow<uint32_t>(WriteOffset), - .Size = gsl::narrow<uint32_t>(Chunk.size())}; + CasLocation NewChunkLocation(NewBlockIndex, WriteOffset, Chunk.size()); + Entry.second = CasDiskLocation(NewChunkLocation); MovedBlocks[Entry.first] = Entry.second; WriteOffset = AlignPositon(WriteOffset + Chunk.size(), m_PayloadAlignment); } @@ -914,7 +914,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) SmallObjectFile.Open(LegacySobsPath, false); uint64_t MaxRequiredChunkCount = SmallObjectFile.FileSize() / m_MaxBlockSize; - uint64_t MaxPossibleChunkCount = static_cast<uint64_t>(std::numeric_limits<std::uint16_t>::max()) + 1; + uint64_t MaxPossibleChunkCount = static_cast<uint64_t>(std::numeric_limits<std::uint32_t>::max()) + 1; if (MaxRequiredChunkCount > MaxPossibleChunkCount) { ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}", @@ -958,7 +958,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) std::unique_ptr<ChunkBlock> NewBlockFile; uint64_t WriteOffset = {}; - uint16_t NewBlockIndex = {}; + uint32_t NewBlockIndex = {}; std::vector<uint8_t> Chunk; for (const auto& ChunkHash : ChunkHashes) @@ -982,8 +982,8 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) WriteOffset 
= 0; } NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); - CasDiskLocation NewChunkLocation(NewBlockIndex, gsl::narrow<uint32_t>(WriteOffset), gsl::narrow<uint32_t>(Chunk.size())); - m_CasLog.Append({.Key = ChunkHash, .Location = NewChunkLocation}); + CasLocation NewChunkLocation(NewBlockIndex, WriteOffset, Chunk.size()); + m_CasLog.Append({.Key = ChunkHash, .Location = CasDiskLocation(NewChunkLocation)}); WriteOffset = AlignPositon(WriteOffset + Chunk.size(), m_PayloadAlignment); } m_CasLog.Close(); @@ -1033,10 +1033,10 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) }); } - std::unordered_map<uint16_t, uint64_t> BlockUsage; + std::unordered_map<uint32_t, uint64_t> BlockUsage; for (const auto& Entry : m_LocationMap) { - const auto& Location = Entry.second; + const CasLocation Location = *Entry.second; m_TotalSize.fetch_add(Location.Size); uint64_t NextBlockStart = Location.Offset + Location.Size; auto It = BlockUsage.find(Location.BlockIndex); @@ -1072,7 +1072,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) } try { - uint16_t BlockIndex = static_cast<uint16_t>(std::stoi(FileName.substr(m_ContainerBaseName.length() + 1))); + uint32_t BlockIndex = static_cast<uint32_t>(std::stoi(FileName.substr(m_ContainerBaseName.length() + 1))); if (!BlockUsage.contains(BlockIndex)) { // Clear out unused blocks @@ -1090,8 +1090,8 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) } } - uint32_t LargestSizeToUse = gsl::narrow<uint32_t>(m_MaxBlockSize - m_PayloadAlignment); - uint32_t SmallestBlockSize = LargestSizeToUse; + uint64_t LargestSizeToUse = m_MaxBlockSize - m_PayloadAlignment; + uint64_t SmallestBlockSize = LargestSizeToUse; bool CreateNewBlock = m_OpenBlocks.empty(); if (!CreateNewBlock) { @@ -1099,7 +1099,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) { if (Entry.second < SmallestBlockSize) { - SmallestBlockSize = gsl::narrow<uint32_t>(Entry.second); + SmallestBlockSize = Entry.second; m_CurrentBlockIndex = Entry.first; 
CreateNewBlock = false; } @@ -1117,7 +1117,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) else { m_CurrentBlock = m_OpenBlocks[m_CurrentBlockIndex]; - m_CurrentInsertOffset = static_cast<uint32_t>(AlignPositon(SmallestBlockSize, m_PayloadAlignment)); + m_CurrentInsertOffset = AlignPositon(SmallestBlockSize, m_PayloadAlignment); } // TODO: should validate integrity of container files here @@ -1145,6 +1145,41 @@ namespace { } } // namespace +bool +operator==(const CasLocation& Lhs, const CasLocation& Rhs) +{ + return Lhs.BlockIndex == Rhs.BlockIndex && Lhs.Offset == Rhs.Offset && Lhs.Size == Rhs.Size; +} + +TEST_CASE("cas.casdisklocation") +{ + CasLocation Zero = CasLocation{.BlockIndex = 0, .Offset = 0, .Size = 0}; + CHECK(Zero == *CasDiskLocation(Zero)); + + CasLocation MaxBlockIndex = CasLocation{.BlockIndex = CasDiskLocation::MaxBlockIndex, .Offset = 0, .Size = 0}; + CHECK(MaxBlockIndex == *CasDiskLocation(MaxBlockIndex)); + + CasLocation MaxOffset = CasLocation{.BlockIndex = 0, .Offset = CasDiskLocation::MaxOffset, .Size = 0}; + CHECK(MaxOffset == *CasDiskLocation(MaxOffset)); + + CasLocation MaxSize = CasLocation{.BlockIndex = 0, .Offset = 0, .Size = std::numeric_limits<uint32_t>::max()}; + CHECK(MaxSize == *CasDiskLocation(MaxSize)); + + CasLocation MaxBlockIndexAndOffset = + CasLocation{.BlockIndex = CasDiskLocation::MaxBlockIndex, .Offset = CasDiskLocation::MaxOffset, .Size = 0}; + CHECK(MaxBlockIndexAndOffset == *CasDiskLocation(MaxBlockIndexAndOffset)); + + CasLocation MaxAll = CasLocation{.BlockIndex = CasDiskLocation::MaxBlockIndex, + .Offset = CasDiskLocation::MaxOffset, + .Size = std::numeric_limits<uint32_t>::max()}; + CHECK(MaxAll == *CasDiskLocation(MaxAll)); + + CasLocation Middle = CasLocation{.BlockIndex = (CasDiskLocation::MaxBlockIndex) / 2, + .Offset = (CasDiskLocation::MaxOffset) / 2, + .Size = std::numeric_limits<uint32_t>::max() / 2}; + CHECK(Middle == *CasDiskLocation(Middle)); +} + TEST_CASE("cas.compact.gc") { 
ScopedTemporaryDirectory TempDir; |