diff options
| author | zousar <[email protected]> | 2023-12-07 08:48:04 -0700 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-12-07 08:48:04 -0700 |
| commit | 6229149482f00893afa6874cc75d5e5ed0c438a9 (patch) | |
| tree | 531317314903da569eea099c4a07e721de659b93 /src/zenstore | |
| parent | Change naming to ChunkInfos instead of Chunks (diff) | |
| parent | Update CHANGELOG.md (diff) | |
| download | zen-zs/get-all-chunk-infos.tar.xz zen-zs/get-all-chunk-infos.zip | |
Merge branch 'main' into zs/get-all-chunk-infos (branch: zs/get-all-chunk-infos)
Diffstat (limited to 'src/zenstore')
| -rw-r--r-- | src/zenstore/blockstore.cpp | 31 | ||||
| -rw-r--r-- | src/zenstore/compactcas.cpp | 25 | ||||
| -rw-r--r-- | src/zenstore/filecas.cpp | 6 | ||||
| -rw-r--r-- | src/zenstore/filecas.h | 4 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/blockstore.h | 38 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cidstore.h | 4 |
6 files changed, 71 insertions, 37 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index 918f464ac..71e306eca 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -282,10 +282,10 @@ BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownLocations) } } -std::vector<uint32_t> -BlockStore::GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent) +BlockStore::BlockEntryCountMap +BlockStore::GetBlocksToCompact(const BlockUsageMap& BlockUsage, uint32_t BlockUsageThresholdPercent) { - std::unordered_set<uint32_t> Result; + BlockEntryCountMap Result; { RwLock::SharedLockScope InsertLock(m_InsertLock); for (const auto& It : m_ChunkBlocks) @@ -299,31 +299,34 @@ BlockStore::GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& Blo { continue; } - uint64_t BlockSize = It.second ? It.second->FileSize() : 0u; - if (BlockSize == 0) + + uint64_t UsedSize = 0; + uint32_t UsedCount = 0; + if (auto UsageIt = BlockUsage.find(BlockIndex); UsageIt != BlockUsage.end()) { - Result.insert(BlockIndex); - continue; + UsedSize = UsageIt->second.DiskUsage; + UsedCount = UsageIt->second.EntryCount; } - uint64_t UsedSize = 0; - if (auto UsageIt = BlockUsage.find(BlockIndex); UsageIt != BlockUsage.end()) + uint64_t BlockSize = It.second ? It.second->FileSize() : 0u; + if (BlockSize == 0) { - UsedSize = UsageIt->second; + Result.insert_or_assign(BlockIndex, UsedCount); + continue; } if (BlockUsageThresholdPercent == 100) { if (UsedSize < BlockSize) { - Result.insert(BlockIndex); + Result.insert_or_assign(BlockIndex, UsedCount); } } else if (BlockUsageThresholdPercent == 0) { if (UsedSize == 0) { - Result.insert(BlockIndex); + Result.insert_or_assign(BlockIndex, UsedCount); } } else @@ -331,12 +334,12 @@ BlockStore::GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& Blo const uint32_t UsedPercent = UsedSize < BlockSize ? 
gsl::narrow<uint32_t>((100 * UsedSize) / BlockSize) : 100u; if (UsedPercent < BlockUsageThresholdPercent) { - Result.insert(BlockIndex); + Result.insert_or_assign(BlockIndex, UsedCount); } } } } - return std::vector<uint32_t>(Result.begin(), Result.end()); + return Result; } void diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 5de82f219..96ab65a5f 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -25,6 +25,9 @@ # include <zenstore/cidstore.h> # include <algorithm> # include <random> +ZEN_THIRD_PARTY_INCLUDES_START +# include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END #endif ////////////////////////////////////////////////////////////////////////// @@ -114,8 +117,14 @@ namespace { ////////////////////////////////////////////////////////////////////////// +static const float IndexMinLoadFactor = 0.2f; +static const float IndexMaxLoadFactor = 0.7f; + CasContainerStrategy::CasContainerStrategy(GcManager& Gc) : m_Log(logging::Get("containercas")), m_Gc(Gc) { + m_LocationMap.min_load_factor(IndexMinLoadFactor); + m_LocationMap.max_load_factor(IndexMaxLoadFactor); + m_Gc.AddGcStorage(this); m_Gc.AddGcReferenceStore(*this); } @@ -576,7 +585,7 @@ public: if (Ctx.Settings.CollectSmallObjects) { - std::unordered_map<uint32_t, uint64_t> BlockUsage; + BlockStore::BlockUsageMap BlockUsage; { RwLock::SharedLockScope __(m_CasContainerStrategy.m_LocationMapLock); if (Ctx.IsCancelledFlag.load()) @@ -591,14 +600,14 @@ public: uint32_t BlockIndex = Loc.GetBlockIndex(); uint64_t ChunkSize = RoundUp(Loc.GetSize(), m_CasContainerStrategy.m_PayloadAlignment); - auto It = BlockUsage.find(BlockIndex); - if (It == BlockUsage.end()) + if (auto It = BlockUsage.find(BlockIndex); It != BlockUsage.end()) { - BlockUsage.insert_or_assign(BlockIndex, ChunkSize); + It->second.EntryCount++; + It->second.DiskUsage += ChunkSize; } else { - It->second += ChunkSize; + BlockUsage.insert_or_assign(BlockIndex, BlockStore::BlockUsageInfo{.DiskUsage 
= ChunkSize, .EntryCount = 1}); } } } @@ -607,7 +616,7 @@ public: BlockStoreCompactState BlockCompactState; std::vector<IoHash> BlockCompactStateKeys; - std::vector<uint32_t> BlocksToCompact = + BlockStore::BlockEntryCountMap BlocksToCompact = m_CasContainerStrategy.m_BlockStore.GetBlocksToCompact(BlockUsage, Ctx.Settings.CompactBlockUsageThresholdPercent); BlockCompactState.IncludeBlocks(BlocksToCompact); @@ -980,13 +989,14 @@ CasContainerStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint Entries.resize(128 * 1024 / sizeof(CasDiskIndexEntry)); uint64_t RemainingEntries = Header.EntryCount; + uint64_t ReadOffset = sizeof(CasDiskIndexHeader); do { const uint64_t NumToRead = Min(RemainingEntries, Entries.size()); Entries.resize(NumToRead); - ObjectIndexFile.Read(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader)); + ObjectIndexFile.Read(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), ReadOffset); std::string InvalidEntryReason; for (const CasDiskIndexEntry& Entry : Entries) @@ -1002,6 +1012,7 @@ CasContainerStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint } RemainingEntries -= NumToRead; + ReadOffset += NumToRead * sizeof(CasDiskIndexEntry); } while (RemainingEntries); OutVersion = CasDiskIndexHeader::CurrentVersion; diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index aeca01dd1..5da612e30 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -128,8 +128,14 @@ FileCasStrategy::ShardingHelper::ShardingHelper(const std::filesystem::path& Roo ////////////////////////////////////////////////////////////////////////// +static const float IndexMinLoadFactor = 0.2f; +static const float IndexMaxLoadFactor = 0.7f; + FileCasStrategy::FileCasStrategy(GcManager& Gc) : m_Log(logging::Get("filecas")), m_Gc(Gc) { + m_Index.min_load_factor(IndexMinLoadFactor); + m_Index.max_load_factor(IndexMaxLoadFactor); + m_Gc.AddGcStorage(this); 
m_Gc.AddGcReferenceStore(*this); } diff --git a/src/zenstore/filecas.h b/src/zenstore/filecas.h index cb1347580..70cd4ef5a 100644 --- a/src/zenstore/filecas.h +++ b/src/zenstore/filecas.h @@ -16,6 +16,10 @@ #include <atomic> #include <functional> +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + namespace zen { class BasicFile; diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h index 919684e41..786780b5e 100644 --- a/src/zenstore/include/zenstore/blockstore.h +++ b/src/zenstore/include/zenstore/blockstore.h @@ -132,6 +132,14 @@ public: typedef std::function<void(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback; typedef std::function<void(const BlockStoreLocation& Location)> WriteChunkCallback; + struct BlockUsageInfo + { + uint64_t DiskUsage; + uint32_t EntryCount; + }; + typedef std::unordered_map<uint32_t, BlockUsageInfo> BlockUsageMap; + typedef std::unordered_map<uint32_t, uint32_t> BlockEntryCountMap; + void Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, uint64_t MaxBlockCount); struct BlockIndexSet @@ -145,8 +153,8 @@ public: // Ask the store to create empty blocks for all locations that does not have a block // Remove any block that is not referenced - void SyncExistingBlocksOnDisk(const BlockIndexSet& KnownLocations); - std::vector<uint32_t> GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent); + void SyncExistingBlocksOnDisk(const BlockIndexSet& KnownLocations); + BlockEntryCountMap GetBlocksToCompact(const BlockUsageMap& BlockUsage, uint32_t BlockUsageThresholdPercent); void Close(); @@ -205,23 +213,29 @@ class BlockStoreCompactState public: BlockStoreCompactState() = default; - void IncludeBlocks(const std::span<const uint32_t> BlockIndexes) + void IncludeBlocks(const BlockStore::BlockEntryCountMap& 
BlockEntryCountMap) { - for (uint32_t BlockIndex : BlockIndexes) + size_t EntryCountTotal = 0; + for (auto& BlockUsageIt : BlockEntryCountMap) { - auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex); - if (It == m_BlockIndexToChunkMapIndex.end()) - { - m_KeepChunks.emplace_back(std::vector<size_t>()); - m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1); - } + uint32_t BlockIndex = BlockUsageIt.first; + ZEN_ASSERT(m_BlockIndexToChunkMapIndex.find(BlockIndex) == m_BlockIndexToChunkMapIndex.end()); + + m_KeepChunks.emplace_back(std::vector<size_t>()); + m_KeepChunks.back().reserve(BlockUsageIt.second); + m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1); + EntryCountTotal += BlockUsageIt.second; } + m_ChunkLocations.reserve(EntryCountTotal); } void IncludeBlock(uint32_t BlockIndex) { - const uint32_t Blocks[1] = {BlockIndex}; - IncludeBlocks(Blocks); + if (m_BlockIndexToChunkMapIndex.find(BlockIndex) == m_BlockIndexToChunkMapIndex.end()) + { + m_KeepChunks.emplace_back(std::vector<size_t>()); + m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1); + } } bool AddKeepLocation(const BlockStoreLocation& Location) diff --git a/src/zenstore/include/zenstore/cidstore.h b/src/zenstore/include/zenstore/cidstore.h index 319683dcb..4c9f30608 100644 --- a/src/zenstore/include/zenstore/cidstore.h +++ b/src/zenstore/include/zenstore/cidstore.h @@ -9,10 +9,6 @@ #include <zenstore/hashkeyset.h> #include <zenutil/statsreporter.h> -ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_map.h> -ZEN_THIRD_PARTY_INCLUDES_END - #include <filesystem> namespace zen { |