aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore
diff options
context:
space:
mode:
author: zousar <[email protected]> 2023-12-07 08:48:04 -0700
committer: GitHub <[email protected]> 2023-12-07 08:48:04 -0700
commit: 6229149482f00893afa6874cc75d5e5ed0c438a9 (patch)
tree: 531317314903da569eea099c4a07e721de659b93 /src/zenstore
parent: Change naming to ChunkInfos instead of Chunks (diff)
parent: Update CHANGELOG.md (diff)
download: zen-zs/get-all-chunk-infos.tar.xz
zen-zs/get-all-chunk-infos.zip
Merge branch 'main' into zs/get-all-chunk-infos (zs/get-all-chunk-infos)
Diffstat (limited to 'src/zenstore')
-rw-r--r-- src/zenstore/blockstore.cpp 31
-rw-r--r-- src/zenstore/compactcas.cpp 25
-rw-r--r-- src/zenstore/filecas.cpp 6
-rw-r--r-- src/zenstore/filecas.h 4
-rw-r--r-- src/zenstore/include/zenstore/blockstore.h 38
-rw-r--r-- src/zenstore/include/zenstore/cidstore.h 4
6 files changed, 71 insertions, 37 deletions
diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp
index 918f464ac..71e306eca 100644
--- a/src/zenstore/blockstore.cpp
+++ b/src/zenstore/blockstore.cpp
@@ -282,10 +282,10 @@ BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownLocations)
}
}
-std::vector<uint32_t>
-BlockStore::GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent)
+BlockStore::BlockEntryCountMap
+BlockStore::GetBlocksToCompact(const BlockUsageMap& BlockUsage, uint32_t BlockUsageThresholdPercent)
{
- std::unordered_set<uint32_t> Result;
+ BlockEntryCountMap Result;
{
RwLock::SharedLockScope InsertLock(m_InsertLock);
for (const auto& It : m_ChunkBlocks)
@@ -299,31 +299,34 @@ BlockStore::GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& Blo
{
continue;
}
- uint64_t BlockSize = It.second ? It.second->FileSize() : 0u;
- if (BlockSize == 0)
+
+ uint64_t UsedSize = 0;
+ uint32_t UsedCount = 0;
+ if (auto UsageIt = BlockUsage.find(BlockIndex); UsageIt != BlockUsage.end())
{
- Result.insert(BlockIndex);
- continue;
+ UsedSize = UsageIt->second.DiskUsage;
+ UsedCount = UsageIt->second.EntryCount;
}
- uint64_t UsedSize = 0;
- if (auto UsageIt = BlockUsage.find(BlockIndex); UsageIt != BlockUsage.end())
+ uint64_t BlockSize = It.second ? It.second->FileSize() : 0u;
+ if (BlockSize == 0)
{
- UsedSize = UsageIt->second;
+ Result.insert_or_assign(BlockIndex, UsedCount);
+ continue;
}
if (BlockUsageThresholdPercent == 100)
{
if (UsedSize < BlockSize)
{
- Result.insert(BlockIndex);
+ Result.insert_or_assign(BlockIndex, UsedCount);
}
}
else if (BlockUsageThresholdPercent == 0)
{
if (UsedSize == 0)
{
- Result.insert(BlockIndex);
+ Result.insert_or_assign(BlockIndex, UsedCount);
}
}
else
@@ -331,12 +334,12 @@ BlockStore::GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& Blo
const uint32_t UsedPercent = UsedSize < BlockSize ? gsl::narrow<uint32_t>((100 * UsedSize) / BlockSize) : 100u;
if (UsedPercent < BlockUsageThresholdPercent)
{
- Result.insert(BlockIndex);
+ Result.insert_or_assign(BlockIndex, UsedCount);
}
}
}
}
- return std::vector<uint32_t>(Result.begin(), Result.end());
+ return Result;
}
void
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index 5de82f219..96ab65a5f 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -25,6 +25,9 @@
# include <zenstore/cidstore.h>
# include <algorithm>
# include <random>
+ZEN_THIRD_PARTY_INCLUDES_START
+# include <tsl/robin_map.h>
+ZEN_THIRD_PARTY_INCLUDES_END
#endif
//////////////////////////////////////////////////////////////////////////
@@ -114,8 +117,14 @@ namespace {
//////////////////////////////////////////////////////////////////////////
+static const float IndexMinLoadFactor = 0.2f;
+static const float IndexMaxLoadFactor = 0.7f;
+
CasContainerStrategy::CasContainerStrategy(GcManager& Gc) : m_Log(logging::Get("containercas")), m_Gc(Gc)
{
+ m_LocationMap.min_load_factor(IndexMinLoadFactor);
+ m_LocationMap.max_load_factor(IndexMaxLoadFactor);
+
m_Gc.AddGcStorage(this);
m_Gc.AddGcReferenceStore(*this);
}
@@ -576,7 +585,7 @@ public:
if (Ctx.Settings.CollectSmallObjects)
{
- std::unordered_map<uint32_t, uint64_t> BlockUsage;
+ BlockStore::BlockUsageMap BlockUsage;
{
RwLock::SharedLockScope __(m_CasContainerStrategy.m_LocationMapLock);
if (Ctx.IsCancelledFlag.load())
@@ -591,14 +600,14 @@ public:
uint32_t BlockIndex = Loc.GetBlockIndex();
uint64_t ChunkSize = RoundUp(Loc.GetSize(), m_CasContainerStrategy.m_PayloadAlignment);
- auto It = BlockUsage.find(BlockIndex);
- if (It == BlockUsage.end())
+ if (auto It = BlockUsage.find(BlockIndex); It != BlockUsage.end())
{
- BlockUsage.insert_or_assign(BlockIndex, ChunkSize);
+ It->second.EntryCount++;
+ It->second.DiskUsage += ChunkSize;
}
else
{
- It->second += ChunkSize;
+ BlockUsage.insert_or_assign(BlockIndex, BlockStore::BlockUsageInfo{.DiskUsage = ChunkSize, .EntryCount = 1});
}
}
}
@@ -607,7 +616,7 @@ public:
BlockStoreCompactState BlockCompactState;
std::vector<IoHash> BlockCompactStateKeys;
- std::vector<uint32_t> BlocksToCompact =
+ BlockStore::BlockEntryCountMap BlocksToCompact =
m_CasContainerStrategy.m_BlockStore.GetBlocksToCompact(BlockUsage, Ctx.Settings.CompactBlockUsageThresholdPercent);
BlockCompactState.IncludeBlocks(BlocksToCompact);
@@ -980,13 +989,14 @@ CasContainerStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint
Entries.resize(128 * 1024 / sizeof(CasDiskIndexEntry));
uint64_t RemainingEntries = Header.EntryCount;
+ uint64_t ReadOffset = sizeof(CasDiskIndexHeader);
do
{
const uint64_t NumToRead = Min(RemainingEntries, Entries.size());
Entries.resize(NumToRead);
- ObjectIndexFile.Read(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
+ ObjectIndexFile.Read(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), ReadOffset);
std::string InvalidEntryReason;
for (const CasDiskIndexEntry& Entry : Entries)
@@ -1002,6 +1012,7 @@ CasContainerStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint
}
RemainingEntries -= NumToRead;
+ ReadOffset += NumToRead * sizeof(CasDiskIndexEntry);
} while (RemainingEntries);
OutVersion = CasDiskIndexHeader::CurrentVersion;
diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp
index aeca01dd1..5da612e30 100644
--- a/src/zenstore/filecas.cpp
+++ b/src/zenstore/filecas.cpp
@@ -128,8 +128,14 @@ FileCasStrategy::ShardingHelper::ShardingHelper(const std::filesystem::path& Roo
//////////////////////////////////////////////////////////////////////////
+static const float IndexMinLoadFactor = 0.2f;
+static const float IndexMaxLoadFactor = 0.7f;
+
FileCasStrategy::FileCasStrategy(GcManager& Gc) : m_Log(logging::Get("filecas")), m_Gc(Gc)
{
+ m_Index.min_load_factor(IndexMinLoadFactor);
+ m_Index.max_load_factor(IndexMaxLoadFactor);
+
m_Gc.AddGcStorage(this);
m_Gc.AddGcReferenceStore(*this);
}
diff --git a/src/zenstore/filecas.h b/src/zenstore/filecas.h
index cb1347580..70cd4ef5a 100644
--- a/src/zenstore/filecas.h
+++ b/src/zenstore/filecas.h
@@ -16,6 +16,10 @@
#include <atomic>
#include <functional>
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_map.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
namespace zen {
class BasicFile;
diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h
index 919684e41..786780b5e 100644
--- a/src/zenstore/include/zenstore/blockstore.h
+++ b/src/zenstore/include/zenstore/blockstore.h
@@ -132,6 +132,14 @@ public:
typedef std::function<void(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback;
typedef std::function<void(const BlockStoreLocation& Location)> WriteChunkCallback;
+ struct BlockUsageInfo
+ {
+ uint64_t DiskUsage;
+ uint32_t EntryCount;
+ };
+ typedef std::unordered_map<uint32_t, BlockUsageInfo> BlockUsageMap;
+ typedef std::unordered_map<uint32_t, uint32_t> BlockEntryCountMap;
+
void Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, uint64_t MaxBlockCount);
struct BlockIndexSet
@@ -145,8 +153,8 @@ public:
// Ask the store to create empty blocks for all locations that does not have a block
// Remove any block that is not referenced
- void SyncExistingBlocksOnDisk(const BlockIndexSet& KnownLocations);
- std::vector<uint32_t> GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent);
+ void SyncExistingBlocksOnDisk(const BlockIndexSet& KnownLocations);
+ BlockEntryCountMap GetBlocksToCompact(const BlockUsageMap& BlockUsage, uint32_t BlockUsageThresholdPercent);
void Close();
@@ -205,23 +213,29 @@ class BlockStoreCompactState
public:
BlockStoreCompactState() = default;
- void IncludeBlocks(const std::span<const uint32_t> BlockIndexes)
+ void IncludeBlocks(const BlockStore::BlockEntryCountMap& BlockEntryCountMap)
{
- for (uint32_t BlockIndex : BlockIndexes)
+ size_t EntryCountTotal = 0;
+ for (auto& BlockUsageIt : BlockEntryCountMap)
{
- auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex);
- if (It == m_BlockIndexToChunkMapIndex.end())
- {
- m_KeepChunks.emplace_back(std::vector<size_t>());
- m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1);
- }
+ uint32_t BlockIndex = BlockUsageIt.first;
+ ZEN_ASSERT(m_BlockIndexToChunkMapIndex.find(BlockIndex) == m_BlockIndexToChunkMapIndex.end());
+
+ m_KeepChunks.emplace_back(std::vector<size_t>());
+ m_KeepChunks.back().reserve(BlockUsageIt.second);
+ m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1);
+ EntryCountTotal += BlockUsageIt.second;
}
+ m_ChunkLocations.reserve(EntryCountTotal);
}
void IncludeBlock(uint32_t BlockIndex)
{
- const uint32_t Blocks[1] = {BlockIndex};
- IncludeBlocks(Blocks);
+ if (m_BlockIndexToChunkMapIndex.find(BlockIndex) == m_BlockIndexToChunkMapIndex.end())
+ {
+ m_KeepChunks.emplace_back(std::vector<size_t>());
+ m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1);
+ }
}
bool AddKeepLocation(const BlockStoreLocation& Location)
diff --git a/src/zenstore/include/zenstore/cidstore.h b/src/zenstore/include/zenstore/cidstore.h
index 319683dcb..4c9f30608 100644
--- a/src/zenstore/include/zenstore/cidstore.h
+++ b/src/zenstore/include/zenstore/cidstore.h
@@ -9,10 +9,6 @@
#include <zenstore/hashkeyset.h>
#include <zenutil/statsreporter.h>
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
#include <filesystem>
namespace zen {