aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/include
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-11-21 15:06:25 +0100
committerGitHub <[email protected]>2023-11-21 15:06:25 +0100
commit05178f7c18a48b21b9e260de282a86b91df26955 (patch)
tree25f77af287730c6dbe8d655e0cb503f2652cbd36 /src/zenstore/include
parentzen run command (#552) (diff)
downloadzen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz
zen-05178f7c18a48b21b9e260de282a86b91df26955.zip
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, moving out the actual disk work from deleting items in the index. - Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats - Make Compacting of stores non-parallel to not eat all the disk I/O when running GC
Diffstat (limited to 'src/zenstore/include')
-rw-r--r--src/zenstore/include/zenstore/blockstore.h22
-rw-r--r--src/zenstore/include/zenstore/gc.h84
2 files changed, 57 insertions, 49 deletions
diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h
index 75accd9b8..1429a6b02 100644
--- a/src/zenstore/include/zenstore/blockstore.h
+++ b/src/zenstore/include/zenstore/blockstore.h
@@ -136,7 +136,8 @@ public:
// Ask the store to create empty blocks for all locations that do not have a block
// Remove any block that is not referenced
- void SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& KnownLocations);
+ void SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& KnownLocations);
+ std::vector<uint32_t> GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent);
void Close();
@@ -193,16 +194,25 @@ class BlockStoreCompactState
public:
BlockStoreCompactState() = default;
- void IncludeBlock(uint32_t BlockIndex)
+ void IncludeBlocks(const std::span<const uint32_t> BlockIndexes)
{
- auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex);
- if (It == m_BlockIndexToChunkMapIndex.end())
+ for (uint32_t BlockIndex : BlockIndexes)
{
- m_KeepChunks.emplace_back(std::vector<size_t>());
- m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1);
+ auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex);
+ if (It == m_BlockIndexToChunkMapIndex.end())
+ {
+ m_KeepChunks.emplace_back(std::vector<size_t>());
+ m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1);
+ }
}
}
+ void IncludeBlock(uint32_t BlockIndex)
+ {
+ const uint32_t Blocks[1] = {BlockIndex};
+ IncludeBlocks(Blocks);
+ }
+
bool AddKeepLocation(const BlockStoreLocation& Location)
{
auto It = m_BlockIndexToChunkMapIndex.find(Location.BlockIndex);
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
index d4c7bba25..4cd01bc2c 100644
--- a/src/zenstore/include/zenstore/gc.h
+++ b/src/zenstore/include/zenstore/gc.h
@@ -60,17 +60,32 @@ struct GcSettings
bool SkipCidDelete = false;
bool Verbose = false;
bool SingleThread = false;
+ uint32_t CompactBlockUsageThresholdPercent =
+ 90; // 0 = compact only empty eligible blocks, 100 = compact all non-full eligible blocks, 1-99 = compact eligible blocks with less
+ // usage than CompactBlockUsageThresholdPercent
+ std::filesystem::path DiskReservePath;
+};
+
+struct GcCompactStoreStats
+{
+ std::uint64_t RemovedDisk = 0;
+ std::chrono::milliseconds ElapsedMS = {};
+};
+
+struct GcStats
+{
+ std::uint64_t CheckedCount = 0;
+ std::uint64_t FoundCount = 0;
+ std::uint64_t DeletedCount = 0;
+ std::uint64_t FreedMemory = 0;
+ std::chrono::milliseconds ElapsedMS = {};
};
struct GcReferencerStats
{
- std::uint64_t Count = 0;
- std::uint64_t Expired = 0;
- std::uint64_t Deleted = 0;
- std::uint64_t RemovedDisk = 0;
- std::uint64_t RemovedMemory = 0;
+ GcStats RemoveExpiredDataStats;
+ GcCompactStoreStats CompactStoreStats;
- std::chrono::milliseconds RemoveExpiredDataMS = {};
std::chrono::milliseconds CreateReferenceCheckersMS = {};
std::chrono::milliseconds LockStateMS = {};
std::chrono::milliseconds ElapsedMS = {};
@@ -78,43 +93,34 @@ struct GcReferencerStats
struct GcReferenceStoreStats
{
- std::uint64_t Count = 0;
- std::uint64_t Pruned = 0;
- std::uint64_t Compacted = 0;
- std::uint64_t RemovedDisk = 0;
- std::uint64_t RemovedMemory = 0;
+ GcStats RemoveUnreferencedDataStats;
+ GcCompactStoreStats CompactStoreStats;
- std::chrono::milliseconds CreateReferencePrunerMS = {};
- std::chrono::milliseconds RemoveUnreferencedDataMS = {};
- std::chrono::milliseconds CompactReferenceStoreMS = {};
+ std::chrono::milliseconds CreateReferencePrunersMS = {};
std::chrono::milliseconds ElapsedMS = {};
};
struct GcResult
{
- GcReferencerStats ReferencerStat;
- GcReferenceStoreStats ReferenceStoreStat;
-
- std::uint64_t RemovedDisk = 0;
- std::uint64_t RemovedMemory = 0;
-
std::vector<std::pair<std::string, GcReferencerStats>> ReferencerStats;
std::vector<std::pair<std::string, GcReferenceStoreStats>> ReferenceStoreStats;
+ GcReferencerStats ReferencerStatSum;
+ GcReferenceStoreStats ReferenceStoreStatSum;
+ GcCompactStoreStats CompactStoresStatSum;
+
// Wall times, not sum of each
std::chrono::milliseconds RemoveExpiredDataMS = {};
std::chrono::milliseconds CreateReferenceCheckersMS = {};
std::chrono::milliseconds LockStateMS = {};
- std::chrono::milliseconds CreateReferencePrunerMS = {};
+ std::chrono::milliseconds CreateReferencePrunersMS = {};
std::chrono::milliseconds RemoveUnreferencedDataMS = {};
- std::chrono::milliseconds CompactReferenceStoreMS = {};
+ std::chrono::milliseconds CompactStoresMS = {};
std::chrono::milliseconds WriteBlockMS = {};
std::chrono::milliseconds ElapsedMS = {};
-
- void Sum();
};
class CbObjectWriter;
@@ -129,22 +135,23 @@ struct GcCtx
typedef tsl::robin_set<IoHash> HashSet;
/**
- * @brief An interface to remove the stored data on disk after a GcReferencePruner::RemoveUnreferencedData
+ * @brief An interface to remove the stored data on disk after a GcReferencer::RemoveExpiredData and
+ * GcReferencePruner::RemoveUnreferencedData
*
- * CompactReferenceStore is called after pruning (GcReferencePruner::RemoveUnreferencedData) and state locking is
- * complete so implementor must take care to only remove data that has not been altered since the prune operation.
+ * CompactStore is called after state locking is complete so implementor must take care to only remove
+ * data that has not been altered since the prune operation.
*
- * Instance will be deleted after CompactReferenceStore has completed execution.
+ * Instance will be deleted after CompactStore has completed execution.
*
* The subclass constructor should be provided with information on what is intended to be removed.
*/
-class GcReferenceStoreCompactor
+class GcStoreCompactor
{
public:
- virtual ~GcReferenceStoreCompactor() = default;
+ virtual ~GcStoreCompactor() = default;
// Remove data on disk based on results from GcReferencer::RemoveExpiredData or GcReferencePruner::RemoveUnreferencedData
- virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats) = 0;
+ virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) = 0;
};
/**
@@ -175,10 +182,6 @@ public:
/**
* @brief Interface to handle GC of data that references Cid data
*
- * TODO: Maybe we should split up being a referencer and something that holds cache values?
- *
- * GcCacheStore and GcReferencer?
- *
* This interface is registered/unregistered to GcManager via AddGcReferencer() and RemoveGcReferencer()
*/
class GcReferencer
@@ -190,10 +193,7 @@ public:
virtual std::string GetGcName(GcCtx& Ctx) = 0;
// Remove expired data based on either GcCtx::Settings CacheExpireTime/ProjectExpireTime
- // TODO: For disk layer we need to first update it with access times from the memory layer
- // The implementer of GcReferencer (in our case a disk bucket) does not know about any
- // potential memory cache layer :(
- virtual void RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) = 0;
+ virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) = 0;
// Create 0-n GcReferenceChecker for this GcReferencer. Caller will manage lifetime of
// returned instances
@@ -213,14 +213,12 @@ public:
// Check a set of references to see if they are in use.
// Use the GetUnusedReferences input function to check if references are used and update any pointers
// so any query for references determined to be unreferenced will not be found.
- // If any references a found to be unused, return a GcReferenceStoreCompactor instance which will
+ // If any references are found to be unused, return a GcStoreCompactor instance which will
// clean up any stored bulk data mapping to the pruned references.
// Caller will manage lifetime of returned instance
// This function should execute as fast as possible, so try to prepare a list of references to check ahead of
// call to this function and make sure the removal of unreferenced items is as lightweight as possible.
- virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx,
- GcReferenceStoreStats& Stats,
- const GetUnusedReferencesFunc& GetUnusedReferences) = 0;
+ virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences) = 0;
};
/**