diff options
| author | Dan Engelbrecht <[email protected]> | 2023-11-21 15:06:25 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-11-21 15:06:25 +0100 |
| commit | 05178f7c18a48b21b9e260de282a86b91df26955 (patch) | |
| tree | 25f77af287730c6dbe8d655e0cb503f2652cbd36 /src/zenstore/include | |
| parent | zen run command (#552) (diff) | |
| download | zen-05178f7c18a48b21b9e260de282a86b91df26955.tar.xz zen-05178f7c18a48b21b9e260de282a86b91df26955.zip | |
compact separate for gc referencer (#533)
- Refactor GCV2 so GcReferencer::RemoveExpiredData returns a store compactor, separating the actual disk work from the deletion of items in the index.
- Refactor GCV2 GcResult to reuse GcCompactStoreStats and GcStats
- Make compacting of stores non-parallel so it does not eat all the disk I/O when running GC
Diffstat (limited to 'src/zenstore/include')
| -rw-r--r-- | src/zenstore/include/zenstore/blockstore.h | 22 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 84 |
2 files changed, 57 insertions, 49 deletions
diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h index 75accd9b8..1429a6b02 100644 --- a/src/zenstore/include/zenstore/blockstore.h +++ b/src/zenstore/include/zenstore/blockstore.h @@ -136,7 +136,8 @@ public: // Ask the store to create empty blocks for all locations that does not have a block // Remove any block that is not referenced - void SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& KnownLocations); + void SyncExistingBlocksOnDisk(const std::vector<BlockStoreLocation>& KnownLocations); + std::vector<uint32_t> GetBlocksToCompact(const std::unordered_map<uint32_t, uint64_t>& BlockUsage, uint32_t BlockUsageThresholdPercent); void Close(); @@ -193,16 +194,25 @@ class BlockStoreCompactState public: BlockStoreCompactState() = default; - void IncludeBlock(uint32_t BlockIndex) + void IncludeBlocks(const std::span<const uint32_t> BlockIndexes) { - auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex); - if (It == m_BlockIndexToChunkMapIndex.end()) + for (uint32_t BlockIndex : BlockIndexes) { - m_KeepChunks.emplace_back(std::vector<size_t>()); - m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1); + auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex); + if (It == m_BlockIndexToChunkMapIndex.end()) + { + m_KeepChunks.emplace_back(std::vector<size_t>()); + m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1); + } } } + void IncludeBlock(uint32_t BlockIndex) + { + const uint32_t Blocks[1] = {BlockIndex}; + IncludeBlocks(Blocks); + } + bool AddKeepLocation(const BlockStoreLocation& Location) { auto It = m_BlockIndexToChunkMapIndex.find(Location.BlockIndex); diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index d4c7bba25..4cd01bc2c 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -60,17 +60,32 @@ struct GcSettings bool SkipCidDelete = false; bool Verbose = false; 
bool SingleThread = false; + uint32_t CompactBlockUsageThresholdPercent = + 90; // 0 = compact only empty eligible blocks, 100 = compact all non-full eligible blocks, 1-99 = compact eligible blocks with less + // usage than CompactBlockUsageThresholdPercent + std::filesystem::path DiskReservePath; +}; + +struct GcCompactStoreStats +{ + std::uint64_t RemovedDisk = 0; + std::chrono::milliseconds ElapsedMS = {}; +}; + +struct GcStats +{ + std::uint64_t CheckedCount = 0; + std::uint64_t FoundCount = 0; + std::uint64_t DeletedCount = 0; + std::uint64_t FreedMemory = 0; + std::chrono::milliseconds ElapsedMS = {}; }; struct GcReferencerStats { - std::uint64_t Count = 0; - std::uint64_t Expired = 0; - std::uint64_t Deleted = 0; - std::uint64_t RemovedDisk = 0; - std::uint64_t RemovedMemory = 0; + GcStats RemoveExpiredDataStats; + GcCompactStoreStats CompactStoreStats; - std::chrono::milliseconds RemoveExpiredDataMS = {}; std::chrono::milliseconds CreateReferenceCheckersMS = {}; std::chrono::milliseconds LockStateMS = {}; std::chrono::milliseconds ElapsedMS = {}; @@ -78,43 +93,34 @@ struct GcReferencerStats struct GcReferenceStoreStats { - std::uint64_t Count = 0; - std::uint64_t Pruned = 0; - std::uint64_t Compacted = 0; - std::uint64_t RemovedDisk = 0; - std::uint64_t RemovedMemory = 0; + GcStats RemoveUnreferencedDataStats; + GcCompactStoreStats CompactStoreStats; - std::chrono::milliseconds CreateReferencePrunerMS = {}; - std::chrono::milliseconds RemoveUnreferencedDataMS = {}; - std::chrono::milliseconds CompactReferenceStoreMS = {}; + std::chrono::milliseconds CreateReferencePrunersMS = {}; std::chrono::milliseconds ElapsedMS = {}; }; struct GcResult { - GcReferencerStats ReferencerStat; - GcReferenceStoreStats ReferenceStoreStat; - - std::uint64_t RemovedDisk = 0; - std::uint64_t RemovedMemory = 0; - std::vector<std::pair<std::string, GcReferencerStats>> ReferencerStats; std::vector<std::pair<std::string, GcReferenceStoreStats>> ReferenceStoreStats; + 
GcReferencerStats ReferencerStatSum; + GcReferenceStoreStats ReferenceStoreStatSum; + GcCompactStoreStats CompactStoresStatSum; + // Wall times, not sum of each std::chrono::milliseconds RemoveExpiredDataMS = {}; std::chrono::milliseconds CreateReferenceCheckersMS = {}; std::chrono::milliseconds LockStateMS = {}; - std::chrono::milliseconds CreateReferencePrunerMS = {}; + std::chrono::milliseconds CreateReferencePrunersMS = {}; std::chrono::milliseconds RemoveUnreferencedDataMS = {}; - std::chrono::milliseconds CompactReferenceStoreMS = {}; + std::chrono::milliseconds CompactStoresMS = {}; std::chrono::milliseconds WriteBlockMS = {}; std::chrono::milliseconds ElapsedMS = {}; - - void Sum(); }; class CbObjectWriter; @@ -129,22 +135,23 @@ struct GcCtx typedef tsl::robin_set<IoHash> HashSet; /** - * @brief An interface to remove the stored data on disk after a GcReferencePruner::RemoveUnreferencedData + * @brief An interface to remove the stored data on disk after a GcReferencer::RemoveExpiredData and + * GcReferencePruner::RemoveUnreferencedData * - * CompactReferenceStore is called after pruning (GcReferencePruner::RemoveUnreferencedData) and state locking is - * complete so implementor must take care to only remove data that has not been altered since the prune operation. + * CompactStore is called after state locking is complete so implementor must take care to only remove + * data that has not been altered since the prune operation. * - * Instance will be deleted after CompactReferenceStore has completed execution. + * Instance will be deleted after CompactStore has completed execution. * * The subclass constructor should be provided with information on what is intended to be removed. 
*/ -class GcReferenceStoreCompactor +class GcStoreCompactor { public: - virtual ~GcReferenceStoreCompactor() = default; + virtual ~GcStoreCompactor() = default; // Remove data on disk based on results from GcReferencePruner::RemoveUnreferencedData - virtual void CompactReferenceStore(GcCtx& Ctx, GcReferenceStoreStats& Stats) = 0; + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) = 0; }; /** @@ -175,10 +182,6 @@ public: /** * @brief Interface to handle GC of data that references Cid data * - * TODO: Maybe we should split up being a referencer and something that holds cache values? - * - * GcCacheStore and GcReferencer? - * * This interface is registered/unregistered to GcManager vua AddGcReferencer() and RemoveGcReferencer() */ class GcReferencer @@ -190,10 +193,7 @@ public: virtual std::string GetGcName(GcCtx& Ctx) = 0; // Remove expired data based on either GcCtx::Settings CacheExpireTime/ProjectExpireTime - // TODO: For disk layer we need to first update it with access times from the memory layer - // The implementer of GcReferencer (in our case a disk bucket) does not know about any - // potential memory cache layer :( - virtual void RemoveExpiredData(GcCtx& Ctx, GcReferencerStats& Stats) = 0; + virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) = 0; // Create 0-n GcReferenceChecker for this GcReferencer. Caller will manage lifetime of // returned instances @@ -213,14 +213,12 @@ public: // Check a set of references to see if they are in use. // Use the GetUnusedReferences input function to check if references are used and update any pointers // so any query for references determined to be unreferences will not be found. 
- // If any references a found to be unused, return a GcReferenceStoreCompactor instance which will + // If any references a found to be unused, return a GcStoreCompactor instance which will // clean up any stored bulk data mapping to the pruned references. // Caller will manage lifetime of returned instance // This function should execute as fast as possible, so try to prepare a list of references to check ahead of // call to this function and make sure the removal of unreferences items is as lightweight as possible. - virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, - GcReferenceStoreStats& Stats, - const GetUnusedReferencesFunc& GetUnusedReferences) = 0; + virtual GcStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, GcStats& Stats, const GetUnusedReferencesFunc& GetUnusedReferences) = 0; }; /** |