diff options
| author | Dan Engelbrecht <[email protected]> | 2024-09-23 19:19:40 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-09-23 19:19:40 +0200 |
| commit | bc9e590727211d803cce7be84c1cbc026179b841 (patch) | |
| tree | 96d89b59cdced94ce1d795cd941d35d26f6c5e88 /src/zenstore/include | |
| parent | made fmt formatter format function const (#162) (diff) | |
| download | zen-bc9e590727211d803cce7be84c1cbc026179b841.tar.xz zen-bc9e590727211d803cce7be84c1cbc026179b841.zip | |
gc unused refactor (#165)
* optimize IoHash and OId comparisons
* refactor filtering of unused references
* add attachment filtering to gc
Diffstat (limited to 'src/zenstore/include')
| -rw-r--r-- | src/zenstore/include/zenstore/cache/cachedisklayer.h | 24 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 21 |
2 files changed, 27 insertions, 18 deletions
diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index a735893a1..8e31d3222 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -343,18 +343,18 @@ public: metrics::RequestStats m_PutOps; metrics::RequestStats m_GetOps; - mutable RwLock m_IndexLock; - IndexMap m_Index; - std::vector<AccessTime> m_AccessTimes; - std::vector<BucketPayload> m_Payloads; - std::vector<BucketMetaData> m_MetaDatas; - std::vector<MetaDataIndex> m_FreeMetaDatas; - std::vector<MemCacheData> m_MemCachedPayloads; - std::vector<MemCachedIndex> m_FreeMemCachedPayloads; - std::unique_ptr<HashSet> m_TrackedCacheKeys; - std::unique_ptr<HashSet> m_TrackedReferences; - std::atomic_uint64_t m_StandaloneSize{}; - std::atomic_uint64_t m_MemCachedSize{}; + mutable RwLock m_IndexLock; + IndexMap m_Index; + std::vector<AccessTime> m_AccessTimes; + std::vector<BucketPayload> m_Payloads; + std::vector<BucketMetaData> m_MetaDatas; + std::vector<MetaDataIndex> m_FreeMetaDatas; + std::vector<MemCacheData> m_MemCachedPayloads; + std::vector<MemCachedIndex> m_FreeMemCachedPayloads; + std::unique_ptr<HashSet> m_TrackedCacheKeys; + std::unique_ptr<std::vector<IoHash>> m_TrackedReferences; + std::atomic_uint64_t m_StandaloneSize{}; + std::atomic_uint64_t m_MemCachedSize{}; virtual std::string GetGcName(GcCtx& Ctx) override; virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index a8c5c0219..3f2f5448d 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -64,6 +64,8 @@ struct GcSettings 90; // 0 = compact only empty eligible blocks, 100 = compact all non-full eligible blocks, 1-99 = compact eligible blocks with less // usage than CompactBlockUsageThresholdPercent std::filesystem::path DiskReservePath; + IoHash 
AttachmentRangeMin = IoHash::Zero; + IoHash AttachmentRangeMax = IoHash::Max; }; struct GcCompactStoreStats @@ -166,10 +168,10 @@ public: /** * @brief An interface to check if a set of Cids are referenced * - * Instance will be deleted after RemoveUsedReferencesFromSet has been called 0-n times. + * Instance will be deleted after GetUnusedReferences has been called 0-n times. * * During construction of the GcReferenceChecker the world is not stopped and this is a good - * place to do caching to be able to execute LockState and RemoveUsedReferencesFromSet quickly. + * place to do caching to be able to execute LockState and GetUnusedReferences quickly. */ class GcReferenceChecker { @@ -188,16 +190,19 @@ public: // *IMPORTANT* Do *not* take any locks (shared or exclusive) in this code. // This is because we need to acquire the locks in an ordered manner and not end up in a deadlock due to other code // trying to get exclusive locks halfway through our execution. - // Called once before any calls to RemoveUsedReferencesFromSet. + // Called once before any calls to GetUnusedReferences. // The implementation should be as fast as possible as UpdateLockedState is part of a stop the world (from changes) // until all instances of GcReferenceChecker UpdateLockedState are completed virtual void UpdateLockedState(GcCtx& Ctx) = 0; // Go through IoCids and see which ones are referenced. 
If it is the reference must be removed from IoCids // This function should use pre-cached information on what is referenced as we are in stop the world mode - virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) = 0; + virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) = 0; }; +std::span<IoHash> KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences); +bool FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences); + /** * @brief An interface to implement a lock for Stop The World (from writing new data) * @@ -209,7 +214,7 @@ public: virtual ~GcReferenceLocker() = default; // Take all the locks needed to execute UpdateLockedState for the all the GcReferenceChecker in your domain - // Once all the GcReferenceChecker has executed UpdateLockedState and RemoveUsedReferencesFromSet for all + // Once all the GcReferenceChecker has executed UpdateLockedState and GetUnusedReferences for all // domains has completed, the locks will be disposed and writes are allowed once again virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) = 0; }; @@ -245,7 +250,7 @@ public: virtual std::string GetGcName(GcCtx& Ctx) = 0; - typedef std::function<std::vector<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc; + typedef std::function<std::span<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc; // Check a set of references to see if they are in use. 
// Use the GetUnusedReferences input function to check if references are used and update any pointers @@ -520,6 +525,8 @@ public: std::optional<uint32_t> CompactBlockUsageThresholdPercent; std::optional<bool> Verbose; std::optional<bool> SingleThreaded; + IoHash AttachmentRangeMin = IoHash::Zero; + IoHash AttachmentRangeMax = IoHash::Max; }; bool TriggerGc(const TriggerGcParams& Params); @@ -547,6 +554,8 @@ private: uint32_t CompactBlockUsageThresholdPercent, bool Verbose, bool SingleThreaded, + const IoHash& AttachmentRangeMin, + const IoHash& AttachmentRangeMax, bool SilenceErrors); void ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice); LoggerRef Log() { return m_Log; } |