aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/include
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2024-09-23 19:19:40 +0200
committerGitHub Enterprise <[email protected]>2024-09-23 19:19:40 +0200
commitbc9e590727211d803cce7be84c1cbc026179b841 (patch)
tree96d89b59cdced94ce1d795cd941d35d26f6c5e88 /src/zenstore/include
parentmade fmt formatter format function const (#162) (diff)
downloadzen-bc9e590727211d803cce7be84c1cbc026179b841.tar.xz
zen-bc9e590727211d803cce7be84c1cbc026179b841.zip
gc unused refactor (#165)
* optimize IoHash and OId comparisons * refactor filtering of unused references * add attachment filtering to gc
Diffstat (limited to 'src/zenstore/include')
-rw-r--r--src/zenstore/include/zenstore/cache/cachedisklayer.h24
-rw-r--r--src/zenstore/include/zenstore/gc.h21
2 files changed, 27 insertions, 18 deletions
diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h
index a735893a1..8e31d3222 100644
--- a/src/zenstore/include/zenstore/cache/cachedisklayer.h
+++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h
@@ -343,18 +343,18 @@ public:
metrics::RequestStats m_PutOps;
metrics::RequestStats m_GetOps;
- mutable RwLock m_IndexLock;
- IndexMap m_Index;
- std::vector<AccessTime> m_AccessTimes;
- std::vector<BucketPayload> m_Payloads;
- std::vector<BucketMetaData> m_MetaDatas;
- std::vector<MetaDataIndex> m_FreeMetaDatas;
- std::vector<MemCacheData> m_MemCachedPayloads;
- std::vector<MemCachedIndex> m_FreeMemCachedPayloads;
- std::unique_ptr<HashSet> m_TrackedCacheKeys;
- std::unique_ptr<HashSet> m_TrackedReferences;
- std::atomic_uint64_t m_StandaloneSize{};
- std::atomic_uint64_t m_MemCachedSize{};
+ mutable RwLock m_IndexLock;
+ IndexMap m_Index;
+ std::vector<AccessTime> m_AccessTimes;
+ std::vector<BucketPayload> m_Payloads;
+ std::vector<BucketMetaData> m_MetaDatas;
+ std::vector<MetaDataIndex> m_FreeMetaDatas;
+ std::vector<MemCacheData> m_MemCachedPayloads;
+ std::vector<MemCachedIndex> m_FreeMemCachedPayloads;
+ std::unique_ptr<HashSet> m_TrackedCacheKeys;
+ std::unique_ptr<std::vector<IoHash>> m_TrackedReferences;
+ std::atomic_uint64_t m_StandaloneSize{};
+ std::atomic_uint64_t m_MemCachedSize{};
virtual std::string GetGcName(GcCtx& Ctx) override;
virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override;
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
index a8c5c0219..3f2f5448d 100644
--- a/src/zenstore/include/zenstore/gc.h
+++ b/src/zenstore/include/zenstore/gc.h
@@ -64,6 +64,8 @@ struct GcSettings
90; // 0 = compact only empty eligible blocks, 100 = compact all non-full eligible blocks, 1-99 = compact eligible blocks with less
// usage than CompactBlockUsageThresholdPercent
std::filesystem::path DiskReservePath;
+ IoHash AttachmentRangeMin = IoHash::Zero;
+ IoHash AttachmentRangeMax = IoHash::Max;
};
struct GcCompactStoreStats
@@ -166,10 +168,10 @@ public:
/**
* @brief An interface to check if a set of Cids are referenced
*
- * Instance will be deleted after RemoveUsedReferencesFromSet has been called 0-n times.
+ * Instance will be deleted after GetUnusedReferences has been called 0-n times.
*
* During construction of the GcReferenceChecker the world is not stopped and this is a good
- * place to do caching to be able to execute LockState and RemoveUsedReferencesFromSet quickly.
+ * place to do caching to be able to execute LockState and GetUnusedReferences quickly.
*/
class GcReferenceChecker
{
@@ -188,16 +190,19 @@ public:
// *IMPORTANT* Do *not* take any locks (shared or exclusive) in this code.
// This is because we need to acquire the locks in an ordered manner and not end up in a deadlock due to other code
// trying to get exclusive locks halfway through our execution.
- // Called once before any calls to RemoveUsedReferencesFromSet.
+ // Called once before any calls to GetUnusedReferences.
// The implementation should be as fast as possible as UpdateLockedState is part of a stop the world (from changes)
// until all instances of GcReferenceChecker UpdateLockedState are completed
virtual void UpdateLockedState(GcCtx& Ctx) = 0;
// Go through IoCids and see which ones are referenced. If one is, the reference must be removed from IoCids
// This function should use pre-cached information on what is referenced as we are in stop the world mode
- virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) = 0;
+ virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) = 0;
};
+std::span<IoHash> KeepUnusedReferences(std::span<const IoHash> SortedUsedReferences, std::span<IoHash> SortedReferences);
+bool FilterReferences(GcCtx& Ctx, std::vector<IoHash>& InOutReferences);
+
/**
* @brief An interface to implement a lock for Stop The World (from writing new data)
*
@@ -209,7 +214,7 @@ public:
virtual ~GcReferenceLocker() = default;
// Take all the locks needed to execute UpdateLockedState for all the GcReferenceChecker instances in your domain
- // Once all the GcReferenceChecker has executed UpdateLockedState and RemoveUsedReferencesFromSet for all
+ // Once all the GcReferenceChecker has executed UpdateLockedState and GetUnusedReferences for all
// domains has completed, the locks will be disposed and writes are allowed once again
virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) = 0;
};
@@ -245,7 +250,7 @@ public:
virtual std::string GetGcName(GcCtx& Ctx) = 0;
- typedef std::function<std::vector<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc;
+ typedef std::function<std::span<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc;
// Check a set of references to see if they are in use.
// Use the GetUnusedReferences input function to check if references are used and update any pointers
@@ -520,6 +525,8 @@ public:
std::optional<uint32_t> CompactBlockUsageThresholdPercent;
std::optional<bool> Verbose;
std::optional<bool> SingleThreaded;
+ IoHash AttachmentRangeMin = IoHash::Zero;
+ IoHash AttachmentRangeMax = IoHash::Max;
};
bool TriggerGc(const TriggerGcParams& Params);
@@ -547,6 +554,8 @@ private:
uint32_t CompactBlockUsageThresholdPercent,
bool Verbose,
bool SingleThreaded,
+ const IoHash& AttachmentRangeMin,
+ const IoHash& AttachmentRangeMax,
bool SilenceErrors);
void ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice);
LoggerRef Log() { return m_Log; }