diff options
| author | Dan Engelbrecht <[email protected]> | 2023-10-30 09:32:54 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-10-30 09:32:54 +0100 |
| commit | 3a6a5855cf36967c6bde31292669bfaf832c6f0b (patch) | |
| tree | 593e7c21e6840e7ad312207fddc63e1934e19d85 /src/zenstore/include | |
| parent | set up arch properly when running tests (mac) (#505) (diff) | |
| download | zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.tar.xz zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.zip | |
New GC implementation (#459)
- Feature: New garbage collection implementation, still in evaluation mode. Enabled by `--gc-v2` command line option
Diffstat (limited to 'src/zenstore/include')
| -rw-r--r-- | src/zenstore/include/zenstore/blockstore.h | 58 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 187 |
2 files changed, 238 insertions, 7 deletions
diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h index 56906f570..cd475cd8b 100644 --- a/src/zenstore/include/zenstore/blockstore.h +++ b/src/zenstore/include/zenstore/blockstore.h @@ -108,6 +108,8 @@ private: BasicFile m_File; }; +class BlockStoreCompactState; + class BlockStore { public: @@ -124,6 +126,7 @@ public: typedef std::vector<size_t> ChunkIndexArray; typedef std::function<void(const MovedChunksArray& MovedChunks, const ChunkIndexArray& RemovedChunks)> ReclaimCallback; + typedef std::function<void(const MovedChunksArray& MovedChunks, uint64_t FreedDiskSpace)> CompactCallback; typedef std::function<uint64_t()> ClaimDiskReserveCallback; typedef std::function<void(size_t ChunkIndex, const void* Data, uint64_t Size)> IterateChunksSmallSizeCallback; typedef std::function<void(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback; @@ -156,6 +159,12 @@ public: const IterateChunksSmallSizeCallback& SmallSizeCallback, const IterateChunksLargeSizeCallback& LargeSizeCallback); + void CompactBlocks( + const BlockStoreCompactState& CompactState, + uint64_t PayloadAlignment, + const CompactCallback& ChangeCallback = [](const MovedChunksArray&, uint64_t) {}, + const ClaimDiskReserveCallback& DiskReserveCallback = []() { return 0; }); + static const char* GetBlockFileExtension(); static std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex); @@ -179,6 +188,55 @@ private: std::atomic_uint64_t m_TotalSize{}; }; +class BlockStoreCompactState +{ +public: + BlockStoreCompactState() = default; + + void AddBlock(uint32_t BlockIndex) + { + auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex); + if (It == m_BlockIndexToChunkMapIndex.end()) + { + m_KeepChunks.emplace_back(std::vector<size_t>()); + m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1); + } + } + + bool AddKeepLocation(const BlockStoreLocation& Location) + { + auto It = m_BlockIndexToChunkMapIndex.find(Location.BlockIndex); + if (It == m_BlockIndexToChunkMapIndex.end()) + { + return false; + } + + std::vector<size_t>& KeepChunks = m_KeepChunks[It->second]; + size_t Index = m_ChunkLocations.size(); + KeepChunks.push_back(Index); + m_ChunkLocations.push_back(Location); + return true; + }; + + const BlockStoreLocation& GetLocation(size_t Index) const { return m_ChunkLocations[Index]; } + + void IterateBlocks(std::function<void(uint32_t BlockIndex, + const std::vector<size_t>& KeepChunkIndexes, + const std::vector<BlockStoreLocation>& ChunkLocations)> Callback) const + { + for (auto It : m_BlockIndexToChunkMapIndex) + { + size_t ChunkMapIndex = It.second; + Callback(It.first, m_KeepChunks[ChunkMapIndex], m_ChunkLocations); + } + } + +private: + std::unordered_map<uint32_t, size_t> m_BlockIndexToChunkMapIndex; // Maps to which vector in BlockKeepChunks to use for a block + std::vector<std::vector<size_t>> m_KeepChunks; // One vector per block index with index into ChunkLocations + std::vector<BlockStoreLocation> m_ChunkLocations; +}; + void blockstore_forcelink(); } // namespace zen diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index 42605804e..fa7dce331 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -20,6 +20,10 @@ ZEN_THIRD_PARTY_INCLUDES_END #include <span> #include <thread> +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_set.h> +ZEN_THIRD_PARTY_INCLUDES_END + namespace spdlog { class logger; } @@ -48,6 +52,151 @@ public: static TimePoint TimePointFromTick(const Tick TickCount) { return TimePoint{Duration{TickCount}}; } }; +//////// Begin New GC WIP + +struct GcSettings +{ + GcClock::TimePoint CacheExpireTime = GcClock::Now(); + GcClock::TimePoint ProjectStoreExpireTime = GcClock::Now(); + bool CollectSmallObjects = false; + bool IsDeleteMode = false; + bool SkipCidDelete = false; +}; + +struct GcResult +{ + uint64_t Items = 0; + uint64_t ExpiredItems = 0; + uint64_t DeletedItems = 0; + uint64_t References = 0; + uint64_t PrunedReferences = 0; + uint64_t CompactedReferences = 0; + uint64_t RemovedDiskSpace = 0; + uint64_t RemovedMemory = 0; +}; + +struct GcCtx +{ + const GcSettings Settings; + std::atomic_uint64_t Items = 0; + std::atomic_uint64_t ExpiredItems = 0; + std::atomic_uint64_t DeletedItems = 0; + std::atomic_uint64_t References = 0; + std::atomic_uint64_t PrunedReferences = 0; + std::atomic_uint64_t CompactedReferences = 0; + std::atomic_uint64_t RemovedDiskSpace = 0; + std::atomic_uint64_t RemovedMemory = 0; +}; + +typedef tsl::robin_set<IoHash> HashSet; + +/** + * @brief An interface to remove the stored data on disk after a GcReferencePruner::RemoveUnreferencedData + * + * CompactReferenceStore is called after pruning (GcReferencePruner::RemoveUnreferencedData) and state locking is + * complete so implementor must take care to only remove data that has not been altered since the prune operation. + * + * Instance will be deleted after CompactReferenceStore has completed execution. + * + * The subclass constructor should be provided with information on what is intended to be removed. + */ +class GcReferenceStoreCompactor +{ +public: + virtual ~GcReferenceStoreCompactor() = default; + + // Remove data on disk based on results from GcReferencePruner::RemoveUnreferencedData + virtual void CompactReferenceStore(GcCtx& Ctx) = 0; +}; + +/** + * @brief An interface to check if a set of Cids are referenced + * + * Instance will be deleted after RemoveUsedReferencesFromSet has been called 0-n times. + * + * During construction of the GcReferenceChecker the world is not stopped and this is a good + * place to do caching to be able to execute LockState and RemoveUsedReferencesFromSet quickly. + */ +class GcReferenceChecker +{ +public: + // Destructor should unlock what was locked in LockState + virtual ~GcReferenceChecker() = default; + + // Lock the state and make sure no references changes, usually a read-lock is taken until the destruction + // of the instance. Called once before any calls to RemoveUsedReferencesFromSet + // The implementation should be as fast as possible as LockState is part of a stop the world (from changes) + // until all instances of GcReferenceChecker are deleted + virtual void LockState(GcCtx& Ctx) = 0; + + // Go through IoCids and see which ones are referenced. If it is the reference must be removed from IoCids + // This function should use pre-cached information on what is referenced as we are in stop the world mode + virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) = 0; +}; + +/** + * @brief Interface to handle GC of data that references Cid data + * + * TODO: Maybe we should split up being a referencer and something that holds cache values? + * + * GcCacheStore and GcReferencer? + * + * This interface is registered/unregistered to GcManager vua AddGcReferencer() and RemoveGcReferencer() + */ +class GcReferencer +{ +protected: + virtual ~GcReferencer() = default; + +public: + // Remove expired data based on either GcCtx::Settings CacheExpireTime/ProjectExpireTime + // TODO: For disk layer we need to first update it with access times from the memory layer + // The implementer of GcReferencer (in our case a disk bucket) does not know about any + // potential memory cache layer :( + virtual void RemoveExpiredData(GcCtx& Ctx) = 0; + + // Create 0-n GcReferenceChecker for this GcReferencer. Caller will manage lifetime of + // returned instances + virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) = 0; +}; + +/** + * @brief Interface to prune - remove pointers to data but not the bulk data on disk - references from a GcReferenceStore + */ +class GcReferencePruner +{ +public: + virtual ~GcReferencePruner() = default; + + typedef std::function<std::vector<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc; + + // Check a set of references to see if they are in use. + // Use the GetUnusedReferences input function to check if references are used and update any pointers + // so any query for references determined to be unreferences will not be found. + // If any references a found to be unused, return a GcReferenceStoreCompactor instance which will + // clean up any stored bulk data mapping to the pruned references. + // Caller will manage lifetime of returned instance + // This function should execute as fast as possible, so try to prepare a list of references to check ahead of + // call to this function and make sure the removal of unreferences items is as lightweight as possible. + virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, const GetUnusedReferencesFunc& GetUnusedReferences) = 0; +}; + +/** + * @brief A interface to prune referenced (Cid) data from a store + */ +class GcReferenceStore +{ +protected: + virtual ~GcReferenceStore() = default; + +public: + // Create a GcReferencePruner which can check a set of references (decided by implementor) if they are no longer in use + // Caller will manage lifetime of returned instance + virtual GcReferencePruner* CreateReferencePruner(GcCtx& Ctx) = 0; +}; + +//////// End New GC WIP + /** Garbage Collection context object */ class GcContext @@ -141,6 +290,18 @@ public: GcManager(); ~GcManager(); + //////// Begin New GC WIP + + void AddGcReferencer(GcReferencer& Referencer); + void RemoveGcReferencer(GcReferencer& Referencer); + + void AddGcReferenceStore(GcReferenceStore& ReferenceStore); + void RemoveGcReferenceStore(GcReferenceStore& ReferenceStore); + + GcResult CollectGarbage(const GcSettings& Settings); + + //////// End New GC WIP + void AddGcContributor(GcContributor* Contributor); void RemoveGcContributor(GcContributor* Contributor); @@ -163,6 +324,9 @@ private: std::vector<GcStorage*> m_GcStorage; CidStore* m_CidStore = nullptr; const DiskWriteBlocker* m_DiskWriteBlocker = nullptr; + + std::vector<GcReferencer*> m_GcReferencers; + std::vector<GcReferenceStore*> m_GcReferenceStores; }; enum class GcSchedulerStatus : uint32_t @@ -172,6 +336,12 @@ enum class GcSchedulerStatus : uint32_t kStopped }; +enum class GcVersion : uint32_t +{ + kV1, + kV2 +}; + struct GcSchedulerConfig { std::filesystem::path RootDirectory; @@ -185,6 +355,7 @@ struct GcSchedulerConfig uint64_t DiskSizeSoftLimit = 0; uint64_t MinimumFreeDiskSpaceToAllowWrites = 1ul << 28; std::chrono::seconds LightweightInterval{}; + GcVersion UseGCVersion = GcVersion::kV1; }; struct GcSchedulerState @@ -246,12 +417,13 @@ public: struct TriggerGcParams { - bool CollectSmallObjects = false; - std::chrono::seconds MaxCacheDuration = std::chrono::seconds::max(); - std::chrono::seconds MaxProjectStoreDuration = std::chrono::seconds::max(); - uint64_t DiskSizeSoftLimit = 0; - bool SkipCid = false; - bool SkipDelete = false; + bool CollectSmallObjects = false; + std::chrono::seconds MaxCacheDuration = std::chrono::seconds::max(); + std::chrono::seconds MaxProjectStoreDuration = std::chrono::seconds::max(); + uint64_t DiskSizeSoftLimit = 0; + bool SkipCid = false; + bool SkipDelete = false; + std::optional<GcVersion> ForceGCVersion; }; bool TriggerGc(const TriggerGcParams& Params); @@ -270,7 +442,8 @@ private: const GcClock::TimePoint& ProjectStoreExpireTime, bool Delete, bool CollectSmallObjects, - bool SkipCid); + bool SkipCid, + GcVersion UseGCVersion); void ScrubStorage(bool DoDelete, std::chrono::seconds TimeSlice); spdlog::logger& Log() { return m_Log; } virtual bool AreDiskWritesAllowed() const override { return !m_AreDiskWritesBlocked.load(); } |