aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/include
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-10-30 09:32:54 +0100
committerGitHub <[email protected]>2023-10-30 09:32:54 +0100
commit3a6a5855cf36967c6bde31292669bfaf832c6f0b (patch)
tree593e7c21e6840e7ad312207fddc63e1934e19d85 /src/zenstore/include
parentset up arch properly when running tests (mac) (#505) (diff)
downloadzen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.tar.xz
zen-3a6a5855cf36967c6bde31292669bfaf832c6f0b.zip
New GC implementation (#459)
- Feature: New garbage collection implementation, still in evaluation mode. Enabled by `--gc-v2` command line option
Diffstat (limited to 'src/zenstore/include')
-rw-r--r--src/zenstore/include/zenstore/blockstore.h58
-rw-r--r--src/zenstore/include/zenstore/gc.h187
2 files changed, 238 insertions, 7 deletions
diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h
index 56906f570..cd475cd8b 100644
--- a/src/zenstore/include/zenstore/blockstore.h
+++ b/src/zenstore/include/zenstore/blockstore.h
@@ -108,6 +108,8 @@ private:
BasicFile m_File;
};
+class BlockStoreCompactState;
+
class BlockStore
{
public:
@@ -124,6 +126,7 @@ public:
typedef std::vector<size_t> ChunkIndexArray;
typedef std::function<void(const MovedChunksArray& MovedChunks, const ChunkIndexArray& RemovedChunks)> ReclaimCallback;
+ typedef std::function<void(const MovedChunksArray& MovedChunks, uint64_t FreedDiskSpace)> CompactCallback;
typedef std::function<uint64_t()> ClaimDiskReserveCallback;
typedef std::function<void(size_t ChunkIndex, const void* Data, uint64_t Size)> IterateChunksSmallSizeCallback;
typedef std::function<void(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback;
@@ -156,6 +159,12 @@ public:
const IterateChunksSmallSizeCallback& SmallSizeCallback,
const IterateChunksLargeSizeCallback& LargeSizeCallback);
+ void CompactBlocks(
+ const BlockStoreCompactState& CompactState,
+ uint64_t PayloadAlignment,
+ const CompactCallback& ChangeCallback = [](const MovedChunksArray&, uint64_t) {},
+ const ClaimDiskReserveCallback& DiskReserveCallback = []() { return 0; });
+
static const char* GetBlockFileExtension();
static std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex);
@@ -179,6 +188,55 @@ private:
std::atomic_uint64_t m_TotalSize{};
};
+class BlockStoreCompactState
+{
+public:
+ BlockStoreCompactState() = default;
+
+ void AddBlock(uint32_t BlockIndex)
+ {
+ auto It = m_BlockIndexToChunkMapIndex.find(BlockIndex);
+ if (It == m_BlockIndexToChunkMapIndex.end())
+ {
+ m_KeepChunks.emplace_back(std::vector<size_t>());
+ m_BlockIndexToChunkMapIndex.insert_or_assign(BlockIndex, m_KeepChunks.size() - 1);
+ }
+ }
+
+ bool AddKeepLocation(const BlockStoreLocation& Location)
+ {
+ auto It = m_BlockIndexToChunkMapIndex.find(Location.BlockIndex);
+ if (It == m_BlockIndexToChunkMapIndex.end())
+ {
+ return false;
+ }
+
+ std::vector<size_t>& KeepChunks = m_KeepChunks[It->second];
+ size_t Index = m_ChunkLocations.size();
+ KeepChunks.push_back(Index);
+ m_ChunkLocations.push_back(Location);
+ return true;
+ };
+
+ const BlockStoreLocation& GetLocation(size_t Index) const { return m_ChunkLocations[Index]; }
+
+ void IterateBlocks(std::function<void(uint32_t BlockIndex,
+ const std::vector<size_t>& KeepChunkIndexes,
+ const std::vector<BlockStoreLocation>& ChunkLocations)> Callback) const
+ {
+ for (auto It : m_BlockIndexToChunkMapIndex)
+ {
+ size_t ChunkMapIndex = It.second;
+ Callback(It.first, m_KeepChunks[ChunkMapIndex], m_ChunkLocations);
+ }
+ }
+
+private:
+ std::unordered_map<uint32_t, size_t> m_BlockIndexToChunkMapIndex; // Maps to which vector in BlockKeepChunks to use for a block
+ std::vector<std::vector<size_t>> m_KeepChunks; // One vector per block index with index into ChunkLocations
+ std::vector<BlockStoreLocation> m_ChunkLocations;
+};
+
void blockstore_forcelink();
} // namespace zen
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
index 42605804e..fa7dce331 100644
--- a/src/zenstore/include/zenstore/gc.h
+++ b/src/zenstore/include/zenstore/gc.h
@@ -20,6 +20,10 @@ ZEN_THIRD_PARTY_INCLUDES_END
#include <span>
#include <thread>
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_set.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
namespace spdlog {
class logger;
}
@@ -48,6 +52,151 @@ public:
static TimePoint TimePointFromTick(const Tick TickCount) { return TimePoint{Duration{TickCount}}; }
};
+//////// Begin New GC WIP
+
+struct GcSettings
+{
+ GcClock::TimePoint CacheExpireTime = GcClock::Now();
+ GcClock::TimePoint ProjectStoreExpireTime = GcClock::Now();
+ bool CollectSmallObjects = false;
+ bool IsDeleteMode = false;
+ bool SkipCidDelete = false;
+};
+
+struct GcResult
+{
+ uint64_t Items = 0;
+ uint64_t ExpiredItems = 0;
+ uint64_t DeletedItems = 0;
+ uint64_t References = 0;
+ uint64_t PrunedReferences = 0;
+ uint64_t CompactedReferences = 0;
+ uint64_t RemovedDiskSpace = 0;
+ uint64_t RemovedMemory = 0;
+};
+
+struct GcCtx
+{
+ const GcSettings Settings;
+ std::atomic_uint64_t Items = 0;
+ std::atomic_uint64_t ExpiredItems = 0;
+ std::atomic_uint64_t DeletedItems = 0;
+ std::atomic_uint64_t References = 0;
+ std::atomic_uint64_t PrunedReferences = 0;
+ std::atomic_uint64_t CompactedReferences = 0;
+ std::atomic_uint64_t RemovedDiskSpace = 0;
+ std::atomic_uint64_t RemovedMemory = 0;
+};
+
+typedef tsl::robin_set<IoHash> HashSet;
+
+/**
+ * @brief An interface to remove the stored data on disk after a GcReferencePruner::RemoveUnreferencedData
+ *
+ * CompactReferenceStore is called after pruning (GcReferencePruner::RemoveUnreferencedData) and state locking is
+ * complete so implementor must take care to only remove data that has not been altered since the prune operation.
+ *
+ * Instance will be deleted after CompactReferenceStore has completed execution.
+ *
+ * The subclass constructor should be provided with information on what is intended to be removed.
+ */
+class GcReferenceStoreCompactor
+{
+public:
+ virtual ~GcReferenceStoreCompactor() = default;
+
+ // Remove data on disk based on results from GcReferencePruner::RemoveUnreferencedData
+ virtual void CompactReferenceStore(GcCtx& Ctx) = 0;
+};
+
+/**
+ * @brief An interface to check if a set of Cids are referenced
+ *
+ * Instance will be deleted after RemoveUsedReferencesFromSet has been called 0-n times.
+ *
+ * During construction of the GcReferenceChecker the world is not stopped and this is a good
+ * place to do caching to be able to execute LockState and RemoveUsedReferencesFromSet quickly.
+ */
+class GcReferenceChecker
+{
+public:
+ // Destructor should unlock what was locked in LockState
+ virtual ~GcReferenceChecker() = default;
+
+ // Lock the state and make sure no references changes, usually a read-lock is taken until the destruction
+ // of the instance. Called once before any calls to RemoveUsedReferencesFromSet
+ // The implementation should be as fast as possible as LockState is part of a stop the world (from changes)
+ // until all instances of GcReferenceChecker are deleted
+ virtual void LockState(GcCtx& Ctx) = 0;
+
+ // Go through IoCids and see which ones are referenced. If it is the reference must be removed from IoCids
+ // This function should use pre-cached information on what is referenced as we are in stop the world mode
+ virtual void RemoveUsedReferencesFromSet(GcCtx& Ctx, HashSet& IoCids) = 0;
+};
+
+/**
+ * @brief Interface to handle GC of data that references Cid data
+ *
+ * TODO: Maybe we should split up being a referencer and something that holds cache values?
+ *
+ * GcCacheStore and GcReferencer?
+ *
+ * This interface is registered/unregistered to GcManager vua AddGcReferencer() and RemoveGcReferencer()
+ */
+class GcReferencer
+{
+protected:
+ virtual ~GcReferencer() = default;
+
+public:
+ // Remove expired data based on either GcCtx::Settings CacheExpireTime/ProjectExpireTime
+ // TODO: For disk layer we need to first update it with access times from the memory layer
+ // The implementer of GcReferencer (in our case a disk bucket) does not know about any
+ // potential memory cache layer :(
+ virtual void RemoveExpiredData(GcCtx& Ctx) = 0;
+
+ // Create 0-n GcReferenceChecker for this GcReferencer. Caller will manage lifetime of
+ // returned instances
+ virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) = 0;
+};
+
+/**
+ * @brief Interface to prune - remove pointers to data but not the bulk data on disk - references from a GcReferenceStore
+ */
+class GcReferencePruner
+{
+public:
+ virtual ~GcReferencePruner() = default;
+
+ typedef std::function<std::vector<IoHash>(std::span<IoHash> References)> GetUnusedReferencesFunc;
+
+ // Check a set of references to see if they are in use.
+ // Use the GetUnusedReferences input function to check if references are used and update any pointers
+ // so any query for references determined to be unreferences will not be found.
+ // If any references a found to be unused, return a GcReferenceStoreCompactor instance which will
+ // clean up any stored bulk data mapping to the pruned references.
+ // Caller will manage lifetime of returned instance
+ // This function should execute as fast as possible, so try to prepare a list of references to check ahead of
+ // call to this function and make sure the removal of unreferences items is as lightweight as possible.
+ virtual GcReferenceStoreCompactor* RemoveUnreferencedData(GcCtx& Ctx, const GetUnusedReferencesFunc& GetUnusedReferences) = 0;
+};
+
+/**
+ * @brief A interface to prune referenced (Cid) data from a store
+ */
+class GcReferenceStore
+{
+protected:
+ virtual ~GcReferenceStore() = default;
+
+public:
+ // Create a GcReferencePruner which can check a set of references (decided by implementor) if they are no longer in use
+ // Caller will manage lifetime of returned instance
+ virtual GcReferencePruner* CreateReferencePruner(GcCtx& Ctx) = 0;
+};
+
+//////// End New GC WIP
+
/** Garbage Collection context object
*/
class GcContext
@@ -141,6 +290,18 @@ public:
GcManager();
~GcManager();
+ //////// Begin New GC WIP
+
+ void AddGcReferencer(GcReferencer& Referencer);
+ void RemoveGcReferencer(GcReferencer& Referencer);
+
+ void AddGcReferenceStore(GcReferenceStore& ReferenceStore);
+ void RemoveGcReferenceStore(GcReferenceStore& ReferenceStore);
+
+ GcResult CollectGarbage(const GcSettings& Settings);
+
+ //////// End New GC WIP
+
void AddGcContributor(GcContributor* Contributor);
void RemoveGcContributor(GcContributor* Contributor);
@@ -163,6 +324,9 @@ private:
std::vector<GcStorage*> m_GcStorage;
CidStore* m_CidStore = nullptr;
const DiskWriteBlocker* m_DiskWriteBlocker = nullptr;
+
+ std::vector<GcReferencer*> m_GcReferencers;
+ std::vector<GcReferenceStore*> m_GcReferenceStores;
};
enum class GcSchedulerStatus : uint32_t
@@ -172,6 +336,12 @@ enum class GcSchedulerStatus : uint32_t
kStopped
};
+enum class GcVersion : uint32_t
+{
+ kV1,
+ kV2
+};
+
struct GcSchedulerConfig
{
std::filesystem::path RootDirectory;
@@ -185,6 +355,7 @@ struct GcSchedulerConfig
uint64_t DiskSizeSoftLimit = 0;
uint64_t MinimumFreeDiskSpaceToAllowWrites = 1ul << 28;
std::chrono::seconds LightweightInterval{};
+ GcVersion UseGCVersion = GcVersion::kV1;
};
struct GcSchedulerState
@@ -246,12 +417,13 @@ public:
struct TriggerGcParams
{
- bool CollectSmallObjects = false;
- std::chrono::seconds MaxCacheDuration = std::chrono::seconds::max();
- std::chrono::seconds MaxProjectStoreDuration = std::chrono::seconds::max();
- uint64_t DiskSizeSoftLimit = 0;
- bool SkipCid = false;
- bool SkipDelete = false;
+ bool CollectSmallObjects = false;
+ std::chrono::seconds MaxCacheDuration = std::chrono::seconds::max();
+ std::chrono::seconds MaxProjectStoreDuration = std::chrono::seconds::max();
+ uint64_t DiskSizeSoftLimit = 0;
+ bool SkipCid = false;
+ bool SkipDelete = false;
+ std::optional<GcVersion> ForceGCVersion;
};
bool TriggerGc(const TriggerGcParams& Params);
@@ -270,7 +442,8 @@ private:
const GcClock::TimePoint& ProjectStoreExpireTime,
bool Delete,
bool CollectSmallObjects,
- bool SkipCid);
+ bool SkipCid,
+ GcVersion UseGCVersion);
void ScrubStorage(bool DoDelete, std::chrono::seconds TimeSlice);
spdlog::logger& Log() { return m_Log; }
virtual bool AreDiskWritesAllowed() const override { return !m_AreDiskWritesBlocked.load(); }