aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/include
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2023-05-02 10:01:47 +0200
committerGitHub <[email protected]>2023-05-02 10:01:47 +0200
commit075d17f8ada47e990fe94606c3d21df409223465 (patch)
treee50549b766a2f3c354798a54ff73404217b4c9af /src/zenstore/include
parentfix: bundle shouldn't append content zip to zen (diff)
downloadzen-075d17f8ada47e990fe94606c3d21df409223465.tar.xz
zen-075d17f8ada47e990fe94606c3d21df409223465.zip
moved source directories into `/src` (#264)
* moved source directories into `/src` * updated bundle.lua for new `src` path * moved some docs, icon * removed old test trees
Diffstat (limited to 'src/zenstore/include')
-rw-r--r--src/zenstore/include/zenstore/blockstore.h175
-rw-r--r--src/zenstore/include/zenstore/caslog.h91
-rw-r--r--src/zenstore/include/zenstore/cidstore.h87
-rw-r--r--src/zenstore/include/zenstore/gc.h242
-rw-r--r--src/zenstore/include/zenstore/hashkeyset.h54
-rw-r--r--src/zenstore/include/zenstore/scrubcontext.h41
-rw-r--r--src/zenstore/include/zenstore/zenstore.h13
7 files changed, 703 insertions, 0 deletions
diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h
new file mode 100644
index 000000000..857ccae38
--- /dev/null
+++ b/src/zenstore/include/zenstore/blockstore.h
@@ -0,0 +1,175 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/filesystem.h>
+#include <zencore/zencore.h>
+#include <zenutil/basicfile.h>
+
+#include <unordered_map>
+#include <unordered_set>
+
+namespace zen {
+
+//////////////////////////////////////////////////////////////////////////
+
+struct BlockStoreLocation
+{
+ uint32_t BlockIndex;
+ uint64_t Offset;
+ uint64_t Size;
+
+ inline auto operator<=>(const BlockStoreLocation& Rhs) const = default;
+};
+
+#pragma pack(push)
+#pragma pack(1)
+
+struct BlockStoreDiskLocation
+{
+ constexpr static uint32_t MaxBlockIndexBits = 20;
+ constexpr static uint32_t MaxOffsetBits = 28;
+ constexpr static uint32_t MaxBlockIndex = (1ul << BlockStoreDiskLocation::MaxBlockIndexBits) - 1ul;
+ constexpr static uint32_t MaxOffset = (1ul << BlockStoreDiskLocation::MaxOffsetBits) - 1ul;
+
+ BlockStoreDiskLocation(const BlockStoreLocation& Location, uint64_t OffsetAlignment)
+ {
+ Init(Location.BlockIndex, Location.Offset / OffsetAlignment, Location.Size);
+ }
+
+ BlockStoreDiskLocation() = default;
+
+ inline BlockStoreLocation Get(uint64_t OffsetAlignment) const
+ {
+ uint64_t PackedOffset = 0;
+ memcpy(&PackedOffset, &m_Offset, sizeof m_Offset);
+ return {.BlockIndex = static_cast<std::uint32_t>(PackedOffset >> MaxOffsetBits),
+ .Offset = (PackedOffset & MaxOffset) * OffsetAlignment,
+ .Size = GetSize()};
+ }
+
+ inline uint32_t GetBlockIndex() const
+ {
+ uint64_t PackedOffset = 0;
+ memcpy(&PackedOffset, &m_Offset, sizeof m_Offset);
+ return static_cast<std::uint32_t>(PackedOffset >> MaxOffsetBits);
+ }
+
+ inline uint64_t GetOffset(uint64_t OffsetAlignment) const
+ {
+ uint64_t PackedOffset = 0;
+ memcpy(&PackedOffset, &m_Offset, sizeof m_Offset);
+ return (PackedOffset & MaxOffset) * OffsetAlignment;
+ }
+
+ inline uint64_t GetSize() const { return m_Size; }
+
+ inline auto operator<=>(const BlockStoreDiskLocation& Rhs) const = default;
+
+private:
+ inline void Init(uint32_t BlockIndex, uint64_t Offset, uint64_t Size)
+ {
+ ZEN_ASSERT(BlockIndex <= MaxBlockIndex);
+ ZEN_ASSERT(Offset <= MaxOffset);
+ ZEN_ASSERT(Size <= std::numeric_limits<std::uint32_t>::max());
+
+ m_Size = static_cast<uint32_t>(Size);
+ uint64_t PackedOffset = (static_cast<uint64_t>(BlockIndex) << MaxOffsetBits) + Offset;
+ memcpy(&m_Offset[0], &PackedOffset, sizeof m_Offset);
+ }
+
+ uint32_t m_Size;
+ uint8_t m_Offset[6];
+};
+
+#pragma pack(pop)
+
+struct BlockStoreFile : public RefCounted
+{
+ explicit BlockStoreFile(const std::filesystem::path& BlockPath);
+ ~BlockStoreFile();
+ const std::filesystem::path& GetPath() const;
+ void Open();
+ void Create(uint64_t InitialSize);
+ void MarkAsDeleteOnClose();
+ uint64_t FileSize();
+ IoBuffer GetChunk(uint64_t Offset, uint64_t Size);
+ void Read(void* Data, uint64_t Size, uint64_t FileOffset);
+ void Write(const void* Data, uint64_t Size, uint64_t FileOffset);
+ void Flush();
+ BasicFile& GetBasicFile();
+ void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun);
+
+private:
+ const std::filesystem::path m_Path;
+ IoBuffer m_IoBuffer;
+ BasicFile m_File;
+};
+
+class BlockStore
+{
+public:
+ struct ReclaimSnapshotState
+ {
+ std::unordered_set<uint32_t> m_ActiveWriteBlocks;
+ size_t BlockCount;
+ };
+
+ typedef std::vector<std::pair<size_t, BlockStoreLocation>> MovedChunksArray;
+ typedef std::vector<size_t> ChunkIndexArray;
+
+ typedef std::function<void(const MovedChunksArray& MovedChunks, const ChunkIndexArray& RemovedChunks)> ReclaimCallback;
+ typedef std::function<uint64_t()> ClaimDiskReserveCallback;
+ typedef std::function<void(size_t ChunkIndex, const void* Data, uint64_t Size)> IterateChunksSmallSizeCallback;
+ typedef std::function<void(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback;
+ typedef std::function<void(const BlockStoreLocation& Location)> WriteChunkCallback;
+
+ void Initialize(const std::filesystem::path& BlocksBasePath,
+ uint64_t MaxBlockSize,
+ uint64_t MaxBlockCount,
+ const std::vector<BlockStoreLocation>& KnownLocations);
+ void Close();
+
+ void WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, const WriteChunkCallback& Callback);
+
+ IoBuffer TryGetChunk(const BlockStoreLocation& Location) const;
+ void Flush();
+
+ ReclaimSnapshotState GetReclaimSnapshotState();
+ void ReclaimSpace(
+ const ReclaimSnapshotState& Snapshot,
+ const std::vector<BlockStoreLocation>& ChunkLocations,
+ const ChunkIndexArray& KeepChunkIndexes,
+ uint64_t PayloadAlignment,
+ bool DryRun,
+ const ReclaimCallback& ChangeCallback = [](const MovedChunksArray&, const ChunkIndexArray&) {},
+ const ClaimDiskReserveCallback& DiskReserveCallback = []() { return 0; });
+
+ void IterateChunks(const std::vector<BlockStoreLocation>& ChunkLocations,
+ const IterateChunksSmallSizeCallback& SmallSizeCallback,
+ const IterateChunksLargeSizeCallback& LargeSizeCallback);
+
+ static const char* GetBlockFileExtension();
+ static std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex);
+
+ inline uint64_t TotalSize() const { return m_TotalSize.load(std::memory_order::relaxed); }
+
+private:
+ std::unordered_map<uint32_t, Ref<BlockStoreFile>> m_ChunkBlocks;
+
+ mutable RwLock m_InsertLock; // used to serialize inserts
+ Ref<BlockStoreFile> m_WriteBlock;
+ std::uint64_t m_CurrentInsertOffset = 0;
+ std::atomic_uint32_t m_WriteBlockIndex{};
+ std::vector<uint32_t> m_ActiveWriteBlocks;
+
+ uint64_t m_MaxBlockSize = 1u << 28;
+ uint64_t m_MaxBlockCount = BlockStoreDiskLocation::MaxBlockIndex + 1;
+ std::filesystem::path m_BlocksBasePath;
+
+ std::atomic_uint64_t m_TotalSize{};
+};
+
+void blockstore_forcelink();
+
+} // namespace zen
diff --git a/src/zenstore/include/zenstore/caslog.h b/src/zenstore/include/zenstore/caslog.h
new file mode 100644
index 000000000..d8c3f22f3
--- /dev/null
+++ b/src/zenstore/include/zenstore/caslog.h
@@ -0,0 +1,91 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/uid.h>
+#include <zenutil/basicfile.h>
+
+namespace zen {
+
+class CasLogFile
+{
+public:
+ CasLogFile();
+ ~CasLogFile();
+
+ enum class Mode
+ {
+ kRead,
+ kWrite,
+ kTruncate
+ };
+
+ static bool IsValid(std::filesystem::path FileName, size_t RecordSize);
+ void Open(std::filesystem::path FileName, size_t RecordSize, Mode Mode);
+ void Append(const void* DataPointer, uint64_t DataSize);
+ void Replay(std::function<void(const void*)>&& Handler, uint64_t SkipEntryCount);
+ void Flush();
+ void Close();
+ uint64_t GetLogSize();
+ uint64_t GetLogCount();
+
+private:
+ struct FileHeader
+ {
+ uint8_t Magic[16];
+ uint32_t RecordSize = 0;
+ Oid LogId;
+ uint32_t ValidatedTail = 0;
+ uint32_t Pad[6];
+ uint32_t Checksum = 0;
+
+ static const inline uint8_t MagicSequence[16] = {'.', '-', '=', ' ', 'C', 'A', 'S', 'L', 'O', 'G', 'v', '1', ' ', '=', '-', '.'};
+
+ ZENCORE_API uint32_t ComputeChecksum();
+ void Finalize() { Checksum = ComputeChecksum(); }
+ };
+
+ static_assert(sizeof(FileHeader) == 64);
+
+private:
+ void Open(std::filesystem::path FileName, size_t RecordSize, BasicFile::Mode Mode);
+
+ BasicFile m_File;
+ FileHeader m_Header;
+ size_t m_RecordSize = 1;
+ std::atomic<uint64_t> m_AppendOffset = 0;
+};
+
+template<typename T>
+class TCasLogFile : public CasLogFile
+{
+public:
+ static bool IsValid(std::filesystem::path FileName) { return CasLogFile::IsValid(FileName, sizeof(T)); }
+ void Open(std::filesystem::path FileName, Mode Mode) { CasLogFile::Open(FileName, sizeof(T), Mode); }
+
+ // This should be called before the Replay() is called to do some basic sanity checking
+ bool Initialize() { return true; }
+
+ void Replay(Invocable<const T&> auto Handler, uint64_t SkipEntryCount)
+ {
+ CasLogFile::Replay(
+ [&](const void* VoidPtr) {
+ const T& Record = *reinterpret_cast<const T*>(VoidPtr);
+
+ Handler(Record);
+ },
+ SkipEntryCount);
+ }
+
+ void Append(const T& Record)
+ {
+ // TODO: implement some more efficent path here so we don't end up with
+ // a syscall per append
+
+ CasLogFile::Append(&Record, sizeof Record);
+ }
+
+ void Append(const std::span<T>& Records) { CasLogFile::Append(Records.data(), sizeof(T) * Records.size()); }
+};
+
+} // namespace zen
diff --git a/src/zenstore/include/zenstore/cidstore.h b/src/zenstore/include/zenstore/cidstore.h
new file mode 100644
index 000000000..16ca78225
--- /dev/null
+++ b/src/zenstore/include/zenstore/cidstore.h
@@ -0,0 +1,87 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include "zenstore.h"
+
+#include <zencore/iohash.h>
+#include <zenstore/hashkeyset.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_map.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+#include <filesystem>
+
+namespace zen {
+
+class GcManager;
+class CasStore;
+class CompressedBuffer;
+class IoBuffer;
+class ScrubContext;
+
+/** Content Store
+ *
+ * Data in the content store is referenced by content identifiers (CIDs), it works
+ * with compressed buffers so the CID is expected to be the RAW hash. It stores the
+ * chunk directly under the RAW hash.
+ * This class maps uncompressed hashes (CIDs) to compressed hashes and may
+ * be used to deal with other kinds of indirections in the future. For example, if we want
+ * to support chunking then a CID may represent a list of chunks which could be concatenated
+ * to form the referenced chunk.
+ *
+ */
+
+struct CidStoreSize
+{
+ uint64_t TinySize = 0;
+ uint64_t SmallSize = 0;
+ uint64_t LargeSize = 0;
+ uint64_t TotalSize = 0;
+};
+
+struct CidStoreConfiguration
+{
+ // Root directory for CAS store
+ std::filesystem::path RootDirectory;
+
+ // Threshold below which values are considered 'tiny' and managed using the 'tiny values' strategy
+ uint64_t TinyValueThreshold = 1024;
+
+ // Threshold above which values are considered 'huge' and managed using the 'huge values' strategy
+ uint64_t HugeValueThreshold = 1024 * 1024;
+};
+
+class CidStore
+{
+public:
+ CidStore(GcManager& Gc);
+ ~CidStore();
+
+ struct InsertResult
+ {
+ bool New = false;
+ };
+ enum class InsertMode
+ {
+ kCopyOnly,
+ kMayBeMovedInPlace
+ };
+
+ void Initialize(const CidStoreConfiguration& Config);
+ InsertResult AddChunk(const IoBuffer& ChunkData, const IoHash& RawHash, InsertMode Mode = InsertMode::kMayBeMovedInPlace);
+ IoBuffer FindChunkByCid(const IoHash& DecompressedId);
+ bool ContainsChunk(const IoHash& DecompressedId);
+ void FilterChunks(HashKeySet& InOutChunks);
+ void Flush();
+ void Scrub(ScrubContext& Ctx);
+ CidStoreSize TotalSize() const;
+
+private:
+ struct Impl;
+ std::unique_ptr<CasStore> m_CasStore;
+ std::unique_ptr<Impl> m_Impl;
+};
+
+} // namespace zen
diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h
new file mode 100644
index 000000000..e0354b331
--- /dev/null
+++ b/src/zenstore/include/zenstore/gc.h
@@ -0,0 +1,242 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/iohash.h>
+#include <zencore/thread.h>
+#include <zenstore/caslog.h>
+
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <filesystem>
+#include <functional>
+#include <optional>
+#include <span>
+#include <thread>
+
+#define ZEN_USE_REF_TRACKING 0 // This is not currently functional
+
+namespace spdlog {
+class logger;
+}
+
+namespace zen {
+
+class HashKeySet;
+class GcManager;
+class CidStore;
+struct IoHash;
+
+/** GC clock
+ */
+class GcClock
+{
+public:
+ using Clock = std::chrono::system_clock;
+ using TimePoint = Clock::time_point;
+ using Duration = Clock::duration;
+ using Tick = int64_t;
+
+ static Tick TickCount() { return Now().time_since_epoch().count(); }
+ static TimePoint Now() { return Clock::now(); }
+ static TimePoint TimePointFromTick(const Tick TickCount) { return TimePoint{Duration{TickCount}}; }
+};
+
+/** Garbage Collection context object
+ */
+class GcContext
+{
+public:
+ GcContext(const GcClock::TimePoint& ExpireTime);
+ ~GcContext();
+
+ void AddRetainedCids(std::span<const IoHash> Cid);
+ void SetExpiredCacheKeys(const std::string& CacheKeyContext, std::vector<IoHash>&& ExpiredKeys);
+
+ void IterateCids(std::function<void(const IoHash&)> Callback);
+
+ void FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&)> KeepFunc);
+ void FilterCids(std::span<const IoHash> Cid, std::function<void(const IoHash&, bool)>&& FilterFunc);
+
+ void AddDeletedCids(std::span<const IoHash> Cas);
+ const HashKeySet& DeletedCids();
+
+ std::span<const IoHash> ExpiredCacheKeys(const std::string& CacheKeyContext) const;
+
+ bool IsDeletionMode() const;
+ void SetDeletionMode(bool NewState);
+
+ bool CollectSmallObjects() const;
+ void CollectSmallObjects(bool NewState);
+
+ GcClock::TimePoint ExpireTime() const;
+
+ void DiskReservePath(const std::filesystem::path& Path);
+ uint64_t ClaimGCReserve();
+
+private:
+ struct GcState;
+
+ std::unique_ptr<GcState> m_State;
+};
+
+/** GC root contributor
+
+ Higher level data structures provide roots for the garbage collector,
+ which ultimately determine what is garbage and what data we need to
+ retain.
+
+ */
+class GcContributor
+{
+public:
+ GcContributor(GcManager& Gc);
+ ~GcContributor();
+
+ virtual void GatherReferences(GcContext& GcCtx) = 0;
+
+protected:
+ GcManager& m_Gc;
+};
+
+struct GcStorageSize
+{
+ uint64_t DiskSize{};
+ uint64_t MemorySize{};
+};
+
+/** GC storage provider
+ */
+class GcStorage
+{
+public:
+ GcStorage(GcManager& Gc);
+ ~GcStorage();
+
+ virtual void CollectGarbage(GcContext& GcCtx) = 0;
+ virtual GcStorageSize StorageSize() const = 0;
+
+private:
+ GcManager& m_Gc;
+};
+
+/** GC orchestrator
+ */
+class GcManager
+{
+public:
+ GcManager();
+ ~GcManager();
+
+ void AddGcContributor(GcContributor* Contributor);
+ void RemoveGcContributor(GcContributor* Contributor);
+
+ void AddGcStorage(GcStorage* Contributor);
+ void RemoveGcStorage(GcStorage* Contributor);
+
+ void CollectGarbage(GcContext& GcCtx);
+
+ GcStorageSize TotalStorageSize() const;
+
+#if ZEN_USE_REF_TRACKING
+ void OnNewCidReferences(std::span<IoHash> Hashes);
+ void OnCommittedCidReferences(std::span<IoHash> Hashes);
+ void OnDroppedCidReferences(std::span<IoHash> Hashes);
+#endif
+
+private:
+ spdlog::logger& Log() { return m_Log; }
+ spdlog::logger& m_Log;
+ mutable RwLock m_Lock;
+ std::vector<GcContributor*> m_GcContribs;
+ std::vector<GcStorage*> m_GcStorage;
+ CidStore* m_CidStore = nullptr;
+};
+
+enum class GcSchedulerStatus : uint32_t
+{
+ kIdle,
+ kRunning,
+ kStopped
+};
+
+struct GcSchedulerConfig
+{
+ std::filesystem::path RootDirectory;
+ std::chrono::seconds MonitorInterval{30};
+ std::chrono::seconds Interval{};
+ std::chrono::seconds MaxCacheDuration{86400};
+ bool CollectSmallObjects = true;
+ bool Enabled = true;
+ uint64_t DiskReserveSize = 1ul << 28;
+ uint64_t DiskSizeSoftLimit = 0;
+};
+
+class DiskUsageWindow
+{
+public:
+ struct DiskUsageEntry
+ {
+ GcClock::Tick SampleTime;
+ uint64_t DiskUsage;
+ };
+
+ std::vector<DiskUsageEntry> m_LogWindow;
+ inline void Append(const DiskUsageEntry& Entry) { m_LogWindow.push_back(Entry); }
+ inline void Append(DiskUsageEntry&& Entry) { m_LogWindow.emplace_back(std::move(Entry)); }
+ void KeepRange(GcClock::Tick StartTick, GcClock::Tick EndTick);
+ std::vector<uint64_t> GetDiskDeltas(GcClock::Tick StartTick,
+ GcClock::Tick EndTick,
+ GcClock::Tick DeltaWidth,
+ uint64_t& OutMaxDelta) const;
+ GcClock::Tick FindTimepointThatRemoves(uint64_t Amount, GcClock::Tick EndTick) const;
+};
+
+/**
+ * GC scheduler
+ */
+class GcScheduler
+{
+public:
+ GcScheduler(GcManager& GcManager);
+ ~GcScheduler();
+
+ void Initialize(const GcSchedulerConfig& Config);
+ void Shutdown();
+ GcSchedulerStatus Status() const { return static_cast<GcSchedulerStatus>(m_Status.load()); }
+
+ struct TriggerParams
+ {
+ bool CollectSmallObjects = false;
+ std::chrono::seconds MaxCacheDuration = std::chrono::seconds::max();
+ uint64_t DiskSizeSoftLimit = 0;
+ };
+
+ bool Trigger(const TriggerParams& Params);
+
+private:
+ void SchedulerThread();
+ void CollectGarbage(const GcClock::TimePoint& ExpireTime, bool Delete, bool CollectSmallObjects);
+ GcClock::TimePoint NextGcTime(GcClock::TimePoint CurrentTime);
+ spdlog::logger& Log() { return m_Log; }
+
+ spdlog::logger& m_Log;
+ GcManager& m_GcManager;
+ GcSchedulerConfig m_Config;
+ GcClock::TimePoint m_LastGcTime{};
+ GcClock::TimePoint m_LastGcExpireTime{};
+ GcClock::TimePoint m_NextGcTime{};
+ std::atomic_uint32_t m_Status{};
+ std::thread m_GcThread;
+ std::mutex m_GcMutex;
+ std::condition_variable m_GcSignal;
+ std::optional<TriggerParams> m_TriggerParams;
+
+ TCasLogFile<DiskUsageWindow::DiskUsageEntry> m_DiskUsageLog;
+ DiskUsageWindow m_DiskUsageWindow;
+};
+
+void gc_forcelink();
+
+} // namespace zen
diff --git a/src/zenstore/include/zenstore/hashkeyset.h b/src/zenstore/include/zenstore/hashkeyset.h
new file mode 100644
index 000000000..411a6256e
--- /dev/null
+++ b/src/zenstore/include/zenstore/hashkeyset.h
@@ -0,0 +1,54 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include "zenstore.h"
+
+#include <zencore/iohash.h>
+
+#include <functional>
+#include <unordered_set>
+
+namespace zen {
+
+/** Manage a set of IoHash values
+ */
+
+class HashKeySet
+{
+public:
+ void AddHashToSet(const IoHash& HashToAdd);
+ void AddHashesToSet(std::span<const IoHash> HashesToAdd);
+ void RemoveHashesIf(std::function<bool(const IoHash& CandidateHash)>&& Predicate);
+ void IterateHashes(std::function<void(const IoHash& Hash)>&& Callback) const;
+ [[nodiscard]] inline bool ContainsHash(const IoHash& Hash) const { return m_HashSet.find(Hash) != m_HashSet.end(); }
+ [[nodiscard]] inline bool IsEmpty() const { return m_HashSet.empty(); }
+ [[nodiscard]] inline size_t GetSize() const { return m_HashSet.size(); }
+
+ inline void FilterHashes(std::span<const IoHash> Candidates, Invocable<const IoHash&> auto MatchFunc) const
+ {
+ for (const IoHash& Candidate : Candidates)
+ {
+ if (ContainsHash(Candidate))
+ {
+ MatchFunc(Candidate);
+ }
+ }
+ }
+
+ inline void FilterHashes(std::span<const IoHash> Candidates, Invocable<const IoHash&, bool> auto MatchFunc) const
+ {
+ for (const IoHash& Candidate : Candidates)
+ {
+ MatchFunc(Candidate, ContainsHash(Candidate));
+ }
+ }
+
+private:
+ // Q: should we protect this with a lock, or is that a higher level concern?
+ std::unordered_set<IoHash, IoHash::Hasher> m_HashSet;
+};
+
+void hashkeyset_forcelink();
+
+} // namespace zen
diff --git a/src/zenstore/include/zenstore/scrubcontext.h b/src/zenstore/include/zenstore/scrubcontext.h
new file mode 100644
index 000000000..0b884fcc6
--- /dev/null
+++ b/src/zenstore/include/zenstore/scrubcontext.h
@@ -0,0 +1,41 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/timer.h>
+#include <zenstore/hashkeyset.h>
+
+namespace zen {
+
+/** Context object for data scrubbing
+ *
+ * Data scrubbing is when we traverse stored data to validate it and
+ * optionally correct/recover
+ */
+
+class ScrubContext
+{
+public:
+ virtual void ReportBadCidChunks(std::span<IoHash> BadCasChunks) { m_BadCid.AddHashesToSet(BadCasChunks); }
+ inline uint64_t ScrubTimestamp() const { return m_ScrubTime; }
+ inline bool RunRecovery() const { return m_Recover; }
+ void ReportScrubbed(uint64_t ChunkCount, uint64_t ChunkBytes)
+ {
+ m_ChunkCount.fetch_add(ChunkCount);
+ m_ByteCount.fetch_add(ChunkBytes);
+ }
+
+ inline uint64_t ScrubbedChunks() const { return m_ChunkCount; }
+ inline uint64_t ScrubbedBytes() const { return m_ByteCount; }
+
+ const HashKeySet BadCids() const { return m_BadCid; }
+
+private:
+ uint64_t m_ScrubTime = GetHifreqTimerValue();
+ bool m_Recover = true;
+ std::atomic<uint64_t> m_ChunkCount{0};
+ std::atomic<uint64_t> m_ByteCount{0};
+ HashKeySet m_BadCid;
+};
+
+} // namespace zen
diff --git a/src/zenstore/include/zenstore/zenstore.h b/src/zenstore/include/zenstore/zenstore.h
new file mode 100644
index 000000000..46d62029d
--- /dev/null
+++ b/src/zenstore/include/zenstore/zenstore.h
@@ -0,0 +1,13 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/zencore.h>
+
+#define ZENSTORE_API
+
+namespace zen {
+
+ZENSTORE_API void zenstore_forcelinktests();
+
+}