// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

// NOTE(review): the targets of the #include directives below, and the template argument
// lists of several declarations in this header (std::vector, std::unordered_map,
// std::optional, std::function, tsl::robin_map, TCasLogFile, duration_cast, gsl::narrow),
// are missing from this copy of the file. They look like they were lost when
// angle-bracketed text was stripped - restore them from the original header before building.
#include
#include
#include
#include
#include
#include
#include
#include

ZEN_THIRD_PARTY_INCLUDES_START
#include
ZEN_THIRD_PARTY_INCLUDES_END

#include
#include
#include
#include

namespace zen {

class PathBuilderBase;
class GcManager;
class ScrubContext;

/******************************************************************************

  Cache store for UE5. Restricts keys to "{bucket}/{hash}" pairs where the hash
  is 40 (hex) chars in size. Values may be opaque blobs or structured objects
  which can in turn contain references to other objects (or blobs).

******************************************************************************/

/** Types used to hand access-time information between the cache layers
    (see ZenCacheMemoryLayer::GatherAccessTimes and ZenCacheDiskLayer::UpdateAccessTimes)
 */
namespace access_tracking {
    /** A key together with the GC clock tick recorded in LastAccess (value-initialized by default) */
    struct KeyAccessTime
    {
        IoHash         Key;
        GcClock::Tick  LastAccess{};
    };

    /** Container of access times held in a map named Buckets
        (presumably keyed by bucket name - confirm against the restored template arguments)
     */
    struct AccessTimes
    {
        std::unordered_map> Buckets;
    };
};  // namespace access_tracking

/** A cache value as passed to and returned from the Get/Put functions in this header.
    RawSize defaults to 0 and RawHash to IoHash::Zero.
    NOTE(review): RawSize/RawHash presumably describe the uncompressed payload - confirm.
 */
struct ZenCacheValue
{
    IoBuffer Value;
    uint64_t RawSize = 0;
    IoHash   RawHash = IoHash::Zero;
};

/** Result structure returned by the GetValueDetails functions in this header.
    Nested as store -> Namespaces -> Buckets -> Values.
 */
struct CacheValueDetails
{
    /** Details for a single value. Size, RawSize and ContentType have no default initializer. */
    struct ValueDetails
    {
        uint64_t       Size;
        uint64_t       RawSize;
        IoHash         RawHash;
        GcClock::Tick  LastAccess{};
        std::vector    Attachments;
        ZenContentType ContentType;
    };

    /** Details for the values of one bucket */
    struct BucketDetails
    {
        std::unordered_map Values;
    };

    /** Details for the buckets of one namespace */
    struct NamespaceDetails
    {
        std::unordered_map Buckets;
    };

    std::unordered_map Namespaces;
};

//////////////////////////////////////////////////////////////////////////

// The structures between pack(push) and pack(pop) are byte-packed; their sizes are
// relied upon (see the static_assert on DiskIndexEntry below).
#pragma pack(push)
#pragma pack(1)

/** Describes where a value is stored and with which flags.

    A location is one of two kinds, selected by the kStandaloneFile flag:
      - flag set:   the value is a standalone file and Location.StandaloneSize holds its size
      - flag clear: the value lives in a block store and Location.BlockLocation describes it
 */
struct DiskLocation
{
    inline DiskLocation() = default;

    /** Standalone-file location: forces kStandaloneFile on and records the value size */
    inline DiskLocation(uint64_t ValueSize, uint8_t Flags) : Flags(Flags | kStandaloneFile) { Location.StandaloneSize = ValueSize; }

    /** Block-store location: forces kStandaloneFile off and stores the block location
        encoded with the given payload alignment
     */
    inline DiskLocation(const BlockStoreLocation& Location, uint64_t PayloadAlignment, uint8_t Flags) : Flags(Flags & ~kStandaloneFile)
    {
        // The parameter shadows the member, hence the explicit this->
        this->Location.BlockLocation = BlockStoreDiskLocation(Location, PayloadAlignment);
    }

    /** Decodes the block location; asserts that this is not a standalone-file location */
    inline BlockStoreLocation GetBlockLocation(uint64_t PayloadAlignment) const
    {
        ZEN_ASSERT(!(Flags & kStandaloneFile));
        return Location.BlockLocation.Get(PayloadAlignment);
    }

    /** Size of the stored value, read from whichever union member is active according to the flags */
    inline uint64_t Size() const { return (Flags & kStandaloneFile) ? Location.StandaloneSize : Location.BlockLocation.GetSize(); }

    /** Returns the masked flag bits: non-zero if any bit of Flag is set, not a normalized 0/1 */
    inline uint8_t IsFlagSet(uint64_t Flag) const { return Flags & Flag; }

    inline uint8_t GetFlags() const { return Flags; }

    /** Maps the flags to a content type. Defaults to kBinary; kStructured selects kCbObject;
        kCompressed is checked last and therefore wins if both flags are set.
     */
    inline ZenContentType GetContentType() const
    {
        ZenContentType ContentType = ZenContentType::kBinary;

        if (IsFlagSet(kStructured))
        {
            ContentType = ZenContentType::kCbObject;
        }

        if (IsFlagSet(kCompressed))
        {
            ContentType = ZenContentType::kCompressedBinary;
        }

        return ContentType;
    }

    union
    {
        BlockStoreDiskLocation BlockLocation;       // 10 bytes
        uint64_t               StandaloneSize = 0;  // 8 bytes
    } Location;

    static const uint8_t kStandaloneFile = 0x80u;  // Stored as a separate file
    static const uint8_t kStructured     = 0x40u;  // Serialized as compact binary
    static const uint8_t kTombStone      = 0x20u;  // Represents a deleted key/value
    static const uint8_t kCompressed     = 0x10u;  // Stored in compressed buffer format

    uint8_t Flags    = 0;
    uint8_t Reserved = 0;
};

/** A key and its location, packed to exactly 32 bytes (checked by the static_assert below) */
struct DiskIndexEntry
{
    IoHash       Key;       // 20 bytes
    DiskLocation Location;  // 12 bytes
};

#pragma pack(pop)

static_assert(sizeof(DiskIndexEntry) == 32);

// This stores the access time as seconds since epoch internally in a 32-bit value, giving it a range of 136 years since epoch
//
// The value is held in a std::atomic_uint32_t and all loads/stores use relaxed memory ordering.
// std::atomic is not copyable, so the copy/move operations are written out by hand and simply
// transfer the current value.
struct AccessTime
{
    explicit AccessTime(GcClock::Tick Tick) noexcept : SecondsSinceEpoch(ToSeconds(Tick)) {}

    /** Overwrites the stored time with Tick (converted to seconds) */
    AccessTime& operator=(GcClock::Tick Tick) noexcept
    {
        SecondsSinceEpoch.store(ToSeconds(Tick), std::memory_order_relaxed);
        return *this;
    }

    /** Implicit conversion back to a tick count; sub-second precision is not retained */
    operator GcClock::Tick() const noexcept
    {
        return std::chrono::duration_cast(std::chrono::seconds(SecondsSinceEpoch.load(std::memory_order_relaxed)))
            .count();
    }

    AccessTime(AccessTime&& Rhs) noexcept : SecondsSinceEpoch(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed)) {}
    AccessTime(const AccessTime& Rhs) noexcept : SecondsSinceEpoch(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed)) {}

    AccessTime& operator=(AccessTime&& Rhs) noexcept
    {
        SecondsSinceEpoch.store(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }

    AccessTime& operator=(const AccessTime& Rhs) noexcept
    {
        SecondsSinceEpoch.store(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed), std::memory_order_relaxed);
        return *this;
    }

private:
    // NOTE(review): gsl::narrow is a checked conversion while the callers above are noexcept -
    // confirm what should happen for ticks that do not fit in the 32-bit second count.
    static uint32_t ToSeconds(GcClock::Tick Tick)
    {
        return gsl::narrow(std::chrono::duration_cast(GcClock::Duration(Tick)).count());
    }

    std::atomic_uint32_t SecondsSinceEpoch;
};

/** In-memory cache storage

    Intended for small values which are frequently accessed

    This should have a better memory management policy to maintain reasonable
    footprint.
 */
class ZenCacheMemoryLayer
{
public:
    /** Tuning values; defaults are 16 MiB target footprint and 4 MiB scavenge threshold */
    struct Configuration
    {
        uint64_t TargetFootprintBytes = 16 * 1024 * 1024;
        uint64_t ScavengeThreshold    = 4 * 1024 * 1024;
    };

    struct BucketInfo
    {
        uint64_t EntryCount = 0;
        uint64_t TotalSize  = 0;
    };

    struct Info
    {
        Configuration Config;
        std::vector   BucketNames;
        uint64_t      EntryCount = 0;
        uint64_t      TotalSize  = 0;
    };

    ZenCacheMemoryLayer();
    ~ZenCacheMemoryLayer();

    // Value access, addressed by bucket name and key hash.
    // NOTE(review): Get presumably returns true when a value was found and written to OutValue - confirm in the implementation
    bool          Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue);
    void          Put(std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value);
    void          Drop();
    bool          DropBucket(std::string_view Bucket);
    void          ScrubStorage(ScrubContext& Ctx);
    void          GatherAccessTimes(zen::access_tracking::AccessTimes& AccessTimes);
    void          Reset();
    uint64_t      TotalSize() const;

    Info          GetInfo() const;
    std::optional GetBucketInfo(std::string_view Bucket) const;

    const Configuration& GetConfiguration() const { return m_Configuration; }
    // Plain assignment of the configuration; no lock is taken here
    void                 SetConfiguration(const Configuration& NewConfig) { m_Configuration = NewConfig; }

private:
    /** Storage for one bucket, with its own lock (m_BucketLock) */
    struct CacheBucket
    {
#pragma pack(push)
#pragma pack(1)
        // Note that RawSize is 32 bits here while ZenCacheValue::RawSize is 64 bits
        struct BucketPayload
        {
            IoBuffer Payload;  // 8
            uint32_t RawSize;  // 4
            IoHash   RawHash;  // 20
        };
#pragma pack(pop)
        static_assert(sizeof(BucketPayload) == 32u);
        static_assert(sizeof(AccessTime) == 4u);

        mutable RwLock       m_BucketLock;
        std::vector          m_AccessTimes;
        std::vector          m_Payloads;
        tsl::robin_map       m_CacheMap;

        std::atomic_uint64_t m_TotalSize{};

        bool            Get(const IoHash& HashKey, ZenCacheValue& OutValue);
        void            Put(const IoHash& HashKey, const ZenCacheValue& Value);
        void            Drop();
        void            ScrubStorage(ScrubContext& Ctx);
        void            GatherAccessTimes(std::vector& AccessTimes);
        // Reads the atomic size counter
        inline uint64_t TotalSize() const { return m_TotalSize; }
        uint64_t        EntryCount() const;
    };

    mutable RwLock     m_Lock;
    std::unordered_map> m_Buckets;
    std::vector>       m_DroppedBuckets;
    Configuration      m_Configuration;

    // Not copyable
    ZenCacheMemoryLayer(const ZenCacheMemoryLayer&) = delete;
    ZenCacheMemoryLayer& operator=(const ZenCacheMemoryLayer&) = delete;
};

/** Disk-backed cache storage

    Holds a set of buckets (see CacheBucket below) underneath a root directory.
 */
class ZenCacheDiskLayer
{
public:
    struct Configuration
    {
        std::filesystem::path RootDir;
    };

    struct BucketInfo
    {
        uint64_t EntryCount = 0;
        uint64_t TotalSize  = 0;
    };

    struct Info
    {
        Configuration Config;
        std::vector   BucketNames;
        uint64_t      EntryCount = 0;
        uint64_t      TotalSize  = 0;
    };

    explicit ZenCacheDiskLayer(const std::filesystem::path& RootDir);
    ~ZenCacheDiskLayer();

    // Value access, addressed by bucket name and key hash.
    // NOTE(review): the meaning of the bool results is defined by the implementation - confirm there
    bool          Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue);
    void          Put(std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value);
    bool          Drop();
    bool          DropBucket(std::string_view Bucket);
    void          Flush();
    void          ScrubStorage(ScrubContext& Ctx);
    void          GatherReferences(GcContext& GcCtx);
    void          CollectGarbage(GcContext& GcCtx);
    void          UpdateAccessTimes(const zen::access_tracking::AccessTimes& AccessTimes);

    void          DiscoverBuckets();
    uint64_t      TotalSize() const;

    Info          GetInfo() const;
    std::optional GetBucketInfo(std::string_view Bucket) const;

    CacheValueDetails::NamespaceDetails GetValueDetails(const std::string_view BucketFilter, const std::string_view ValueFilter) const;

private:
    /** A cache bucket manages a single directory containing
        metadata and data for that bucket
     */
    struct CacheBucket
    {
        CacheBucket(std::string BucketName);
        ~CacheBucket();

        bool            OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true);
        bool            Get(const IoHash& HashKey, ZenCacheValue& OutValue);
        void            Put(const IoHash& HashKey, const ZenCacheValue& Value);
        bool            Drop();
        void            Flush();
        void            ScrubStorage(ScrubContext& Ctx);
        void            GatherReferences(GcContext& GcCtx);
        void            CollectGarbage(GcContext& GcCtx);
        void            UpdateAccessTimes(const std::vector& AccessTimes);

        // Sum of the standalone-file size counter and the block store's total size
        inline uint64_t TotalSize() const { return m_TotalStandaloneSize.load(std::memory_order::relaxed) + m_BlockStore.TotalSize(); }
        uint64_t        EntryCount() const;

        CacheValueDetails::BucketDetails GetValueDetails(const std::string_view ValueFilter) const;

    private:
        const uint64_t        MaxBlockSize       = 1ull << 30;  // 1 GiB
        uint64_t              m_PayloadAlignment = 1ull << 4;   // 16 bytes

        std::string           m_BucketName;
        std::filesystem::path m_BucketDir;
        std::filesystem::path m_BlocksBasePath;
        BlockStore            m_BlockStore;
        Oid                   m_BucketId;
        uint64_t              m_LargeObjectThreshold = 128 * 1024;  // 128 KiB

        // These files are used to manage storage of small objects for this bucket

        TCasLogFile           m_SlogFile;
        uint64_t              m_LogFlushPosition = 0;

#pragma pack(push)
#pragma pack(1)
        struct BucketPayload
        {
            DiskLocation Location;  // 12
            uint64_t     RawSize;   // 8
            IoHash       RawHash;   // 20
        };
#pragma pack(pop)
        static_assert(sizeof(BucketPayload) == 40u);
        static_assert(sizeof(AccessTime) == 4u);

        using IndexMap = tsl::robin_map;

        mutable RwLock       m_IndexLock;
        std::vector          m_AccessTimes;
        std::vector          m_Payloads;
        IndexMap             m_Index;

        std::atomic_uint64_t m_TotalStandaloneSize{};

        void     BuildPath(PathBuilderBase& Path, const IoHash& HashKey) const;
        void     PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value);
        IoBuffer GetStandaloneCacheValue(const DiskLocation& Loc, const IoHash& HashKey) const;
        void     PutInlineCacheValue(const IoHash& HashKey, const ZenCacheValue& Value);
        IoBuffer GetInlineCacheValue(const DiskLocation& Loc) const;
        void     MakeIndexSnapshot();
        uint64_t ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& OutVersion);
        uint64_t ReadLog(const std::filesystem::path& LogPath, uint64_t LogPosition);
        void     OpenLog(const bool IsNew);
        void     SaveManifest();

        CacheValueDetails::ValueDetails GetValueDetails(const IoHash& Key, size_t Index) const;

        // These locks are here to avoid contention on file creation, therefore it's sufficient
        // that we take the same lock for the same hash
        //
        // These locks are small and should really be spaced out so they don't share cache lines,
        // but we don't currently access them at particularly high frequency so it should not be
        // an issue in practice

        mutable RwLock m_ShardedLocks[256];

        // Selects the lock by the last byte (index 19) of the hash, which covers all 256 slots
        inline RwLock& LockForHash(const IoHash& Hash) const { return m_ShardedLocks[Hash.Hash[19]]; }
    };

    std::filesystem::path m_RootDir;
    mutable RwLock        m_Lock;
    std::unordered_map>   m_Buckets;  // TODO: make this case insensitive
    std::vector>          m_DroppedBuckets;

    // Not copyable
    ZenCacheDiskLayer(const ZenCacheDiskLayer&) = delete;
    ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete;
};

/* Z$ namespace

   A namespace scopes a set of buckets, and would typically be used to isolate
   projects from each other.

   The class owns one ZenCacheMemoryLayer and one ZenCacheDiskLayer and implements
   both the GcStorage and GcContributor interfaces.
 */
class ZenCacheNamespace final : public GcStorage, public GcContributor
{
public:
    struct Configuration
    {
        std::filesystem::path RootDir;
        uint64_t              DiskLayerThreshold = 0;
    };

    struct BucketInfo
    {
        ZenCacheDiskLayer::BucketInfo   DiskLayerInfo;
        ZenCacheMemoryLayer::BucketInfo MemoryLayerInfo;
    };

    struct Info
    {
        Configuration             Config;
        std::vector               BucketNames;
        ZenCacheDiskLayer::Info   DiskLayerInfo;
        ZenCacheMemoryLayer::Info MemoryLayerInfo;
    };

    ZenCacheNamespace(GcManager& Gc, const std::filesystem::path& RootDir);
    ~ZenCacheNamespace();

    bool     Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue);
    void     Put(std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value);
    bool     Drop();
    bool     DropBucket(std::string_view Bucket);
    void     Flush();
    // Returns m_DiskLayerSizeThreshold (initialized to 1 KiB in this header)
    uint64_t DiskLayerThreshold() const { return m_DiskLayerSizeThreshold; }

    // GcContributor
    virtual void GatherReferences(GcContext& GcCtx) override;

    // GcStorage
    virtual void          ScrubStorage(ScrubContext& ScrubCtx) override;
    virtual void          CollectGarbage(GcContext& GcCtx) override;
    virtual GcStorageSize StorageSize() const override;

    Info          GetInfo() const;
    std::optional GetBucketInfo(std::string_view Bucket) const;

    CacheValueDetails::NamespaceDetails GetValueDetails(const std::string_view BucketFilter, const std::string_view ValueFilter) const;

private:
    std::filesystem::path m_RootDir;
    ZenCacheMemoryLayer   m_MemLayer;
    ZenCacheDiskLayer     m_DiskLayer;
    uint64_t              m_DiskLayerSizeThreshold = 1 * 1024;
    uint64_t              m_LastScrubTime          = 0;

    // Not copyable
    ZenCacheNamespace(const ZenCacheNamespace&) = delete;
    ZenCacheNamespace& operator=(const ZenCacheNamespace&) = delete;
};

/** Cache store interface

    This manages a set of namespaces used for caching purposes.
 */
class ZenCacheStore final
{
public:
    static constexpr std::string_view DefaultNamespace =
        "!default!";  // This is intentionally not a valid namespace name and will only be used for mapping when no namespace is given
    static constexpr std::string_view NamespaceDiskPrefix = "ns_";

    struct Configuration
    {
        std::filesystem::path BasePath;
        bool                  AllowAutomaticCreationOfNamespaces = false;
        bool                  EnableWriteLog                     = true;
        bool                  EnableAccessLog                    = true;
    };

    struct Info
    {
        Configuration Config;
        std::vector   NamespaceNames;
        uint64_t      DiskEntryCount   = 0;
        uint64_t      MemoryEntryCount = 0;
        GcStorageSize StorageSize;
    };

    ZenCacheStore(GcManager& Gc, const Configuration& Configuration);
    ~ZenCacheStore();

    // Same operations as on ZenCacheNamespace, with the namespace name as an additional first argument
    bool Get(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue);
    void Put(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value);
    bool DropBucket(std::string_view Namespace, std::string_view Bucket);
    bool DropNamespace(std::string_view Namespace);
    void Flush();
    void ScrubStorage(ScrubContext& Ctx);

    CacheValueDetails GetValueDetails(const std::string_view NamespaceFilter,
                                      const std::string_view BucketFilter,
                                      const std::string_view ValueFilter) const;

    GcStorageSize StorageSize() const;

    Info          GetInfo() const;
    std::optional GetNamespaceInfo(std::string_view Namespace);
    std::optional GetBucketInfo(std::string_view Namespace, std::string_view Bucket);

private:
    // NOTE(review): FindNamespace is const and returns a const pointer while GetNamespace is non-const;
    // presumably only the latter may create namespaces (see AllowAutomaticCreationOfNamespaces) - confirm
    const ZenCacheNamespace* FindNamespace(std::string_view Namespace) const;
    ZenCacheNamespace*       GetNamespace(std::string_view Namespace);
    void                     IterateNamespaces(const std::function& Callback) const;

    typedef std::unordered_map> NamespaceMap;

    mutable RwLock m_NamespacesLock;
    NamespaceMap   m_Namespaces;
    std::vector>   m_DroppedNamespaces;

    GcManager&    m_Gc;
    Configuration m_Configuration;
};

void z$_forcelink();

}  // namespace zen