diff options
| author | Stefan Boberg <[email protected]> | 2023-05-17 10:31:50 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-05-17 10:31:50 +0200 |
| commit | 5adba30f4528a7d74090a8391d09b287501846a7 (patch) | |
| tree | 25476b8e49fb5a44170b4d181de60de1f2d88ebe /src/zenserver/cache/cachedisklayer.h | |
| parent | amended CHANGELOG.md with recent changes (diff) | |
| download | zen-5adba30f4528a7d74090a8391d09b287501846a7.tar.xz zen-5adba30f4528a7d74090a8391d09b287501846a7.zip | |
Restructured structured cache store (#314)
This change separates out the disk and memory storage strategies into separate cpp/h files to improve maintainability.
Diffstat (limited to 'src/zenserver/cache/cachedisklayer.h')
| -rw-r--r-- | src/zenserver/cache/cachedisklayer.h | 222 |
1 file changed, 222 insertions, 0 deletions
diff --git a/src/zenserver/cache/cachedisklayer.h b/src/zenserver/cache/cachedisklayer.h new file mode 100644 index 000000000..127e194f1 --- /dev/null +++ b/src/zenserver/cache/cachedisklayer.h @@ -0,0 +1,222 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "cacheshared.h" + +#include <zenstore/blockstore.h> +#include <zenstore/caslog.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <filesystem> + +namespace zen { + +class IoBuffer; + +#pragma pack(push) +#pragma pack(1) + +struct DiskLocation +{ + inline DiskLocation() = default; + + inline DiskLocation(uint64_t ValueSize, uint8_t Flags) : Flags(Flags | kStandaloneFile) { Location.StandaloneSize = ValueSize; } + + inline DiskLocation(const BlockStoreLocation& Location, uint64_t PayloadAlignment, uint8_t Flags) : Flags(Flags & ~kStandaloneFile) + { + this->Location.BlockLocation = BlockStoreDiskLocation(Location, PayloadAlignment); + } + + inline BlockStoreLocation GetBlockLocation(uint64_t PayloadAlignment) const + { + ZEN_ASSERT(!(Flags & kStandaloneFile)); + return Location.BlockLocation.Get(PayloadAlignment); + } + + inline uint64_t Size() const { return (Flags & kStandaloneFile) ? 
Location.StandaloneSize : Location.BlockLocation.GetSize(); } + inline uint8_t IsFlagSet(uint64_t Flag) const { return Flags & Flag; } + inline uint8_t GetFlags() const { return Flags; } + inline ZenContentType GetContentType() const + { + ZenContentType ContentType = ZenContentType::kBinary; + + if (IsFlagSet(kStructured)) + { + ContentType = ZenContentType::kCbObject; + } + + if (IsFlagSet(kCompressed)) + { + ContentType = ZenContentType::kCompressedBinary; + } + + return ContentType; + } + + union + { + BlockStoreDiskLocation BlockLocation; // 10 bytes + uint64_t StandaloneSize = 0; // 8 bytes + } Location; + + static const uint8_t kStandaloneFile = 0x80u; // Stored as a separate file + static const uint8_t kStructured = 0x40u; // Serialized as compact binary + static const uint8_t kTombStone = 0x20u; // Represents a deleted key/value + static const uint8_t kCompressed = 0x10u; // Stored in compressed buffer format + + uint8_t Flags = 0; + uint8_t Reserved = 0; +}; + +struct DiskIndexEntry +{ + IoHash Key; // 20 bytes + DiskLocation Location; // 12 bytes +}; + +#pragma pack(pop) + +static_assert(sizeof(DiskIndexEntry) == 32); + +////////////////////////////////////////////////////////////////////////// + +class ZenCacheDiskLayer +{ +public: + struct Configuration + { + std::filesystem::path RootDir; + }; + + struct BucketInfo + { + uint64_t EntryCount = 0; + uint64_t TotalSize = 0; + }; + + struct Info + { + Configuration Config; + std::vector<std::string> BucketNames; + uint64_t EntryCount = 0; + uint64_t TotalSize = 0; + }; + + explicit ZenCacheDiskLayer(const std::filesystem::path& RootDir); + ~ZenCacheDiskLayer(); + + bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); + void Put(std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value); + bool Drop(); + bool DropBucket(std::string_view Bucket); + void Flush(); + void ScrubStorage(ScrubContext& Ctx); + void GatherReferences(GcContext& GcCtx); + void 
CollectGarbage(GcContext& GcCtx); + void UpdateAccessTimes(const zen::access_tracking::AccessTimes& AccessTimes); + + void DiscoverBuckets(); + uint64_t TotalSize() const; + + Info GetInfo() const; + std::optional<BucketInfo> GetBucketInfo(std::string_view Bucket) const; + + CacheValueDetails::NamespaceDetails GetValueDetails(const std::string_view BucketFilter, const std::string_view ValueFilter) const; + +private: + /** A cache bucket manages a single directory containing + metadata and data for that bucket + */ + struct CacheBucket + { + CacheBucket(std::string BucketName); + ~CacheBucket(); + + bool OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true); + bool Get(const IoHash& HashKey, ZenCacheValue& OutValue); + void Put(const IoHash& HashKey, const ZenCacheValue& Value); + bool Drop(); + void Flush(); + void ScrubStorage(ScrubContext& Ctx); + void GatherReferences(GcContext& GcCtx); + void CollectGarbage(GcContext& GcCtx); + void UpdateAccessTimes(const std::vector<zen::access_tracking::KeyAccessTime>& AccessTimes); + + inline uint64_t TotalSize() const { return m_TotalStandaloneSize.load(std::memory_order::relaxed) + m_BlockStore.TotalSize(); } + uint64_t EntryCount() const; + + CacheValueDetails::BucketDetails GetValueDetails(const std::string_view ValueFilter) const; + + private: + const uint64_t MaxBlockSize = 1ull << 30; + uint64_t m_PayloadAlignment = 1ull << 4; + + std::string m_BucketName; + std::filesystem::path m_BucketDir; + std::filesystem::path m_BlocksBasePath; + BlockStore m_BlockStore; + Oid m_BucketId; + uint64_t m_LargeObjectThreshold = 128 * 1024; + + // These files are used to manage storage of small objects for this bucket + + TCasLogFile<DiskIndexEntry> m_SlogFile; + uint64_t m_LogFlushPosition = 0; + +#pragma pack(push) +#pragma pack(1) + struct BucketPayload + { + DiskLocation Location; // 12 + uint64_t RawSize; // 8 + IoHash RawHash; // 20 + }; +#pragma pack(pop) + static_assert(sizeof(BucketPayload) == 40u); + 
static_assert(sizeof(AccessTime) == 4u); + + using IndexMap = tsl::robin_map<IoHash, size_t, IoHash::Hasher>; + + mutable RwLock m_IndexLock; + std::vector<AccessTime> m_AccessTimes; + std::vector<BucketPayload> m_Payloads; + IndexMap m_Index; + + std::atomic_uint64_t m_TotalStandaloneSize{}; + + void BuildPath(PathBuilderBase& Path, const IoHash& HashKey) const; + void PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value); + IoBuffer GetStandaloneCacheValue(const DiskLocation& Loc, const IoHash& HashKey) const; + void PutInlineCacheValue(const IoHash& HashKey, const ZenCacheValue& Value); + IoBuffer GetInlineCacheValue(const DiskLocation& Loc) const; + void MakeIndexSnapshot(); + uint64_t ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& OutVersion); + uint64_t ReadLog(const std::filesystem::path& LogPath, uint64_t LogPosition); + void OpenLog(const bool IsNew); + void SaveManifest(); + CacheValueDetails::ValueDetails GetValueDetails(const IoHash& Key, size_t Index) const; + // These locks are here to avoid contention on file creation, therefore it's sufficient + // that we take the same lock for the same hash + // + // These locks are small and should really be spaced out so they don't share cache lines, + // but we don't currently access them at particularly high frequency so it should not be + // an issue in practice + + mutable RwLock m_ShardedLocks[256]; + inline RwLock& LockForHash(const IoHash& Hash) const { return m_ShardedLocks[Hash.Hash[19]]; } + }; + + std::filesystem::path m_RootDir; + mutable RwLock m_Lock; + std::unordered_map<std::string, std::unique_ptr<CacheBucket>> m_Buckets; // TODO: make this case insensitive + std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets; + + ZenCacheDiskLayer(const ZenCacheDiskLayer&) = delete; + ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete; +}; + +} // namespace zen |