aboutsummaryrefslogtreecommitdiff
path: root/src/zenserver/cache/cachedisklayer.h
diff options
context:
space:
mode:
author: Stefan Boberg <[email protected]> 2023-05-17 10:31:50 +0200
committer: GitHub <[email protected]> 2023-05-17 10:31:50 +0200
commit: 5adba30f4528a7d74090a8391d09b287501846a7 (patch)
tree: 25476b8e49fb5a44170b4d181de60de1f2d88ebe /src/zenserver/cache/cachedisklayer.h
parent: amended CHANGELOG.md with recent changes (diff)
download: zen-5adba30f4528a7d74090a8391d09b287501846a7.tar.xz
zen-5adba30f4528a7d74090a8391d09b287501846a7.zip
Restructured structured cache store (#314)
This change separates out the disk and memory storage strategies into separate cpp/h files to improve maintainability.
Diffstat (limited to 'src/zenserver/cache/cachedisklayer.h')
-rw-r--r-- src/zenserver/cache/cachedisklayer.h | 222
1 file changed, 222 insertions, 0 deletions
diff --git a/src/zenserver/cache/cachedisklayer.h b/src/zenserver/cache/cachedisklayer.h
new file mode 100644
index 000000000..127e194f1
--- /dev/null
+++ b/src/zenserver/cache/cachedisklayer.h
@@ -0,0 +1,222 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include "cacheshared.h"
+
+#include <zenstore/blockstore.h>
+#include <zenstore/caslog.h>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_map.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+#include <filesystem>
+
+namespace zen {
+
+class IoBuffer;
+
+#pragma pack(push)
+#pragma pack(1)
+
+// Byte-packed locator for a single cache value.
+//
+// A value is either stored inside a block store (Location.BlockLocation) or as a
+// file of its own, in which case only its size is kept (Location.StandaloneSize).
+// The kStandaloneFile flag bit tells which union member is meaningful.
+struct DiskLocation
+{
+	inline DiskLocation() = default;
+
+	// Standalone-file value: kStandaloneFile is forced on and only the size is recorded
+	inline DiskLocation(uint64_t ValueSize, uint8_t Flags) : Flags(Flags | kStandaloneFile)
+	{
+		Location.StandaloneSize = ValueSize;
+	}
+
+	// Block-store value: kStandaloneFile is forced off and the location is converted
+	// to its disk representation using the supplied payload alignment
+	inline DiskLocation(const BlockStoreLocation& Location, uint64_t PayloadAlignment, uint8_t Flags) : Flags(Flags & ~kStandaloneFile)
+	{
+		this->Location.BlockLocation = BlockStoreDiskLocation(Location, PayloadAlignment);
+	}
+
+	// Converts the stored block location back using the given alignment.
+	// Asserts that this entry does not refer to a standalone file.
+	inline BlockStoreLocation GetBlockLocation(uint64_t PayloadAlignment) const
+	{
+		ZEN_ASSERT(!(Flags & kStandaloneFile));
+		return Location.BlockLocation.Get(PayloadAlignment);
+	}
+
+	// Stored value size, read from whichever union member the kStandaloneFile bit selects
+	inline uint64_t Size() const
+	{
+		if (Flags & kStandaloneFile)
+		{
+			return Location.StandaloneSize;
+		}
+
+		return Location.BlockLocation.GetSize();
+	}
+
+	// Non-zero when any bit of Flag is set in Flags (the result is the masked bits, not a bool)
+	inline uint8_t IsFlagSet(uint64_t Flag) const { return static_cast<uint8_t>(Flags & Flag); }
+
+	// Raw flag byte
+	inline uint8_t GetFlags() const { return Flags; }
+
+	// Derives the content type from the flag bits: kCompressed takes precedence over
+	// kStructured, and plain binary is reported when neither bit is set
+	inline ZenContentType GetContentType() const
+	{
+		if (IsFlagSet(kCompressed))
+		{
+			return ZenContentType::kCompressedBinary;
+		}
+
+		if (IsFlagSet(kStructured))
+		{
+			return ZenContentType::kCbObject;
+		}
+
+		return ZenContentType::kBinary;
+	}
+
+	// Storage shared by both placement kinds; interpret according to kStandaloneFile
+	union
+	{
+		BlockStoreDiskLocation BlockLocation;  // 10 bytes
+		uint64_t StandaloneSize = 0;  // 8 bytes
+	} Location;
+
+	static const uint8_t kStandaloneFile = 0x80u;  // Stored as a separate file
+	static const uint8_t kStructured = 0x40u;  // Serialized as compact binary
+	static const uint8_t kTombStone = 0x20u;  // Represents a deleted key/value
+	static const uint8_t kCompressed = 0x10u;  // Stored in compressed buffer format
+
+	uint8_t Flags = 0;
+	uint8_t Reserved = 0;
+};
+
+struct DiskIndexEntry // Packed key + location record; element type of CacheBucket::m_SlogFile (TCasLogFile<DiskIndexEntry>)
+{
+ IoHash Key; // 20 bytes
+ DiskLocation Location; // 12 bytes
+};
+// Ends the 1-byte packing region opened by the #pragma pack(push) / pack(1) pair above
+#pragma pack(pop)
+// Layout guard: the 20-byte key plus the 12-byte location must stay exactly 32 bytes
+static_assert(sizeof(DiskIndexEntry) == 32);
+
+//////////////////////////////////////////////////////////////////////////
+// Disk cache layer: owns one CacheBucket per bucket name (m_Buckets); constructed from a root directory path
+class ZenCacheDiskLayer
+{
+public:
+ struct Configuration // Settings of the layer, exposed through Info::Config
+ {
+ std::filesystem::path RootDir;
+ };
+ // Per-bucket counters; the value type of GetBucketInfo()'s optional result
+ struct BucketInfo
+ {
+ uint64_t EntryCount = 0;
+ uint64_t TotalSize = 0;
+ };
+ // Layer-wide summary returned by GetInfo()
+ struct Info
+ {
+ Configuration Config;
+ std::vector<std::string> BucketNames;
+ uint64_t EntryCount = 0;
+ uint64_t TotalSize = 0;
+ };
+ // Copying is disabled (copy constructor and copy assignment are deleted at the end of the class)
+ explicit ZenCacheDiskLayer(const std::filesystem::path& RootDir);
+ ~ZenCacheDiskLayer();
+ // Operations addressed by bucket name and/or key hash; Get() takes OutValue by non-const reference (presumably the result slot - confirm in the .cpp)
+ bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue);
+ void Put(std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value);
+ bool Drop();
+ bool DropBucket(std::string_view Bucket);
+ void Flush();
+ void ScrubStorage(ScrubContext& Ctx);
+ void GatherReferences(GcContext& GcCtx);
+ void CollectGarbage(GcContext& GcCtx);
+ void UpdateAccessTimes(const zen::access_tracking::AccessTimes& AccessTimes);
+ // NOTE(review): DiscoverBuckets() presumably populates m_Buckets from what exists under m_RootDir - confirm in the .cpp
+ void DiscoverBuckets();
+ uint64_t TotalSize() const;
+ // Introspection: GetBucketInfo() returns std::optional, i.e. it may yield no value (presumably for an unknown bucket - confirm)
+ Info GetInfo() const;
+ std::optional<BucketInfo> GetBucketInfo(std::string_view Bucket) const;
+ // Filter semantics are not visible in this header; see the .cpp
+ CacheValueDetails::NamespaceDetails GetValueDetails(const std::string_view BucketFilter, const std::string_view ValueFilter) const;
+
+private:
+ /** A cache bucket manages a single directory containing
+ metadata and data for that bucket
+ */
+ struct CacheBucket
+ {
+ CacheBucket(std::string BucketName);
+ ~CacheBucket();
+ // Per-bucket counterparts of the layer operations above (same names, no Bucket argument)
+ bool OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true);
+ bool Get(const IoHash& HashKey, ZenCacheValue& OutValue);
+ void Put(const IoHash& HashKey, const ZenCacheValue& Value);
+ bool Drop();
+ void Flush();
+ void ScrubStorage(ScrubContext& Ctx);
+ void GatherReferences(GcContext& GcCtx);
+ void CollectGarbage(GcContext& GcCtx);
+ void UpdateAccessTimes(const std::vector<zen::access_tracking::KeyAccessTime>& AccessTimes);
+ // Sum of the standalone size counter (relaxed atomic load) and the total reported by the block store
+ inline uint64_t TotalSize() const { return m_TotalStandaloneSize.load(std::memory_order::relaxed) + m_BlockStore.TotalSize(); }
+ uint64_t EntryCount() const;
+ // Same name as the layer-level query, but takes only a value filter
+ CacheValueDetails::BucketDetails GetValueDetails(const std::string_view ValueFilter) const;
+
+ private:
+ const uint64_t MaxBlockSize = 1ull << 30; // 2^30 (1 GiB if this is a byte count - confirm)
+ uint64_t m_PayloadAlignment = 1ull << 4; // 16; the alignment value handed to DiskLocation conversions is presumably this one - confirm
+
+ std::string m_BucketName;
+ std::filesystem::path m_BucketDir;
+ std::filesystem::path m_BlocksBasePath;
+ BlockStore m_BlockStore;
+ Oid m_BucketId;
+ uint64_t m_LargeObjectThreshold = 128 * 1024; // 131072 (128 KiB if this is a byte count - confirm)
+
+ // These files are used to manage storage of small objects for this bucket
+
+ TCasLogFile<DiskIndexEntry> m_SlogFile;
+ uint64_t m_LogFlushPosition = 0;
+ // Packed so that the 12 + 8 + 20 byte fields add up to the 40 bytes asserted below
+#pragma pack(push)
+#pragma pack(1)
+ struct BucketPayload
+ {
+ DiskLocation Location; // 12
+ uint64_t RawSize; // 8
+ IoHash RawHash; // 20
+ };
+#pragma pack(pop)
+ static_assert(sizeof(BucketPayload) == 40u);
+ static_assert(sizeof(AccessTime) == 4u); // AccessTime is declared elsewhere; only its size is pinned here
+ // NOTE(review): the size_t mapped value is presumably an index into m_Payloads / m_AccessTimes - confirm in the .cpp
+ using IndexMap = tsl::robin_map<IoHash, size_t, IoHash::Hasher>;
+ // m_IndexLock is mutable, so const member functions can lock it; presumably it guards the index containers below - confirm
+ mutable RwLock m_IndexLock;
+ std::vector<AccessTime> m_AccessTimes;
+ std::vector<BucketPayload> m_Payloads;
+ IndexMap m_Index;
+
+ std::atomic_uint64_t m_TotalStandaloneSize{};
+
+ void BuildPath(PathBuilderBase& Path, const IoHash& HashKey) const;
+ void PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value);
+ IoBuffer GetStandaloneCacheValue(const DiskLocation& Loc, const IoHash& HashKey) const;
+ void PutInlineCacheValue(const IoHash& HashKey, const ZenCacheValue& Value);
+ IoBuffer GetInlineCacheValue(const DiskLocation& Loc) const;
+ void MakeIndexSnapshot();
+ uint64_t ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& OutVersion);
+ uint64_t ReadLog(const std::filesystem::path& LogPath, uint64_t LogPosition);
+ void OpenLog(const bool IsNew);
+ void SaveManifest();
+ CacheValueDetails::ValueDetails GetValueDetails(const IoHash& Key, size_t Index) const;
+ // These locks are here to avoid contention on file creation, therefore it's sufficient
+ // that we take the same lock for the same hash
+ //
+ // These locks are small and should really be spaced out so they don't share cache lines,
+ // but we don't currently access them at particularly high frequency so it should not be
+ // an issue in practice
+
+ mutable RwLock m_ShardedLocks[256];
+ inline RwLock& LockForHash(const IoHash& Hash) const { return m_ShardedLocks[Hash.Hash[19]]; } // same hash => same lock; selected by element 19 of Hash.Hash
+ };
+
+ std::filesystem::path m_RootDir;
+ mutable RwLock m_Lock;
+ std::unordered_map<std::string, std::unique_ptr<CacheBucket>> m_Buckets; // TODO: make this case insensitive
+ std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets; // NOTE(review): presumably keeps dropped buckets alive after removal from m_Buckets - confirm
+ // Non-copyable
+ ZenCacheDiskLayer(const ZenCacheDiskLayer&) = delete;
+ ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete;
+};
+
+} // namespace zen