diff options
Diffstat (limited to 'zenserver/cache/structuredcachestore.cpp')
| -rw-r--r-- | zenserver/cache/structuredcachestore.cpp | 165 |
1 files changed, 138 insertions, 27 deletions
diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 8b9ce8ff9..5cce7f325 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -2,22 +2,25 @@ #include "structuredcachestore.h" -#include <zencore/except.h> -#include <zencore/windows.h> +#include "cachetracking.h" #include <zencore/compactbinary.h> #include <zencore/compactbinarybuilder.h> #include <zencore/compactbinarypackage.h> #include <zencore/compactbinaryvalidation.h> #include <zencore/compress.h> +#include <zencore/except.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/iobuffer.h> #include <zencore/logging.h> +#include <zencore/scopeguard.h> #include <zencore/string.h> #include <zencore/testing.h> #include <zencore/testutils.h> #include <zencore/thread.h> +#include <zencore/windows.h> +#include <zenstore/basicfile.h> #include <zenstore/cas.h> #include <zenstore/caslog.h> #include <zenstore/cidstore.h> @@ -25,6 +28,7 @@ #include <concepts> #include <filesystem> +#include <memory_resource> #include <ranges> #include <unordered_map> @@ -39,12 +43,14 @@ namespace zen { using namespace fmt::literals; -ZenCacheStore::ZenCacheStore(const std::filesystem::path& RootDir) : m_DiskLayer{RootDir} +ZenCacheStore::ZenCacheStore(CasGc& Gc, const std::filesystem::path& RootDir) : GcContributor(Gc), m_DiskLayer(RootDir) { ZEN_INFO("initializing structured cache at '{}'", RootDir); CreateDirectories(RootDir); m_DiskLayer.DiscoverBuckets(); + + m_AccessTracker.reset(new ZenCacheTracker(RootDir)); } ZenCacheStore::~ZenCacheStore() @@ -56,21 +62,27 @@ ZenCacheStore::Get(std::string_view InBucket, const IoHash& HashKey, ZenCacheVal { bool Ok = m_MemLayer.Get(InBucket, HashKey, OutValue); + auto _ = MakeGuard([&] { + if (!Ok) + return; + + m_AccessTracker->TrackAccess(InBucket, HashKey); + }); + if (Ok) { ZEN_ASSERT(OutValue.Value.Size()); + + return true; } - if (!Ok) - { - Ok = m_DiskLayer.Get(InBucket, HashKey, OutValue); + Ok = m_DiskLayer.Get(InBucket, HashKey, OutValue); - if (Ok) - { - ZEN_ASSERT(OutValue.Value.Size()); - } + if (Ok) + { + ZEN_ASSERT(OutValue.Value.Size()); - if (Ok && (OutValue.Value.Size() <= m_DiskLayerSizeThreshold)) + if (OutValue.Value.Size() <= m_DiskLayerSizeThreshold) { m_MemLayer.Put(InBucket, HashKey, OutValue); } @@ -88,6 +100,25 @@ ZenCacheStore::Put(std::string_view InBucket, const IoHash& HashKey, const ZenCa m_DiskLayer.Put(InBucket, HashKey, Value); +#if ZEN_USE_REF_TRACKING + if (Value.Value.GetContentType() == ZenContentType::kCbObject) + { + if (ValidateCompactBinary(Value.Value, CbValidateMode::All) == CbValidateError::None) + { + CbObject Object{SharedBuffer(Value.Value)}; + + uint8_t TempBuffer[8 * sizeof(IoHash)]; + std::pmr::monotonic_buffer_resource Linear{TempBuffer, sizeof TempBuffer}; + std::pmr::polymorphic_allocator Allocator{&Linear}; + std::pmr::vector<IoHash> CidReferences{Allocator}; + + Object.IterateAttachments([&](CbFieldView Field) { CidReferences.push_back(Field.AsAttachment()); }); + + m_Gc.OnNewCidReferences(CidReferences); + } + } +#endif + if (Value.Value.Size() <= m_DiskLayerSizeThreshold) { m_MemLayer.Put(InBucket, HashKey, Value); @@ -131,10 +162,10 @@ ZenCacheStore::Scrub(ScrubContext& Ctx) } void -ZenCacheStore::GarbageCollect(GcContext& GcCtx) +ZenCacheStore::GatherReferences(GcContext& GcCtx) { - m_DiskLayer.GarbageCollect(GcCtx); - m_MemLayer.GarbageCollect(GcCtx); + m_MemLayer.GatherReferences(GcCtx); + m_DiskLayer.GatherReferences(GcCtx); } ////////////////////////////////////////////////////////////////////////// @@ -220,13 +251,13 @@ ZenCacheMemoryLayer::Scrub(ScrubContext& Ctx) } void -ZenCacheMemoryLayer::GarbageCollect(GcContext& GcCtx) +ZenCacheMemoryLayer::GatherReferences(GcContext& GcCtx) { RwLock::SharedLockScope _(m_Lock); for (auto& Kv : m_Buckets) { - Kv.second.GarbageCollect(GcCtx); + Kv.second.GatherReferences(GcCtx); } } @@ -252,7 +283,7 @@ ZenCacheMemoryLayer::CacheBucket::Scrub(ScrubContext& Ctx) } void -ZenCacheMemoryLayer::CacheBucket::GarbageCollect(GcContext& GcCtx) +ZenCacheMemoryLayer::CacheBucket::GatherReferences(GcContext& GcCtx) { // Is it even meaningful to do this? The memory layer shouldn't // contain anything which is not already in the disk layer @@ -362,7 +393,68 @@ DiskLocation::GetContentType() const return ContentType; } -////////////////////////////////////////////////////////////////////////// +private: + uint64_t OffsetAndFlags = 0; + uint32_t LowerSize = 0; + uint32_t IndexDataSize = 0; +}; + +struct DiskIndexEntry +{ + IoHash Key; + DiskLocation Location; +}; + +#pragma pack(pop) + +static_assert(sizeof(DiskIndexEntry) == 36); + +struct ZenCacheDiskLayer::CacheBucket +{ + CacheBucket(); + ~CacheBucket(); + + void OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true); + static bool Delete(std::filesystem::path BucketDir); + bool Get(const IoHash& HashKey, ZenCacheValue& OutValue); + void Put(const IoHash& HashKey, const ZenCacheValue& Value); + void Drop(); + void Flush(); + void Scrub(ScrubContext& Ctx); + void GatherReferences(GcContext& GcCtx); + + inline bool IsOk() const { return m_IsOk; } + +private: + std::filesystem::path m_BucketDir; + Oid m_BucketId; + bool m_IsOk = false; + uint64_t m_LargeObjectThreshold = 64 * 1024; + + // These files are used to manage storage of small objects for this bucket + + BasicFile m_SobsFile; + TCasLogFile<DiskIndexEntry> m_SlogFile; + + RwLock m_IndexLock; + tsl::robin_map<IoHash, DiskLocation, IoHash::Hasher> m_Index; + uint64_t m_WriteCursor = 0; + + void BuildPath(WideStringBuilderBase& Path, const IoHash& HashKey); + void PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value); + bool GetStandaloneCacheValue(const DiskLocation& Loc, const IoHash& HashKey, ZenCacheValue& OutValue); + bool GetInlineCacheValue(const DiskLocation& Loc, ZenCacheValue& OutValue); + + // These locks are here to avoid contention on file creation, therefore it's sufficient + // that we take the same lock for the same hash + // + // These locks are small and should really be spaced out so they don't share cache lines, + // but we don't currently access them at particularly high frequency so it should not be + // an issue in practice + + RwLock m_ShardedLocks[256]; + inline RwLock& LockForHash(const IoHash& Hash) { return m_ShardedLocks[Hash.Hash[19]]; } +}; ZenCacheDiskLayer::CacheBucket::CacheBucket() { @@ -454,16 +546,29 @@ ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir, bo m_SlogFile.Open(SlogPath, IsNew); - uint64_t MaxFileOffset = 0; + uint64_t MaxFileOffset = 0; + uint64_t InvalidEntryCount = 0; if (RwLock::ExclusiveLockScope _(m_IndexLock); m_Index.empty()) { m_SlogFile.Replay([&](const DiskIndexEntry& Record) { - m_Index[Record.Key] = Record.Location; + if (Record.Key == IoHash::Zero) + { + ++InvalidEntryCount; + } + else + { + m_Index[Record.Key] = Record.Location; - MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.Offset() + Record.Location.Size()); + MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.Offset() + Record.Location.Size()); + } }); + if (InvalidEntryCount) + { + ZEN_WARN("found {} invalid entries in '{}'", InvalidEntryCount, SlogPath); + } + m_WriteCursor = (MaxFileOffset + 15) & ~15; } @@ -661,7 +766,7 @@ ZenCacheDiskLayer::CacheBucket::Scrub(ScrubContext& Ctx) } void -ZenCacheDiskLayer::CacheBucket::GarbageCollect(GcContext& GcCtx) +ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) { RwLock::SharedLockScope _(m_IndexLock); @@ -924,7 +1029,7 @@ ZenCacheDiskLayer::DiscoverBuckets() { // New bucket needs to be created - std::string BucketName8 = WideToUtf8(BucketName); + const std::string BucketName8 = ToUtf8(BucketName); if (auto It = m_Buckets.find(BucketName8); It != m_Buckets.end()) { @@ -940,7 +1045,11 @@ ZenCacheDiskLayer::DiscoverBuckets() Bucket.OpenOrCreate(BucketPath, /* AllowCreate */ false); - if (!Bucket.IsOk()) + if (Bucket.IsOk()) + { + ZEN_INFO("Discovered bucket '{}'", BucketName8); + } + else { ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName8, m_RootDir); @@ -1007,13 +1116,13 @@ ZenCacheDiskLayer::Scrub(ScrubContext& Ctx) } void -ZenCacheDiskLayer::GarbageCollect(GcContext& GcCtx) +ZenCacheDiskLayer::GatherReferences(GcContext& GcCtx) { RwLock::SharedLockScope _(m_Lock); for (auto& Kv : m_Buckets) { - Kv.second.GarbageCollect(GcCtx); + Kv.second.GatherReferences(GcCtx); } } @@ -1028,7 +1137,9 @@ TEST_CASE("z$.store") ScopedTemporaryDirectory TempDir; - ZenCacheStore Zcs(TempDir.Path() / "cache"); + CasGc Gc; + + ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); const int kIterationCount = 100; |