about | summary | refs | log | tree | commit | diff
path: root/zenserver/cache/structuredcachestore.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'zenserver/cache/structuredcachestore.cpp')
-rw-r--r-- zenserver/cache/structuredcachestore.cpp 227
1 files changed, 165 insertions, 62 deletions
diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp
index 8b9ce8ff9..44226457c 100644
--- a/zenserver/cache/structuredcachestore.cpp
+++ b/zenserver/cache/structuredcachestore.cpp
@@ -2,22 +2,25 @@
#include "structuredcachestore.h"
-#include <zencore/except.h>
-#include <zencore/windows.h>
+#include "cachetracking.h"
#include <zencore/compactbinary.h>
#include <zencore/compactbinarybuilder.h>
#include <zencore/compactbinarypackage.h>
#include <zencore/compactbinaryvalidation.h>
#include <zencore/compress.h>
+#include <zencore/except.h>
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/iobuffer.h>
#include <zencore/logging.h>
+#include <zencore/scopeguard.h>
#include <zencore/string.h>
#include <zencore/testing.h>
#include <zencore/testutils.h>
#include <zencore/thread.h>
+#include <zencore/windows.h>
+#include <zenstore/basicfile.h>
#include <zenstore/cas.h>
#include <zenstore/caslog.h>
#include <zenstore/cidstore.h>
@@ -25,6 +28,7 @@
#include <concepts>
#include <filesystem>
+#include <memory_resource>
#include <ranges>
#include <unordered_map>
@@ -39,12 +43,14 @@ namespace zen {
using namespace fmt::literals;
-ZenCacheStore::ZenCacheStore(const std::filesystem::path& RootDir) : m_DiskLayer{RootDir}
+ZenCacheStore::ZenCacheStore(CasGc& Gc, const std::filesystem::path& RootDir) : GcContributor(Gc), m_DiskLayer(RootDir) // Gc is handed to the GcContributor base; RootDir is the cache root directory
{
ZEN_INFO("initializing structured cache at '{}'", RootDir);
CreateDirectories(RootDir); // runs before bucket discovery below
m_DiskLayer.DiscoverBuckets(); // disk layer was constructed with the same RootDir
+
+ m_AccessTracker.reset(new ZenCacheTracker(RootDir)); // access tracker is created last, from the same RootDir
}
ZenCacheStore::~ZenCacheStore()
@@ -56,21 +62,27 @@ ZenCacheStore::Get(std::string_view InBucket, const IoHash& HashKey, ZenCacheVal
{
bool Ok = m_MemLayer.Get(InBucket, HashKey, OutValue);
+ auto _ = MakeGuard([&] {
+ if (!Ok)
+ return;
+
+ m_AccessTracker->TrackAccess(InBucket, HashKey);
+ });
+
if (Ok)
{
ZEN_ASSERT(OutValue.Value.Size());
+
+ return true;
}
- if (!Ok)
- {
- Ok = m_DiskLayer.Get(InBucket, HashKey, OutValue);
+ Ok = m_DiskLayer.Get(InBucket, HashKey, OutValue);
- if (Ok)
- {
- ZEN_ASSERT(OutValue.Value.Size());
- }
+ if (Ok)
+ {
+ ZEN_ASSERT(OutValue.Value.Size());
- if (Ok && (OutValue.Value.Size() <= m_DiskLayerSizeThreshold))
+ if (OutValue.Value.Size() <= m_DiskLayerSizeThreshold)
{
m_MemLayer.Put(InBucket, HashKey, OutValue);
}
@@ -88,6 +100,25 @@ ZenCacheStore::Put(std::string_view InBucket, const IoHash& HashKey, const ZenCa
m_DiskLayer.Put(InBucket, HashKey, Value);
+#if ZEN_USE_REF_TRACKING
+ if (Value.Value.GetContentType() == ZenContentType::kCbObject)
+ {
+ if (ValidateCompactBinary(Value.Value, CbValidateMode::All) == CbValidateError::None)
+ {
+ CbObject Object{SharedBuffer(Value.Value)};
+
+ uint8_t TempBuffer[8 * sizeof(IoHash)];
+ std::pmr::monotonic_buffer_resource Linear{TempBuffer, sizeof TempBuffer};
+ std::pmr::polymorphic_allocator Allocator{&Linear};
+ std::pmr::vector<IoHash> CidReferences{Allocator};
+
+ Object.IterateAttachments([&](CbFieldView Field) { CidReferences.push_back(Field.AsAttachment()); });
+
+ m_Gc.OnNewCidReferences(CidReferences);
+ }
+ }
+#endif
+
if (Value.Value.Size() <= m_DiskLayerSizeThreshold)
{
m_MemLayer.Put(InBucket, HashKey, Value);
@@ -131,10 +162,10 @@ ZenCacheStore::Scrub(ScrubContext& Ctx)
}
void
-ZenCacheStore::GarbageCollect(GcContext& GcCtx)
+ZenCacheStore::GatherReferences(GcContext& GcCtx) // forwards the same GcCtx to both cache layers
{
- m_DiskLayer.GarbageCollect(GcCtx);
- m_MemLayer.GarbageCollect(GcCtx);
+ m_MemLayer.GatherReferences(GcCtx); // memory layer is visited first
+ m_DiskLayer.GatherReferences(GcCtx); // then the disk layer
}
//////////////////////////////////////////////////////////////////////////
@@ -220,13 +251,13 @@ ZenCacheMemoryLayer::Scrub(ScrubContext& Ctx)
}
void
-ZenCacheMemoryLayer::GarbageCollect(GcContext& GcCtx)
+ZenCacheMemoryLayer::GatherReferences(GcContext& GcCtx) // calls GatherReferences on every bucket in m_Buckets
{
RwLock::SharedLockScope _(m_Lock); // shared lock scope on m_Lock covers the whole loop
for (auto& Kv : m_Buckets)
{
- Kv.second.GarbageCollect(GcCtx);
+ Kv.second.GatherReferences(GcCtx); // Kv.second is the bucket object
}
}
@@ -252,7 +283,7 @@ ZenCacheMemoryLayer::CacheBucket::Scrub(ScrubContext& Ctx)
}
void
-ZenCacheMemoryLayer::CacheBucket::GarbageCollect(GcContext& GcCtx)
+ZenCacheMemoryLayer::CacheBucket::GatherReferences(GcContext& GcCtx)
{
// Is it even meaningful to do this? The memory layer shouldn't
// contain anything which is not already in the disk layer
@@ -316,53 +347,106 @@ ZenCacheMemoryLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue
//////////////////////////////////////////////////////////////////////////
-inline DiskLocation::DiskLocation() = default;
+#pragma pack(push)
+#pragma pack(1)
-inline DiskLocation::DiskLocation(uint64_t Offset, uint64_t ValueSize, uint32_t IndexSize, uint64_t Flags)
-: OffsetAndFlags(CombineOffsetAndFlags(Offset, Flags))
-, LowerSize(ValueSize & 0xFFFFffff)
-, IndexDataSize(IndexSize)
+struct DiskLocation // location record: one 64-bit offset+flags word plus two 32-bit size fields (16 bytes under the enclosing pack(1))
{
-}
+ inline DiskLocation() = default; // all fields take their in-class zero initializers
-inline uint64_t
-DiskLocation::CombineOffsetAndFlags(uint64_t Offset, uint64_t Flags)
-{
- return Offset | Flags;
-}
+ inline DiskLocation(uint64_t Offset, uint64_t ValueSize, uint32_t IndexSize, uint64_t Flags)
+ : OffsetAndFlags(CombineOffsetAndFlags(Offset, Flags)) // flags share the same 64-bit word as the offset
+ , LowerSize(ValueSize & 0xFFFFffff) // NOTE(review): only the low 32 bits of ValueSize are kept; kSizeMask is declared but not used in the visible code - confirm whether larger sizes are stored elsewhere
+ , IndexDataSize(IndexSize)
+ {
+ }
-inline uint64_t
-DiskLocation::Offset() const
-{
- return OffsetAndFlags & kOffsetMask;
-}
+ static const uint64_t kOffsetMask = 0x0000'ffFF'ffFF'ffFFull;
+ static const uint64_t kSizeMask = 0x00FF'0000'0000'0000ull;
+ static const uint64_t kFlagsMask = 0xff00'0000'0000'0000ull;
+ static const uint64_t kStandaloneFile = 0x8000'0000'0000'0000ull;
+ static const uint64_t kStructured = 0x4000'0000'0000'0000ull;
+ static const uint64_t kTombStone = 0x2000'0000'0000'0000ull;
-inline uint64_t
-DiskLocation::Size() const
-{
- return LowerSize;
-}
+ static uint64_t CombineOffsetAndFlags(uint64_t Offset, uint64_t Flags) { return Offset | Flags; } // plain bitwise OR; Offset is not masked here
-inline uint64_t
-DiskLocation::IsFlagSet(uint64_t Flag) const
-{
- return OffsetAndFlags & Flag;
-}
+ inline uint64_t Offset() const { return OffsetAndFlags & kOffsetMask; } // low 48 bits of the combined word
+ inline uint64_t Size() const { return LowerSize; } // returns the stored 32-bit size only
+ inline uint64_t IsFlagSet(uint64_t Flag) const { return OffsetAndFlags & Flag; } // returns the masked bits (non-zero when set), not a bool
+ inline ZenContentType GetContentType() const // kCbObject when kStructured is set, otherwise kBinary
+ {
+ ZenContentType ContentType = ZenContentType::kBinary;
-inline ZenContentType
-DiskLocation::GetContentType() const
-{
- ZenContentType ContentType = ZenContentType::kBinary;
+ if (IsFlagSet(DiskLocation::kStructured))
+ {
+ ContentType = ZenContentType::kCbObject;
+ }
- if (IsFlagSet(DiskLocation::kStructured))
- {
- ContentType = ZenContentType::kCbObject;
+ return ContentType;
}
- return ContentType;
-}
+private:
+ uint64_t OffsetAndFlags = 0; // offset in the bits covered by kOffsetMask, flags OR-ed in by the constructor
+ uint32_t LowerSize = 0; // low 32 bits of the value size
+ uint32_t IndexDataSize = 0; // set from the IndexSize constructor argument; no accessor in this struct
+};
-//////////////////////////////////////////////////////////////////////////
+struct DiskIndexEntry // record type of the bucket log (TCasLogFile<DiskIndexEntry>); size is pinned to 36 bytes by the static_assert that follows
+{
+ IoHash Key; // cache key; also the key of the in-memory index map
+ DiskLocation Location; // location record stored for Key
+};
+
+#pragma pack(pop)
+
+static_assert(sizeof(DiskIndexEntry) == 36);
+
+struct ZenCacheDiskLayer::CacheBucket // one bucket of the disk cache layer: directory, backing files and an in-memory index
+{
+ CacheBucket();
+ ~CacheBucket();
+
+ void OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true); // DiscoverBuckets calls this with AllowCreate = false, then checks IsOk()
+ static bool Delete(std::filesystem::path BucketDir);
+ bool Get(const IoHash& HashKey, ZenCacheValue& OutValue); // result is written to OutValue; presumably returns true on a hit - confirm in the definition
+ void Put(const IoHash& HashKey, const ZenCacheValue& Value);
+ void Drop();
+ void Flush();
+ void Scrub(ScrubContext& Ctx);
+ void GatherReferences(GcContext& GcCtx);
+
+ inline bool IsOk() const { return m_IsOk; }
+
+private:
+ std::filesystem::path m_BucketDir;
+ Oid m_BucketId;
+ bool m_IsOk = false; // false until set elsewhere; exposed through IsOk()
+ uint64_t m_LargeObjectThreshold = 64 * 1024; // 64 KiB; presumably the cutoff for standalone-file storage - TODO confirm in Put
+
+ // These files are used to manage storage of small objects for this bucket
+
+ BasicFile m_SobsFile;
+ TCasLogFile<DiskIndexEntry> m_SlogFile; // log of DiskIndexEntry records; OpenOrCreate replays it into m_Index
+
+ RwLock m_IndexLock; // OpenOrCreate takes this exclusively while filling m_Index
+ tsl::robin_map<IoHash, DiskLocation, IoHash::Hasher> m_Index; // key hash -> location record
+ uint64_t m_WriteCursor = 0; // OpenOrCreate sets this to the highest replayed end offset rounded up to 16
+
+ void BuildPath(WideStringBuilderBase& Path, const IoHash& HashKey);
+ void PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value);
+ bool GetStandaloneCacheValue(const DiskLocation& Loc, const IoHash& HashKey, ZenCacheValue& OutValue);
+ bool GetInlineCacheValue(const DiskLocation& Loc, ZenCacheValue& OutValue);
+
+ // These locks are here to avoid contention on file creation, therefore it's sufficient
+ // that we take the same lock for the same hash
+ //
+ // These locks are small and should really be spaced out so they don't share cache lines,
+ // but we don't currently access them at particularly high frequency so it should not be
+ // an issue in practice
+
+ RwLock m_ShardedLocks[256]; // one lock per possible byte value
+ inline RwLock& LockForHash(const IoHash& Hash) { return m_ShardedLocks[Hash.Hash[19]]; } // shard is selected by byte 19 of the hash
+};
ZenCacheDiskLayer::CacheBucket::CacheBucket()
{
@@ -454,16 +538,29 @@ ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir, bo
m_SlogFile.Open(SlogPath, IsNew);
- uint64_t MaxFileOffset = 0;
+ uint64_t MaxFileOffset = 0;
+ uint64_t InvalidEntryCount = 0;
if (RwLock::ExclusiveLockScope _(m_IndexLock); m_Index.empty())
{
m_SlogFile.Replay([&](const DiskIndexEntry& Record) {
- m_Index[Record.Key] = Record.Location;
+ if (Record.Key == IoHash::Zero)
+ {
+ ++InvalidEntryCount;
+ }
+ else
+ {
+ m_Index[Record.Key] = Record.Location;
- MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.Offset() + Record.Location.Size());
+ MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.Offset() + Record.Location.Size());
+ }
});
+ if (InvalidEntryCount)
+ {
+ ZEN_WARN("found {} invalid entries in '{}'", InvalidEntryCount, SlogPath);
+ }
+
m_WriteCursor = (MaxFileOffset + 15) & ~15;
}
@@ -661,7 +758,7 @@ ZenCacheDiskLayer::CacheBucket::Scrub(ScrubContext& Ctx)
}
void
-ZenCacheDiskLayer::CacheBucket::GarbageCollect(GcContext& GcCtx)
+ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx)
{
RwLock::SharedLockScope _(m_IndexLock);
@@ -924,7 +1021,7 @@ ZenCacheDiskLayer::DiscoverBuckets()
{
// New bucket needs to be created
- std::string BucketName8 = WideToUtf8(BucketName);
+ const std::string BucketName8 = ToUtf8(BucketName);
if (auto It = m_Buckets.find(BucketName8); It != m_Buckets.end())
{
@@ -940,7 +1037,11 @@ ZenCacheDiskLayer::DiscoverBuckets()
Bucket.OpenOrCreate(BucketPath, /* AllowCreate */ false);
- if (!Bucket.IsOk())
+ if (Bucket.IsOk())
+ {
+ ZEN_INFO("Discovered bucket '{}'", BucketName8);
+ }
+ else
{
ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName8, m_RootDir);
@@ -1007,13 +1108,13 @@ ZenCacheDiskLayer::Scrub(ScrubContext& Ctx)
}
void
-ZenCacheDiskLayer::GarbageCollect(GcContext& GcCtx)
+ZenCacheDiskLayer::GatherReferences(GcContext& GcCtx) // calls GatherReferences on every bucket in m_Buckets
{
RwLock::SharedLockScope _(m_Lock); // shared lock scope on m_Lock covers the whole loop
for (auto& Kv : m_Buckets)
{
- Kv.second.GarbageCollect(GcCtx);
+ Kv.second.GatherReferences(GcCtx); // Kv.second is the bucket object
}
}
@@ -1028,7 +1129,9 @@ TEST_CASE("z$.store")
ScopedTemporaryDirectory TempDir;
- ZenCacheStore Zcs(TempDir.Path() / "cache");
+ CasGc Gc;
+
+ ZenCacheStore Zcs(Gc, TempDir.Path() / "cache");
const int kIterationCount = 100;