aboutsummaryrefslogtreecommitdiff
path: root/src/zenserver/cache/structuredcachestore.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2023-10-10 13:12:02 +0200
committerGitHub <[email protected]>2023-10-10 13:12:02 +0200
commit7df585a13cd8d445396bbfbc10ad127dce89b272 (patch)
tree32da843f1f032504a8c8de0127f735fef53c8619 /src/zenserver/cache/structuredcachestore.cpp
parentfixed GC logging output stats (#458) (diff)
downloadzen-7df585a13cd8d445396bbfbc10ad127dce89b272.tar.xz
zen-7df585a13cd8d445396bbfbc10ad127dce89b272.zip
cache reference tracking (#455)
- Feature: Add caching of referenced CId content for structured cache records; this avoids disk thrashing when gathering references for GC - disabled by default, enable with `--cache-reference-cache-enabled` - Improvement: Faster collection of referenced CId content in project store
Diffstat (limited to 'src/zenserver/cache/structuredcachestore.cpp')
-rw-r--r--src/zenserver/cache/structuredcachestore.cpp81
1 files changed, 45 insertions, 36 deletions
diff --git a/src/zenserver/cache/structuredcachestore.cpp b/src/zenserver/cache/structuredcachestore.cpp
index fe0b84f33..48463fcd8 100644
--- a/src/zenserver/cache/structuredcachestore.cpp
+++ b/src/zenserver/cache/structuredcachestore.cpp
@@ -61,13 +61,14 @@ IsKnownBadBucketName(std::string_view Bucket)
ZenCacheNamespace::ZenCacheNamespace(GcManager& Gc,
JobQueue& JobQueue,
const std::filesystem::path& RootDir,
+ bool EnableReferenceCaching,
const ZenCacheMemoryLayer::Configuration MemLayerConfig)
: GcStorage(Gc)
, GcContributor(Gc)
, m_RootDir(RootDir)
, m_JobQueue(JobQueue)
, m_MemLayer(m_JobQueue, MemLayerConfig)
-, m_DiskLayer(RootDir)
+, m_DiskLayer(RootDir, EnableReferenceCaching)
{
ZEN_INFO("initializing structured cache at '{}'", RootDir);
CreateDirectories(RootDir);
@@ -116,7 +117,7 @@ ZenCacheNamespace::Get(std::string_view InBucket, const IoHash& HashKey, ZenCach
}
void
-ZenCacheNamespace::Put(std::string_view InBucket, const IoHash& HashKey, const ZenCacheValue& Value)
+ZenCacheNamespace::Put(std::string_view InBucket, const IoHash& HashKey, const ZenCacheValue& Value, std::span<IoHash> References)
{
ZEN_TRACE_CPU("Z$::Namespace::Put");
@@ -126,7 +127,7 @@ ZenCacheNamespace::Put(std::string_view InBucket, const IoHash& HashKey, const Z
ZEN_ASSERT(Value.Value.Size());
- m_DiskLayer.Put(InBucket, HashKey, Value);
+ m_DiskLayer.Put(InBucket, HashKey, Value, References);
if (Value.Value.Size() <= m_DiskLayerSizeThreshold)
{
@@ -321,6 +322,7 @@ ZenCacheStore::ZenCacheStore(GcManager& Gc,
std::make_unique<ZenCacheNamespace>(Gc,
m_JobQueue,
m_Configuration.BasePath / fmt::format("{}{}", NamespaceDiskPrefix, NamespaceName),
+ m_Configuration.EnableReferenceCaching,
m_Configuration.MemLayerConfig);
}
}
@@ -476,7 +478,8 @@ ZenCacheStore::Put(const CacheRequestContext& Context,
std::string_view Namespace,
std::string_view Bucket,
const IoHash& HashKey,
- const ZenCacheValue& Value)
+ const ZenCacheValue& Value,
+ std::span<IoHash> References)
{
// Ad hoc rejection of known bad usage patterns for DDC bucket names
@@ -512,7 +515,7 @@ ZenCacheStore::Put(const CacheRequestContext& Context,
if (ZenCacheNamespace* Store = GetNamespace(Namespace); Store)
{
- Store->Put(Bucket, HashKey, Value);
+ Store->Put(Bucket, HashKey, Value, References);
m_WriteCount++;
return;
}
@@ -625,6 +628,7 @@ ZenCacheStore::GetNamespace(std::string_view Namespace)
std::make_unique<ZenCacheNamespace>(m_Gc,
m_JobQueue,
m_Configuration.BasePath / fmt::format("{}{}", NamespaceDiskPrefix, Namespace),
+ m_Configuration.EnableReferenceCaching,
m_Configuration.MemLayerConfig));
return NewNamespace.first->second.get();
}
@@ -803,7 +807,7 @@ TEST_CASE("z$.store")
GcManager Gc;
auto JobQueue = MakeJobQueue(1, "testqueue");
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", false);
const int kIterationCount = 100;
@@ -819,7 +823,7 @@ TEST_CASE("z$.store")
Value.Value = Obj.GetBuffer().AsIoBuffer();
Value.Value.SetContentType(ZenContentType::kCbObject);
- Zcs.Put("test_bucket"sv, Key, Value);
+ Zcs.Put("test_bucket"sv, Key, Value, {});
}
for (int i = 0; i < kIterationCount; ++i)
@@ -859,7 +863,7 @@ TEST_CASE("z$.size")
{
GcManager Gc;
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", false);
CbObject CacheValue = CreateCacheValue(Zcs.DiskLayerThreshold() - 256);
@@ -869,7 +873,10 @@ TEST_CASE("z$.size")
for (size_t Key = 0; Key < Count; ++Key)
{
const size_t Bucket = Key % 4;
- Zcs.Put(fmt::format("test_bucket-{}", Bucket), IoHash::HashBuffer(&Key, sizeof(uint32_t)), ZenCacheValue{.Value = Buffer});
+ Zcs.Put(fmt::format("test_bucket-{}", Bucket),
+ IoHash::HashBuffer(&Key, sizeof(uint32_t)),
+ ZenCacheValue{.Value = Buffer},
+ {});
}
CacheSize = Zcs.StorageSize();
@@ -879,7 +886,7 @@ TEST_CASE("z$.size")
{
GcManager Gc;
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", false);
const GcStorageSize SerializedSize = Zcs.StorageSize();
CHECK_EQ(SerializedSize.MemorySize, 0);
@@ -902,7 +909,7 @@ TEST_CASE("z$.size")
{
GcManager Gc;
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", false);
CbObject CacheValue = CreateCacheValue(Zcs.DiskLayerThreshold() + 64);
@@ -912,7 +919,7 @@ TEST_CASE("z$.size")
for (size_t Key = 0; Key < Count; ++Key)
{
const size_t Bucket = Key % 4;
- Zcs.Put(fmt::format("test_bucket-{}", Bucket), IoHash::HashBuffer(&Key, sizeof(uint32_t)), {.Value = Buffer});
+ Zcs.Put(fmt::format("test_bucket-{}", Bucket), IoHash::HashBuffer(&Key, sizeof(uint32_t)), {.Value = Buffer}, {});
}
CacheSize = Zcs.StorageSize();
@@ -922,7 +929,7 @@ TEST_CASE("z$.size")
{
GcManager Gc;
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", false);
const GcStorageSize SerializedSize = Zcs.StorageSize();
CHECK_EQ(SerializedSize.MemorySize, 0);
@@ -961,7 +968,7 @@ TEST_CASE("z$.gc")
{
GcManager Gc;
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", true);
const auto Bucket = "teardrinker"sv;
// Create a cache record
@@ -978,7 +985,7 @@ TEST_CASE("z$.gc")
IoBuffer Buffer = Record.Save().GetBuffer().AsIoBuffer();
Buffer.SetContentType(ZenContentType::kCbObject);
- Zcs.Put(Bucket, Key, {.Value = Buffer});
+ Zcs.Put(Bucket, Key, {.Value = Buffer}, Cids);
std::vector<IoHash> Keep;
@@ -998,7 +1005,7 @@ TEST_CASE("z$.gc")
// Expect timestamps to be serialized
{
GcManager Gc;
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", true);
std::vector<IoHash> Keep;
// Collect garbage with 1 hour max cache duration
@@ -1019,7 +1026,7 @@ TEST_CASE("z$.gc")
{
ScopedTemporaryDirectory TempDir;
GcManager Gc;
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", true);
const auto Bucket = "fortysixandtwo"sv;
const GcClock::TimePoint CurrentTime = GcClock::Now();
@@ -1028,7 +1035,7 @@ TEST_CASE("z$.gc")
for (const auto& Key : Keys)
{
IoBuffer Value = testutils::CreateBinaryCacheValue(128 << 10);
- Zcs.Put(Bucket, Key, {.Value = Value});
+ Zcs.Put(Bucket, Key, {.Value = Value}, {});
}
{
@@ -1065,7 +1072,7 @@ TEST_CASE("z$.gc")
ScopedTemporaryDirectory TempDir;
GcManager Gc;
{
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", true);
const auto Bucket = "rightintwo"sv;
std::vector<IoHash> Keys{CreateKey(1), CreateKey(2), CreateKey(3)};
@@ -1073,7 +1080,7 @@ TEST_CASE("z$.gc")
for (const auto& Key : Keys)
{
IoBuffer Value = testutils::CreateBinaryCacheValue(128);
- Zcs.Put(Bucket, Key, {.Value = Value});
+ Zcs.Put(Bucket, Key, {.Value = Value}, {});
}
{
@@ -1107,7 +1114,7 @@ TEST_CASE("z$.gc")
}
}
{
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", true);
CHECK_EQ(0, Zcs.StorageSize().DiskSize);
}
}
@@ -1164,14 +1171,14 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true))
WorkerThreadPool ThreadPool(4);
GcManager Gc;
auto JobQueue = MakeJobQueue(1, "testqueue");
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path());
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path(), true);
{
std::atomic<size_t> WorkCompleted = 0;
for (const auto& Chunk : Chunks)
{
ThreadPool.ScheduleWork([&Zcs, &WorkCompleted, &Chunk]() {
- Zcs.Put(Chunk.second.Bucket, Chunk.first, {.Value = Chunk.second.Buffer});
+ Zcs.Put(Chunk.second.Bucket, Chunk.first, {.Value = Chunk.second.Buffer}, {});
WorkCompleted.fetch_add(1);
});
}
@@ -1232,7 +1239,7 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true))
for (const auto& Chunk : NewChunks)
{
ThreadPool.ScheduleWork([&Zcs, &WorkCompleted, Chunk, &AddedChunkCount]() {
- Zcs.Put(Chunk.second.Bucket, Chunk.first, {.Value = Chunk.second.Buffer});
+ Zcs.Put(Chunk.second.Bucket, Chunk.first, {.Value = Chunk.second.Buffer}, {});
AddedChunkCount.fetch_add(1);
WorkCompleted.fetch_add(1);
});
@@ -1395,14 +1402,14 @@ TEST_CASE("z$.namespaces")
Buffer.SetContentType(ZenContentType::kCbObject);
ZenCacheValue PutValue = {.Value = Buffer};
- Zcs.Put(Context, ZenCacheStore::DefaultNamespace, Bucket, Key1, PutValue);
+ Zcs.Put(Context, ZenCacheStore::DefaultNamespace, Bucket, Key1, PutValue, {});
ZenCacheValue GetValue;
CHECK(Zcs.Get(Context, ZenCacheStore::DefaultNamespace, Bucket, Key1, GetValue));
CHECK(!Zcs.Get(Context, CustomNamespace, Bucket, Key1, GetValue));
// This should just be dropped as we don't allow creating of namespaces on the fly
- Zcs.Put(Context, CustomNamespace, Bucket, Key1, PutValue);
+ Zcs.Put(Context, CustomNamespace, Bucket, Key1, PutValue, {});
CHECK(!Zcs.Get(Context, CustomNamespace, Bucket, Key1, GetValue));
}
@@ -1418,7 +1425,7 @@ TEST_CASE("z$.namespaces")
IoBuffer Buffer2 = CacheValue2.GetBuffer().AsIoBuffer();
Buffer2.SetContentType(ZenContentType::kCbObject);
ZenCacheValue PutValue2 = {.Value = Buffer2};
- Zcs.Put(Context, CustomNamespace, Bucket, Key2, PutValue2);
+ Zcs.Put(Context, CustomNamespace, Bucket, Key2, PutValue2, {});
ZenCacheValue GetValue;
CHECK(!Zcs.Get(Context, ZenCacheStore::DefaultNamespace, Bucket, Key2, GetValue));
@@ -1460,7 +1467,7 @@ TEST_CASE("z$.drop.bucket")
Buffer.SetContentType(ZenContentType::kCbObject);
ZenCacheValue PutValue = {.Value = Buffer};
- Zcs.Put(Context, Namespace, Bucket, Key, PutValue);
+ Zcs.Put(Context, Namespace, Bucket, Key, PutValue, {});
return Key;
};
auto GetValue = [&Context](ZenCacheStore& Zcs, std::string_view Namespace, std::string_view Bucket, const IoHash& Key) {
@@ -1533,7 +1540,7 @@ TEST_CASE("z$.drop.namespace")
Buffer.SetContentType(ZenContentType::kCbObject);
ZenCacheValue PutValue = {.Value = Buffer};
- Zcs.Put(Context, Namespace, Bucket, Key, PutValue);
+ Zcs.Put(Context, Namespace, Bucket, Key, PutValue, {});
return Key;
};
auto GetValue = [&Context](ZenCacheStore& Zcs, std::string_view Namespace, std::string_view Bucket, const IoHash& Key) {
@@ -1610,7 +1617,7 @@ TEST_CASE("z$.blocked.disklayer.put")
GcManager Gc;
auto JobQueue = MakeJobQueue(1, "testqueue");
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", false);
CbObject CacheValue = CreateCacheValue(64 * 1024 + 64);
@@ -1619,7 +1626,7 @@ TEST_CASE("z$.blocked.disklayer.put")
size_t Key = Buffer.Size();
IoHash HashKey = IoHash::HashBuffer(&Key, sizeof(uint32_t));
- Zcs.Put("test_bucket", HashKey, {.Value = Buffer});
+ Zcs.Put("test_bucket", HashKey, {.Value = Buffer}, {});
ZenCacheValue BufferGet;
CHECK(Zcs.Get("test_bucket", HashKey, BufferGet));
@@ -1629,7 +1636,7 @@ TEST_CASE("z$.blocked.disklayer.put")
Buffer2.SetContentType(ZenContentType::kCbObject);
// We should be able to overwrite even if the file is open for read
- Zcs.Put("test_bucket", HashKey, {.Value = Buffer2});
+ Zcs.Put("test_bucket", HashKey, {.Value = Buffer2}, {});
MemoryView OldView = BufferGet.Value.GetView();
@@ -1705,7 +1712,7 @@ TEST_CASE("z$.scrub")
GcManager Gc;
CidStore CidStore(Gc);
auto JobQueue = MakeJobQueue(1, "testqueue");
- ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache");
+ ZenCacheNamespace Zcs(Gc, *JobQueue, TempDir.Path() / "cache", true);
CidStoreConfiguration CidConfig = {.RootDirectory = TempDir.Path() / "cas", .TinyValueThreshold = 1024, .HugeValueThreshold = 4096};
CidStore.Initialize(CidConfig);
@@ -1713,12 +1720,14 @@ TEST_CASE("z$.scrub")
[&](bool IsStructured, std::string_view BucketName, const std::vector<IoHash>& Cids, const std::vector<size_t>& AttachmentSizes) {
for (const IoHash& Cid : Cids)
{
- CacheRecord Record = CreateCacheRecord(IsStructured, BucketName, Cid, AttachmentSizes);
- Zcs.Put("mybucket", Cid, {.Value = Record.Record});
+ CacheRecord Record = CreateCacheRecord(IsStructured, BucketName, Cid, AttachmentSizes);
+ std::vector<IoHash> AttachmentHashes;
for (const CompressedBuffer& Attachment : Record.Attachments)
{
- CidStore.AddChunk(Attachment.GetCompressed().Flatten().AsIoBuffer(), Attachment.DecodeRawHash());
+ AttachmentHashes.push_back(Attachment.DecodeRawHash());
+ CidStore.AddChunk(Attachment.GetCompressed().Flatten().AsIoBuffer(), AttachmentHashes.back());
}
+ Zcs.Put("mybucket", Cid, {.Value = Record.Record}, AttachmentHashes);
}
};