diff options
| author | Dan Engelbrecht <[email protected]> | 2024-10-21 15:40:13 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-10-21 15:40:13 +0200 |
| commit | efd016d84d0940bf616e3efef135532cbf5fedef (patch) | |
| tree | 50bf249799b65af4a30c473c53bfa8b243be335e /src | |
| parent | 5.5.9-pre8 (diff) | |
| download | zen-efd016d84d0940bf616e3efef135532cbf5fedef.tar.xz zen-efd016d84d0940bf616e3efef135532cbf5fedef.zip | |
bucket size queries (#203)
- Feature: Added the --bucketsize and --bucketsizes options to zen cache-info to report data sizes for cache buckets and their attachments
Diffstat (limited to 'src')
| -rw-r--r-- | src/zen/cmds/cache_cmd.cpp | 35 | ||||
| -rw-r--r-- | src/zen/cmds/cache_cmd.h | 2 | ||||
| -rw-r--r-- | src/zenserver/cache/httpstructuredcache.cpp | 115 | ||||
| -rw-r--r-- | src/zenstore/cache/cachedisklayer.cpp | 66 | ||||
| -rw-r--r-- | src/zenstore/cache/structuredcachestore.cpp | 24 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/cachedisklayer.h | 18 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/cacheshared.h | 8 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/structuredcachestore.h | 4 |
8 files changed, 258 insertions, 14 deletions
diff --git a/src/zen/cmds/cache_cmd.cpp b/src/zen/cmds/cache_cmd.cpp index 37e7c8fd1..00099cebc 100644 --- a/src/zen/cmds/cache_cmd.cpp +++ b/src/zen/cmds/cache_cmd.cpp @@ -130,7 +130,15 @@ CacheInfoCommand::CacheInfoCommand() m_Options.add_options()("h,help", "Print help"); m_Options.add_option("", "u", "hosturl", "Host URL", cxxopts::value(m_HostName)->default_value(""), "<hosturl>"); m_Options.add_option("", "n", "namespace", "Namespace name", cxxopts::value(m_NamespaceName), "<namespacename>"); + m_Options.add_option("", + "", + "bucketsizes", + "Comma delimited list of bucket names to get size info from, * to get info on all buckets", + cxxopts::value(m_SizeInfoBucketNames), + "<bucketnames>"); m_Options.add_option("", "b", "bucket", "Bucket name", cxxopts::value(m_BucketName), "<bucketname>"); + m_Options.add_option("", "", "bucketsize", "Show detailed bucket size info", cxxopts::value(m_BucketSizeInfo), "<bucketsize>"); + m_Options.parse_positional({"namespace", "bucket"}); } @@ -157,20 +165,47 @@ CacheInfoCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) Session.SetHeader(cpr::Header{{"Accept", "application/json"}}); if (m_HostName.empty()) { + if (!m_SizeInfoBucketNames.empty()) + { + throw zen::OptionParseException("--bucketsizes option needs a --namespace"); + } + if (m_BucketSizeInfo) + { + throw zen::OptionParseException("--bucketsizes option needs a --namespace and a --bucket"); + } ZEN_CONSOLE("Info on cache from '{}'", m_HostName); Session.SetUrl({fmt::format("{}/z$", m_HostName)}); } else if (m_BucketName.empty()) { + if (m_BucketSizeInfo) + { + throw zen::OptionParseException("--bucketsizes option needs a --bucket"); + } ZEN_CONSOLE("Info on cache namespace '{}' from '{}'", m_NamespaceName, m_HostName); Session.SetUrl({fmt::format("{}/z$/{}", m_HostName, m_NamespaceName)}); } else { + if (!m_SizeInfoBucketNames.empty()) + { + throw zen::OptionParseException("--bucketsizes option can not be used together with --bucket 
option"); + } ZEN_CONSOLE("Info on cache bucket '{}/{}' from '{}'", m_NamespaceName, m_BucketName, m_HostName); Session.SetUrl({fmt::format("{}/z$/{}/{}", m_HostName, m_NamespaceName, m_BucketName)}); } + cpr::Parameters Parameters; + if (!m_SizeInfoBucketNames.empty()) + { + Parameters.Add({"bucketsizes", m_SizeInfoBucketNames}); + } + if (m_BucketSizeInfo) + { + Parameters.Add({"bucketsize", "true"}); + } + + Session.SetParameters(Parameters); cpr::Response Result = Session.Get(); if (zen::IsHttpSuccessCode(Result.status_code)) diff --git a/src/zen/cmds/cache_cmd.h b/src/zen/cmds/cache_cmd.h index 654af8512..73702cada 100644 --- a/src/zen/cmds/cache_cmd.h +++ b/src/zen/cmds/cache_cmd.h @@ -34,6 +34,8 @@ private: cxxopts::Options m_Options{"cache-info", "Info on cache, namespace or bucket"}; std::string m_HostName; std::string m_NamespaceName; + std::string m_SizeInfoBucketNames; + bool m_BucketSizeInfo = false; std::string m_BucketName; }; diff --git a/src/zenserver/cache/httpstructuredcache.cpp b/src/zenserver/cache/httpstructuredcache.cpp index 109fb34f6..551b5a76d 100644 --- a/src/zenserver/cache/httpstructuredcache.cpp +++ b/src/zenserver/cache/httpstructuredcache.cpp @@ -23,6 +23,7 @@ #include <zenutil/cache/cacherequests.h> #include <zenutil/cache/rpcrecording.h> #include <zenutil/packageformat.h> +#include <zenutil/workerpools.h> #include "upstream/jupiter.h" #include "upstream/upstreamcache.h" @@ -606,6 +607,81 @@ HttpStructuredCacheService::HandleCacheNamespaceRequest(HttpServerRequest& Reque ResponseWriter.AddInteger("EntryCount", Info->DiskLayerInfo.EntryCount); + if (auto Buckets = HttpServerRequest::Decode(Request.GetQueryParams().GetValue("bucketsizes")); !Buckets.empty()) + { + ResponseWriter.BeginObject("BucketSizes"); + + ResponseWriter.BeginArray("Buckets"); + + std::vector<std::string> BucketNames; + if (Buckets == "*") // Get all - empty FieldFilter equal getting all fields + { + BucketNames = Info.value().BucketNames; + } + else + { + 
ForEachStrTok(Buckets, ',', [&](std::string_view BucketName) { + BucketNames.push_back(std::string(BucketName)); + return true; + }); + } + WorkerThreadPool& WorkerPool = GetMediumWorkerPool(EWorkloadType::Background); + std::vector<IoHash> AllAttachments; + for (const std::string& BucketName : BucketNames) + { + ResponseWriter.BeginObject(); + ResponseWriter << "Name" << BucketName; + CacheContentStats ContentStats; + bool Success = m_CacheStore.GetContentStats(NamespaceName, BucketName, ContentStats); + if (Success) + { + size_t ValuesSize = 0; + for (const uint64_t Size : ContentStats.ValueSizes) + { + ValuesSize += Size; + } + + std::sort(ContentStats.Attachments.begin(), ContentStats.Attachments.end()); + auto NewEnd = std::unique(ContentStats.Attachments.begin(), ContentStats.Attachments.end()); + ContentStats.Attachments.erase(NewEnd, ContentStats.Attachments.end()); + + ResponseWriter << "Count" << ContentStats.ValueSizes.size(); + ResponseWriter << "StructuredCount" << ContentStats.StructuredValuesCount; + ResponseWriter << "StandaloneCount" << ContentStats.StandaloneValuesCount; + ResponseWriter << "Size" << ValuesSize; + ResponseWriter << "AttachmentCount" << ContentStats.Attachments.size(); + + AllAttachments.insert(AllAttachments.end(), ContentStats.Attachments.begin(), ContentStats.Attachments.end()); + } + ResponseWriter.EndObject(); + } + + ResponseWriter.EndArray(); + + ResponseWriter.BeginObject("Attachments"); + std::sort(AllAttachments.begin(), AllAttachments.end()); + auto NewEnd = std::unique(AllAttachments.begin(), AllAttachments.end()); + AllAttachments.erase(NewEnd, AllAttachments.end()); + + uint64_t AttachmentsSize = 0; + + m_CidStore.IterateChunks( + AllAttachments, + [&](size_t Index, const IoBuffer& Payload) { + ZEN_UNUSED(Index); + AttachmentsSize += Payload.GetSize(); + return true; + }, + &WorkerPool); + + ResponseWriter << "Count" << AllAttachments.size(); + ResponseWriter << "Size" << AttachmentsSize; + + 
ResponseWriter.EndObject(); + + ResponseWriter.EndObject(); + } + return Request.WriteResponse(HttpResponseCode::OK, ResponseWriter.Save()); } break; @@ -656,6 +732,45 @@ HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, ResponseWriter.AddInteger("DiskEntryCount", Info->DiskLayerInfo.EntryCount); + if (auto GetBucketSize = Request.GetQueryParams().GetValue("bucketsize"); GetBucketSize == "true") + { + CacheContentStats ContentStats; + bool Success = m_CacheStore.GetContentStats(NamespaceName, BucketName, ContentStats); + if (Success) + { + size_t ValuesSize = 0; + for (const uint64_t Size : ContentStats.ValueSizes) + { + ValuesSize += Size; + } + + std::sort(ContentStats.Attachments.begin(), ContentStats.Attachments.end()); + auto NewEnd = std::unique(ContentStats.Attachments.begin(), ContentStats.Attachments.end()); + ContentStats.Attachments.erase(NewEnd, ContentStats.Attachments.end()); + + ResponseWriter << "Count" << ContentStats.ValueSizes.size(); + ResponseWriter << "StructuredCount" << ContentStats.StructuredValuesCount; + ResponseWriter << "StandaloneCount" << ContentStats.StandaloneValuesCount; + ResponseWriter << "Size" << ValuesSize; + ResponseWriter << "AttachmentCount" << ContentStats.Attachments.size(); + + uint64_t AttachmentsSize = 0; + + WorkerThreadPool& WorkerPool = GetMediumWorkerPool(EWorkloadType::Background); + + m_CidStore.IterateChunks( + ContentStats.Attachments, + [&](size_t Index, const IoBuffer& Payload) { + ZEN_UNUSED(Index); + AttachmentsSize += Payload.GetSize(); + return true; + }, + &WorkerPool); + + ResponseWriter << "AttachmentsSize" << AttachmentsSize; + } + } + return Request.WriteResponse(HttpResponseCode::OK, ResponseWriter.Save()); } break; diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index 9161905d7..8c93d8a3a 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -3117,11 +3117,17 @@ 
ZenCacheDiskLayer::CacheBucket::ReadAttachmentsFromMetaData(uint32_t BlockI } bool -ZenCacheDiskLayer::CacheBucket::GetReferences(GcCtx& Ctx, bool StateIsAlreadyLocked, std::vector<IoHash>& OutReferences) +ZenCacheDiskLayer::CacheBucket::GetReferences(const LoggerRef& Logger, + std::atomic_bool& IsCancelledFlag, + bool StateIsAlreadyLocked, + bool ReadCacheAttachmentMetaData, + bool WriteCacheAttachmentMetaData, + std::vector<IoHash>& OutReferences, + ReferencesStats* OptionalOutReferencesStats) { ZEN_TRACE_CPU("Z$::Bucket::GetReferencesLocked"); - auto Log = [&Ctx]() { return Ctx.Logger; }; + auto Log = [&Logger]() { return Logger; }; auto GetAttachments = [&](MemoryView Data) -> bool { if (ValidateCompactBinary(Data, CbValidateMode::Default) == CbValidateError::None) @@ -3148,7 +3154,7 @@ ZenCacheDiskLayer::CacheBucket::GetReferences(GcCtx& Ctx, bool StateIsAlreadyLoc } for (const auto& Entry : m_Index) { - if (Ctx.IsCancelledFlag.load()) + if (IsCancelledFlag.load()) { return false; } @@ -3157,15 +3163,29 @@ ZenCacheDiskLayer::CacheBucket::GetReferences(GcCtx& Ctx, bool StateIsAlreadyLoc const BucketPayload& Payload = m_Payloads[EntryIndex]; const DiskLocation& Loc = Payload.Location; + if (OptionalOutReferencesStats != nullptr) + { + OptionalOutReferencesStats->ValueSizes.push_back(Loc.Size()); + } + if (!Loc.IsFlagSet(DiskLocation::kStructured)) { continue; } + if (OptionalOutReferencesStats) + { + OptionalOutReferencesStats->StructuredValuesCount++; + } + const IoHash& Key = Entry.first; if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) { StandaloneKeys.push_back(std::make_pair(Key, Loc)); + if (OptionalOutReferencesStats) + { + OptionalOutReferencesStats->StandaloneValuesCount++; + } continue; } @@ -3188,21 +3208,19 @@ ZenCacheDiskLayer::CacheBucket::GetReferences(GcCtx& Ctx, bool StateIsAlreadyLoc OutReferences.reserve(OutReferences.size() + InlineKeys.size() + StandaloneKeys.size()); // Make space for at least one attachment per record - bool UseMetaData = 
Ctx.Settings.StoreCacheAttachmentMetaData; - for (const std::vector<std::size_t>& ChunkIndexes : InlineBlockChunkIndexes) { ZEN_ASSERT(!ChunkIndexes.empty()); uint32_t BlockIndex = InlineLocations[ChunkIndexes[0]].BlockIndex; - if (!UseMetaData || !ReadAttachmentsFromMetaData(BlockIndex, InlineKeys, ChunkIndexes, OutReferences)) + if (!ReadCacheAttachmentMetaData || !ReadAttachmentsFromMetaData(BlockIndex, InlineKeys, ChunkIndexes, OutReferences)) { std::vector<IoHash> Keys; std::vector<uint32_t> AttachmentCounts; size_t PrecachedReferencesStart = OutReferences.size(); size_t NextPrecachedReferencesStart = PrecachedReferencesStart; - bool WriteMetaData = UseMetaData && !m_BlockStore.IsWriting(BlockIndex); + bool WriteMetaData = WriteCacheAttachmentMetaData && !m_BlockStore.IsWriting(BlockIndex); if (WriteMetaData) { Keys.reserve(InlineLocations.size()); @@ -3230,12 +3248,12 @@ ZenCacheDiskLayer::CacheBucket::GetReferences(GcCtx& Ctx, bool StateIsAlreadyLoc [&](size_t ChunkIndex, const void* Data, uint64_t Size) { ZEN_UNUSED(ChunkIndex); CaptureAttachments(ChunkIndex, MemoryView(Data, Size)); - return !Ctx.IsCancelledFlag.load(); + return !IsCancelledFlag.load(); }, [&](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) { ZEN_UNUSED(ChunkIndex); CaptureAttachments(ChunkIndex, File.GetChunk(Offset, Size).GetView()); - return !Ctx.IsCancelledFlag.load(); + return !IsCancelledFlag.load(); }); if (Continue) @@ -3260,7 +3278,7 @@ ZenCacheDiskLayer::CacheBucket::GetReferences(GcCtx& Ctx, bool StateIsAlreadyLoc return false; } } - if (Ctx.IsCancelledFlag.load()) + if (IsCancelledFlag.load()) { return false; } @@ -3269,7 +3287,7 @@ ZenCacheDiskLayer::CacheBucket::GetReferences(GcCtx& Ctx, bool StateIsAlreadyLoc for (const auto& It : StandaloneKeys) { - if (Ctx.IsCancelledFlag.load()) + if (IsCancelledFlag.load()) { return false; } @@ -3326,7 +3344,13 @@ public: m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences = 
std::make_unique<std::vector<IoHash>>(); }); - bool Continue = m_CacheBucket.GetReferences(Ctx, /*StateIsAlreadyLocked*/ false, m_PrecachedReferences); + bool Continue = m_CacheBucket.GetReferences(Ctx.Logger, + Ctx.IsCancelledFlag, + /*StateIsAlreadyLocked*/ false, + Ctx.Settings.StoreCacheAttachmentMetaData, + Ctx.Settings.StoreCacheAttachmentMetaData, + m_PrecachedReferences, + /*OptionalOutReferencesStats*/ nullptr); if (!Continue) { m_CacheBucket.m_IndexLock.WithExclusiveLock([&]() { m_CacheBucket.m_TrackedReferences.reset(); }); @@ -4188,6 +4212,24 @@ ZenCacheDiskLayer::GetCapturedBuckets() } bool +ZenCacheDiskLayer::GetContentStats(std::string_view BucketName, CacheContentStats& OutContentStats) const +{ + std::atomic_bool CancelFlag = false; + if (auto It = m_Buckets.find(std::string(BucketName)); It != m_Buckets.end()) + { + CacheBucket::ReferencesStats BucketStats; + if (It->second->GetReferences(Log(), CancelFlag, false, true, false, OutContentStats.Attachments, &BucketStats)) + { + OutContentStats.ValueSizes = std::move(BucketStats.ValueSizes); + OutContentStats.StructuredValuesCount = BucketStats.StructuredValuesCount; + OutContentStats.StandaloneValuesCount = BucketStats.StandaloneValuesCount; + return true; + } + } + return false; +} + +bool ZenCacheDiskLayer::StartAsyncMemCacheTrim() { ZEN_TRACE_CPU("Z$::MemCacheTrim"); diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index d30bd93cc..578929198 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -370,6 +370,12 @@ ZenCacheNamespace::DisableUpdateCapture() m_DiskLayer.DisableUpdateCapture(); } +bool +ZenCacheNamespace::GetContentStats(std::string_view BucketName, CacheContentStats& OutContentStats) const +{ + return m_DiskLayer.GetContentStats(BucketName, OutContentStats); +} + #if ZEN_WITH_TESTS void ZenCacheNamespace::SetAccessTime(std::string_view Bucket, const IoHash& HashKey, 
GcClock::TimePoint Time) @@ -1080,6 +1086,16 @@ ZenCacheStore::GetCapturedNamespaces() return {}; } +bool +ZenCacheStore::GetContentStats(std::string_view NamespaceName, std::string_view BucketName, CacheContentStats& OutContentStats) const +{ + if (const ZenCacheNamespace* Namespace = FindNamespace(NamespaceName); Namespace) + { + return Namespace->GetContentStats(BucketName, OutContentStats); + } + return false; +} + std::string ZenCacheStore::GetGcName(GcCtx&) { @@ -1161,7 +1177,13 @@ public: for (ZenCacheDiskLayer::CacheBucket* Bucket : AddedBuckets) { - bool Continue = Bucket->GetReferences(Ctx, /*StateIsAlreadyLocked*/ true, m_References); + bool Continue = Bucket->GetReferences(Ctx.Logger, + Ctx.IsCancelledFlag, + /*StateIsAlreadyLocked*/ true, + Ctx.Settings.StoreCacheAttachmentMetaData, + Ctx.Settings.StoreCacheAttachmentMetaData, + m_References, + nullptr); if (!Continue) { break; diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index f8ce8641c..4b7cf6101 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -205,6 +205,8 @@ public: void SetAccessTime(std::string_view Bucket, const IoHash& HashKey, GcClock::TimePoint Time); #endif // ZEN_WITH_TESTS + bool GetContentStats(std::string_view BucketName, CacheContentStats& OutContentStats) const; + /** A cache bucket manages a single directory containing metadata and data for that bucket */ @@ -230,7 +232,21 @@ public: void Flush(); void ScrubStorage(ScrubContext& Ctx); RwLock::SharedLockScope GetGcReferencerLock(); - bool GetReferences(GcCtx& Ctx, bool StateIsAlreadyLocked, std::vector<IoHash>& OutReferences); + + struct ReferencesStats + { + std::vector<uint64_t> ValueSizes; + uint64_t StructuredValuesCount = 0; + uint64_t StandaloneValuesCount = 0; + }; + + bool GetReferences(const LoggerRef& Logger, + std::atomic_bool& IsCancelledFlag, + bool 
StateIsAlreadyLocked, + bool ReadCacheAttachmentMetaData, + bool WriteCacheAttachmentMetaData, + std::vector<IoHash>& OutReferences, + ReferencesStats* OptionalOutReferencesStats); bool ReadAttachmentsFromMetaData(uint32_t BlockIndex, std::span<const IoHash> InlineKeys, diff --git a/src/zenstore/include/zenstore/cache/cacheshared.h b/src/zenstore/include/zenstore/cache/cacheshared.h index 2d5b9cbc3..9b45c7b21 100644 --- a/src/zenstore/include/zenstore/cache/cacheshared.h +++ b/src/zenstore/include/zenstore/cache/cacheshared.h @@ -57,6 +57,14 @@ struct CacheValueDetails std::unordered_map<std::string, NamespaceDetails> Namespaces; }; +struct CacheContentStats +{ + std::vector<uint64_t> ValueSizes; + uint64_t StructuredValuesCount = 0; + uint64_t StandaloneValuesCount = 0; + std::vector<IoHash> Attachments; +}; + bool IsKnownBadBucketName(std::string_view BucketName); bool ValidateIoBuffer(ZenContentType ContentType, IoBuffer Buffer); diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h index 50e40042a..dcdca71c6 100644 --- a/src/zenstore/include/zenstore/cache/structuredcachestore.h +++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h @@ -120,6 +120,8 @@ public: void EnableUpdateCapture(); void DisableUpdateCapture(); + bool GetContentStats(std::string_view BucketName, CacheContentStats& OutContentStats) const; + #if ZEN_WITH_TESTS void SetAccessTime(std::string_view Bucket, const IoHash& HashKey, GcClock::TimePoint Time); #endif // ZEN_WITH_TESTS @@ -285,6 +287,8 @@ public: void DisableUpdateCapture(); std::vector<std::string> GetCapturedNamespaces(); + bool GetContentStats(std::string_view Namespace, std::string_view BucketName, CacheContentStats& OutContentStats) const; + private: const ZenCacheNamespace* FindNamespace(std::string_view Namespace) const; ZenCacheNamespace* GetNamespace(std::string_view Namespace); |