diff options
| author | Dan Engelbrecht <[email protected]> | 2024-11-15 10:06:39 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-11-15 10:06:39 +0100 |
| commit | aca6f56fde841454b13ed18136008b0ffe946aed (patch) | |
| tree | 3770efa6c789b45de8ea3ec426da7a77e7813775 /src | |
| parent | fixed some issues with ZenServerInstance::SpawnServer (#218) (diff) | |
| download | zen-aca6f56fde841454b13ed18136008b0ffe946aed.tar.xz zen-aca6f56fde841454b13ed18136008b0ffe946aed.zip | |
oplog prep gc fix (#216)
- Added option gc-validation to zenserver that does a check for missing references in all oplog post full GC. Enabled by default.
- Feature: Added option gc-validation to zen gc command to control reference validation. Enabled by default.
- Added more details in post GC log.
- Fixed race condition in oplog writes which could cause used attachments to be incorrectly removed by GC
Diffstat (limited to 'src')
| -rw-r--r-- | src/zen/cmds/admin_cmd.cpp | 8 | ||||
| -rw-r--r-- | src/zen/cmds/admin_cmd.h | 1 | ||||
| -rw-r--r-- | src/zen/cmds/projectstore_cmd.cpp | 64 | ||||
| -rw-r--r-- | src/zen/cmds/projectstore_cmd.h | 15 | ||||
| -rw-r--r-- | src/zen/zen.cpp | 4 | ||||
| -rw-r--r-- | src/zenserver/admin/admin.cpp | 5 | ||||
| -rw-r--r-- | src/zenserver/config.cpp | 8 | ||||
| -rw-r--r-- | src/zenserver/config.h | 1 | ||||
| -rw-r--r-- | src/zenserver/projectstore/httpprojectstore.cpp | 157 | ||||
| -rw-r--r-- | src/zenserver/projectstore/httpprojectstore.h | 1 | ||||
| -rw-r--r-- | src/zenserver/projectstore/projectstore.cpp | 824 | ||||
| -rw-r--r-- | src/zenserver/projectstore/projectstore.h | 95 | ||||
| -rw-r--r-- | src/zenstore/cache/cachedisklayer.cpp | 23 | ||||
| -rw-r--r-- | src/zenstore/cache/structuredcachestore.cpp | 53 | ||||
| -rw-r--r-- | src/zenstore/gc.cpp | 600 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/cachedisklayer.h | 10 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/cache/structuredcachestore.h | 10 | ||||
| -rw-r--r-- | src/zenstore/include/zenstore/gc.h | 68 |
18 files changed, 1494 insertions, 453 deletions
diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp index 3b24b9078..e07e28f54 100644 --- a/src/zen/cmds/admin_cmd.cpp +++ b/src/zen/cmds/admin_cmd.cpp @@ -149,6 +149,13 @@ GcCommand::GcCommand() "Enable storing attachments referenced by project oplogs in meta data", cxxopts::value(m_StoreProjectAttachmentMetaData)->default_value("false"), "<project-attachments-store>"); + + m_Options.add_option("", + "", + "gc-validation", + "Enable validation of references after full GC.", + cxxopts::value(m_EnableValidation)->default_value("true"), + "<cache-attachments-store>"); } GcCommand::~GcCommand() @@ -237,6 +244,7 @@ GcCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { Params.Add({"storeprojectattachmentmetadata", m_StoreProjectAttachmentMetaData ? "true" : "false"}); } + Params.Add({"enablevalidation", m_EnableValidation ? "true" : "false"}); cpr::Session Session; Session.SetHeader(cpr::Header{{"Accept", "application/json"}}); diff --git a/src/zen/cmds/admin_cmd.h b/src/zen/cmds/admin_cmd.h index e42081745..c593b2cac 100644 --- a/src/zen/cmds/admin_cmd.h +++ b/src/zen/cmds/admin_cmd.h @@ -55,6 +55,7 @@ private: std::string m_ReferenceHashHigh; bool m_StoreCacheAttachmentMetaData; bool m_StoreProjectAttachmentMetaData; + bool m_EnableValidation; }; class GcStatusCommand : public StorageCommand diff --git a/src/zen/cmds/projectstore_cmd.cpp b/src/zen/cmds/projectstore_cmd.cpp index fed03b707..1e4f2675a 100644 --- a/src/zen/cmds/projectstore_cmd.cpp +++ b/src/zen/cmds/projectstore_cmd.cpp @@ -1998,4 +1998,68 @@ OplogMirrorCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg return 0; } +//////////////////////////// + +OplogValidateCommand::OplogValidateCommand() +{ + m_Options.add_options()("h,help", "Print help"); + m_Options.add_option("", "u", "hosturl", "Host URL", cxxopts::value(m_HostName)->default_value(""), "<hosturl>"); + m_Options.add_option("", "p", "project", "Project name to get info from", cxxopts::value(m_ProjectName), "<projectid>"); + m_Options.add_option("", "l", "oplog", "Oplog name to get info from", cxxopts::value(m_OplogName), "<oplogid>"); + + m_Options.parse_positional({"project", "oplog"}); + m_Options.positional_help("[<projectid> <oplogid>]"); +} + +OplogValidateCommand::~OplogValidateCommand() +{ +} + +int +OplogValidateCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) +{ + ZEN_UNUSED(GlobalOptions); + + if (!ParseOptions(argc, argv)) + { + return 0; + } + + m_HostName = ResolveTargetHostSpec(m_HostName); + + if (m_HostName.empty()) + { + throw OptionParseException("unable to resolve server specification"); + } + + HttpClient Http(m_HostName); + + m_ProjectName = ResolveProject(Http, m_ProjectName); + if (m_ProjectName.empty()) + { + return 1; + } + + m_OplogName = ResolveOplog(Http, m_ProjectName, m_OplogName); + if (m_OplogName.empty()) + { + return 1; + } + + std::string Url = fmt::format("/prj/{}/oplog/{}/validate", m_ProjectName, m_OplogName); + + if (HttpClient::Response Result = Http.Post(Url, HttpClient::Accept(ZenContentType::kJSON))) + { + ZEN_CONSOLE("{}", Result.ToText()); + return 0; + } + else + { + Result.ThrowError("failed to get validate project oplog"sv); + return 1; + } + + return 0; +} + } // namespace zen diff --git a/src/zen/cmds/projectstore_cmd.h b/src/zen/cmds/projectstore_cmd.h index 0e16f946d..3a9d5dcb8 100644 --- a/src/zen/cmds/projectstore_cmd.h +++ b/src/zen/cmds/projectstore_cmd.h @@ -240,4 +240,19 @@ private: bool m_TrimToReferencedSet = true; }; +class OplogValidateCommand : public ProjectStoreCommand +{ +public: + OplogValidateCommand(); + ~OplogValidateCommand(); + virtual int Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) override; + virtual cxxopts::Options& Options() override { return m_Options; } + +private: + cxxopts::Options m_Options{"oplog-validate", "Validate oplog for missing references"}; + std::string m_HostName; + std::string m_ProjectName; + std::string m_OplogName; +}; + } // namespace zen diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index 7bf715e01..16f5799e0 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -412,6 +412,8 @@ main(int argc, char** argv) InfoCommand InfoCmd; JobCommand JobCmd; OplogMirrorCommand OplogMirrorCmd; + SnapshotOplogCommand SnapshotOplogCmd; + OplogValidateCommand OplogValidateCmd; PrintCommand PrintCmd; PrintPackageCommand PrintPkgCmd; ProjectOpDetailsCommand ProjectOpDetailsCmd; @@ -424,7 +426,6 @@ main(int argc, char** argv) RunCommand RunCmd; ScrubCommand ScrubCmd; ServeCommand ServeCmd; - SnapshotOplogCommand SnapshotOplogCmd; StatusCommand StatusCmd; LoggingCommand LoggingCmd; TopCommand TopCmd; @@ -465,6 +466,7 @@ main(int argc, char** argv) {"oplog-import", &ImportOplogCmd, "Import project store oplog"}, {"oplog-mirror", &OplogMirrorCmd, "Mirror project store oplog to file system"}, {"oplog-snapshot", &SnapshotOplogCmd, "Snapshot project store oplog"}, + {"oplog-validate", &OplogValidateCmd, "Validate oplog for missing references"}, {"print", &PrintCmd, "Print compact binary object"}, {"printpackage", &PrintPkgCmd, "Print compact binary package"}, {"project-create", &CreateProjectCmd, "Create a project"}, diff --git a/src/zenserver/admin/admin.cpp b/src/zenserver/admin/admin.cpp index ea830923f..847ed5a50 100644 --- a/src/zenserver/admin/admin.cpp +++ b/src/zenserver/admin/admin.cpp @@ -454,6 +454,11 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, GcParams.StoreProjectAttachmentMetaData = Param == "true"sv; } + if (auto Param = Params.GetValue("enablevalidation"); Param.empty() == false) + { + GcParams.EnableValidation = Param == "true"sv; + } + const bool Started = m_GcScheduler.TriggerGc(GcParams); CbObjectWriter Response; diff --git a/src/zenserver/config.cpp b/src/zenserver/config.cpp index 2fd9bbaf3..bedab7049 100644 --- a/src/zenserver/config.cpp +++ b/src/zenserver/config.cpp @@ -485,6 +485,7 @@ ParseConfigFile(const std::filesystem::path& Path, ServerOptions.GcConfig.StoreProjectAttachmentMetaData, "gc-projectstore-attachment-store"); LuaOptions.AddOption("gc.attachment.passes"sv, ServerOptions.GcConfig.AttachmentPassCount, "gc-attachment-passes"sv); + LuaOptions.AddOption("gc.validation"sv, ServerOptions.GcConfig.EnableValidation, "gc-validation"); ////// gc LuaOptions.AddOption("gc.cache.maxdurationseconds"sv, ServerOptions.GcConfig.Cache.MaxDurationSeconds, "gc-cache-duration-seconds"sv); @@ -890,6 +891,13 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) options.add_option("gc", "", + "gc-validation", + "Enable validation of references after full GC.", + cxxopts::value<bool>(ServerOptions.GcConfig.EnableValidation)->default_value("true"), + ""); + + options.add_option("gc", + "", "gc-enabled", "Whether garbage collection is enabled or not.", cxxopts::value<bool>(ServerOptions.GcConfig.Enabled)->default_value("true"), diff --git a/src/zenserver/config.h b/src/zenserver/config.h index 3e01cac99..5c56695f3 100644 --- a/src/zenserver/config.h +++ b/src/zenserver/config.h @@ -80,6 +80,7 @@ struct ZenGcConfig uint16_t AttachmentPassCount = 1; bool StoreCacheAttachmentMetaData = false; bool StoreProjectAttachmentMetaData = false; + bool EnableValidation = true; }; struct ZenOpenIdProviderConfig diff --git a/src/zenserver/projectstore/httpprojectstore.cpp b/src/zenserver/projectstore/httpprojectstore.cpp index 1b45e66f3..710cbe5a7 100644 --- a/src/zenserver/projectstore/httpprojectstore.cpp +++ b/src/zenserver/projectstore/httpprojectstore.cpp @@ -292,6 +292,11 @@ HttpProjectService::HttpProjectService(CidStore& Store, ProjectStore* Projects, HttpVerb::kPost); m_Router.RegisterRoute( + "{project}/oplog/{log}/validate", + [this](HttpRouterRequest& Req) { HandleOplogValidateRequest(Req); }, + HttpVerb::kPost); + + m_Router.RegisterRoute( "{project}/oplog/{log}/{op}", [this](HttpRouterRequest& Req) { HandleOpLogOpRequest(Req); }, HttpVerb::kGet); @@ -962,28 +967,25 @@ HttpProjectService::HandleOplogOpPrepRequest(HttpRouterRequest& Req) IoBuffer Payload = HttpReq.ReadPayload(); CbObject RequestObject = LoadCompactBinaryObject(Payload); - std::vector<IoHash> NeedList; - - for (auto Entry : RequestObject["have"sv]) + std::vector<IoHash> ChunkList; + CbArrayView HaveList = RequestObject["have"sv].AsArrayView(); + ChunkList.reserve(HaveList.Num()); + for (auto& Entry : HaveList) { - const IoHash FileHash = Entry.AsHash(); - - if (!m_CidStore.ContainsChunk(FileHash)) - { - ZEN_DEBUG("prep - NEED: {}", FileHash); - - NeedList.push_back(FileHash); - } + ChunkList.push_back(Entry.AsHash()); } + std::vector<IoHash> NeedList = FoundLog->CheckPendingChunkReferences(ChunkList, std::chrono::minutes(2)); + CbObjectWriter Cbo; Cbo.BeginArray("need"); - - for (const IoHash& Hash : NeedList) { - Cbo << Hash; + for (const IoHash& Hash : NeedList) + { + ZEN_DEBUG("prep - NEED: {}", Hash); + Cbo << Hash; + } } - Cbo.EndArray(); CbObject Response = Cbo.Save(); @@ -1043,9 +1045,12 @@ HttpProjectService::HandleOplogOpNewRequest(HttpRouterRequest& Req) bool IsValid = true; std::vector<IoHash> MissingChunks; + std::vector<IoHash> ReferencedChunks; CbPackage::AttachmentResolver Resolver = [&](const IoHash& Hash) -> SharedBuffer { - Oplog.CaptureAddedAttachments(std::vector<IoHash>{Hash}); + // We want to add all chunks here so we can properly clear them from the 'prep' call where we retained them earlier + ReferencedChunks.push_back(Hash); + if (m_CidStore.ContainsChunk(Hash)) { // Return null attachment as we already have it, no point in reading it and storing it again @@ -1146,12 +1151,132 @@ HttpProjectService::HandleOplogOpNewRequest(HttpRouterRequest& Req) } m_ProjectStats.ChunkWriteCount += AttachmentCount; + // Once we stored the op, we no longer need to retain any chunks this op references + FoundLog->RemovePendingChunkReferences(ReferencedChunks); + m_ProjectStats.OpWriteCount++; ZEN_DEBUG("'{}/{}' op #{} ({}) - '{}'", ProjectId, OplogId, OpLsn, NiceBytes(Payload.Size()), Core["key"sv].AsString()); HttpReq.WriteResponse(HttpResponseCode::Created); } void +HttpProjectService::HandleOplogValidateRequest(HttpRouterRequest& Req) +{ + ZEN_TRACE_CPU("ProjectService::OplogOpNew"); + + using namespace std::literals; + + HttpServerRequest& HttpReq = Req.ServerRequest(); + + if (!m_ProjectStore->AreDiskWritesAllowed()) + { + return HttpReq.WriteResponse(HttpResponseCode::InsufficientStorage); + } + + const auto& ProjectId = Req.GetCapture(1); + const auto& OplogId = Req.GetCapture(2); + + Ref<ProjectStore::Project> Project = m_ProjectStore->OpenProject(ProjectId); + if (!Project) + { + return HttpReq.WriteResponse(HttpResponseCode::NotFound, ZenContentType::kText, fmt::format("Project '{}' not found", ProjectId)); + } + Project->TouchProject(); + + ProjectStore::Oplog* FoundLog = Project->OpenOplog(OplogId, /*AllowCompact*/ true, /*VerifyPathOnDisk*/ false); + if (!FoundLog) + { + return HttpReq.WriteResponse(HttpResponseCode::NotFound, + ZenContentType::kText, + fmt::format("Oplog '{}' not found in project '{}'", OplogId, ProjectId)); + } + Project->TouchOplog(OplogId); + + ProjectStore::Oplog& Oplog = *FoundLog; + + std::atomic_bool CancelFlag = false; + ProjectStore::Oplog::ValidationResult Result = Oplog.Validate(CancelFlag); + tsl::robin_map<Oid, std::string, Oid::Hasher> KeyNameLookup; + KeyNameLookup.reserve(Result.OpKeys.size()); + for (const auto& It : Result.OpKeys) + { + KeyNameLookup.insert_or_assign(It.first, It.second); + } + CbObjectWriter Writer; + Writer << "HasMissingData" << !Result.IsEmpty(); + Writer << "OpCount" << Result.OpCount; + Writer << "LSNLow" << Result.LSNLow; + Writer << "LSNHigh" << Result.LSNHigh; + if (!Result.MissingFiles.empty()) + { + Writer.BeginArray("MissingFiles"); + for (const auto& MissingFile : Result.MissingFiles) + { + Writer.BeginObject(); + { + Writer << "Key" << MissingFile.first; + Writer << "KeyName" << KeyNameLookup[MissingFile.first]; + Writer << "Id" << MissingFile.second.Id; + Writer << "Hash" << MissingFile.second.Hash; + Writer << "ServerPath" << MissingFile.second.ServerPath; + Writer << "ClientPath" << MissingFile.second.ClientPath; + } + Writer.EndObject(); + } + Writer.EndArray(); + } + if (!Result.MissingChunks.empty()) + { + Writer.BeginArray("MissingChunks"); + for (const auto& MissingChunk : Result.MissingChunks) + { + Writer.BeginObject(); + { + Writer << "Key" << MissingChunk.first; + Writer << "KeyName" << KeyNameLookup[MissingChunk.first]; + Writer << "Id" << MissingChunk.second.Id; + Writer << "Hash" << MissingChunk.second.Hash; + } + Writer.EndObject(); + } + Writer.EndArray(); + } + if (!Result.MissingMetas.empty()) + { + Writer.BeginArray("MissingMetas"); + for (const auto& MissingMeta : Result.MissingMetas) + { + Writer.BeginObject(); + { + Writer << "Key" << MissingMeta.first; + Writer << "KeyName" << KeyNameLookup[MissingMeta.first]; + Writer << "Id" << MissingMeta.second.Id; + Writer << "Hash" << MissingMeta.second.Hash; + } + Writer.EndObject(); + } + Writer.EndArray(); + } + if (!Result.MissingAttachments.empty()) + { + Writer.BeginArray("MissingAttachments"); + for (const auto& MissingMeta : Result.MissingAttachments) + { + Writer.BeginObject(); + { + Writer << "Key" << MissingMeta.first; + Writer << "KeyName" << KeyNameLookup[MissingMeta.first]; + Writer << "Hash" << MissingMeta.second; + } + Writer.EndObject(); + } + Writer.EndArray(); + } + CbObject Response = Writer.Save(); + HttpReq.WriteResponse(HttpResponseCode::OK, Response); +} + +void HttpProjectService::HandleOpLogOpRequest(HttpRouterRequest& Req) { ZEN_TRACE_CPU("ProjectService::OplogOp"); diff --git a/src/zenserver/projectstore/httpprojectstore.h b/src/zenserver/projectstore/httpprojectstore.h index 9990ee264..13810bd66 100644 --- a/src/zenserver/projectstore/httpprojectstore.h +++ b/src/zenserver/projectstore/httpprojectstore.h @@ -70,6 +70,7 @@ private: void HandleChunkByCidRequest(HttpRouterRequest& Req); void HandleOplogOpPrepRequest(HttpRouterRequest& Req); void HandleOplogOpNewRequest(HttpRouterRequest& Req); + void HandleOplogValidateRequest(HttpRouterRequest& Req); void HandleOpLogOpRequest(HttpRouterRequest& Req); void HandleOpLogRequest(HttpRouterRequest& Req); void HandleOpLogEntriesRequest(HttpRouterRequest& Req); diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index 7e03432d6..1b48a542c 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -949,6 +949,7 @@ ProjectStore::Oplog::Oplog(std::string_view Id, , m_BasePath(BasePath) , m_MarkerPath(MarkerPath) , m_MetaValid(false) +, m_PendingPrepOpAttachmentsRetainEnd(GcClock::Now()) { using namespace std::literals; @@ -1129,6 +1130,10 @@ bool ProjectStore::Oplog::PrepareForDelete(std::filesystem::path& OutRemoveDirectory) { RwLock::ExclusiveLockScope _(m_OplogLock); + m_UpdateCaptureRefCounter = 0; + m_CapturedLSNs.reset(); + m_CapturedAttachments.reset(); + m_PendingPrepOpAttachments.clear(); m_ChunkMap.clear(); m_MetaMap.clear(); m_FileMap.clear(); @@ -1191,12 +1196,12 @@ ProjectStore::Oplog::Read() const OplogEntryMapping OpMapping = GetMapping(Op); // Update chunk id maps - for (const OplogEntryMapping::Mapping& Chunk : OpMapping.Chunks) + for (const ChunkMapping& Chunk : OpMapping.Chunks) { m_ChunkMap.insert_or_assign(Chunk.Id, Chunk.Hash); } - for (const OplogEntryMapping::FileMapping& File : OpMapping.Files) + for (const FileMapping& File : OpMapping.Files) { if (File.Hash != IoHash::Zero) { @@ -1207,7 +1212,7 @@ ProjectStore::Oplog::Read() FileMapEntry{.ServerPath = File.Hash == IoHash::Zero ? File.ServerPath : std::string(), .ClientPath = File.ClientPath}); } - for (const OplogEntryMapping::Mapping& Meta : OpMapping.Meta) + for (const ChunkMapping& Meta : OpMapping.Meta) { m_MetaMap.insert_or_assign(Meta.Id, Meta.Hash); } @@ -1317,6 +1322,101 @@ ProjectStore::Oplog::ReadStateFile(const std::filesystem::path& BasePath, std::f return {}; } +ProjectStore::Oplog::ValidationResult +ProjectStore::Oplog::Validate(std::atomic_bool& IsCancelledFlag) +{ + using namespace std::literals; + + ValidationResult Result; + + std::vector<Oid> KeyHashes; + std::vector<std::string> Keys; + std::vector<std::vector<IoHash>> Attachments; + std::vector<OplogEntryMapping> Mappings; + + IterateOplogWithKey([&](uint32_t LSN, const Oid& Key, CbObjectView OpView) { + Result.LSNLow = Min(Result.LSNLow, LSN); + Result.LSNHigh = Max(Result.LSNHigh, LSN); + KeyHashes.push_back(Key); + Keys.emplace_back(std::string(OpView["key"sv].AsString())); + + std::vector<IoHash> OpAttachments; + OpView.IterateAttachments([&OpAttachments](CbFieldView Attachment) { OpAttachments.push_back(Attachment.AsAttachment()); }); + Attachments.emplace_back(std::move(OpAttachments)); + + Mappings.push_back(GetMapping(OpView)); + }); + + Result.OpCount = gsl::narrow<uint32_t>(Keys.size()); + for (uint32_t OpIndex = 0; !IsCancelledFlag && OpIndex < Result.OpCount; OpIndex++) + { + const Oid& KeyHash = KeyHashes[OpIndex]; + const std::string& Key = Keys[OpIndex]; + const OplogEntryMapping& Mapping(Mappings[OpIndex]); + bool HasMissingEntries = false; + for (const ChunkMapping& Chunk : Mapping.Chunks) + { + if (!m_CidStore.ContainsChunk(Chunk.Hash)) + { + Result.MissingChunks.push_back({KeyHash, Chunk}); + HasMissingEntries = true; + } + } + for (const ChunkMapping& Meta : Mapping.Meta) + { + if (!m_CidStore.ContainsChunk(Meta.Hash)) + { + Result.MissingMetas.push_back({KeyHash, Meta}); + HasMissingEntries = true; + } + } + for (const FileMapping& File : Mapping.Files) + { + if (File.Hash == IoHash::Zero) + { + std::filesystem::path FilePath = m_OuterProject->RootDir / File.ServerPath; + if (!std::filesystem::is_regular_file(FilePath)) + { + Result.MissingFiles.push_back({KeyHash, File}); + HasMissingEntries = true; + } + } + else + { + if (!m_CidStore.ContainsChunk(File.Hash)) + { + Result.MissingFiles.push_back({KeyHash, File}); + HasMissingEntries = true; + } + } + } + const std::vector<IoHash>& OpAttachments = Attachments[OpIndex]; + for (const IoHash& Attachment : OpAttachments) + { + if (!m_CidStore.ContainsChunk(Attachment)) + { + Result.MissingAttachments.push_back({KeyHash, Attachment}); + HasMissingEntries = true; + } + } + if (HasMissingEntries) + { + Result.OpKeys.push_back({KeyHash, Key}); + } + } + + { + // Check if we were deleted while we were checking the references without a lock... + RwLock::SharedLockScope _(m_OplogLock); + if (!m_Storage) + { + Result = {}; + } + } + + return Result; +} + void ProjectStore::Oplog::WriteIndexSnapshot() { @@ -2286,21 +2386,9 @@ ProjectStore::Oplog::AddChunkMappings(const std::unordered_map<Oid, IoHash, Oid: } void -ProjectStore::Oplog::CaptureUpdatedLSNs(std::span<const uint32_t> LSNs) -{ - m_UpdateCaptureLock.WithExclusiveLock([&]() { - if (m_CapturedLSNs) - { - m_CapturedLSNs->reserve(m_CapturedLSNs->size() + LSNs.size()); - m_CapturedLSNs->insert(m_CapturedLSNs->end(), LSNs.begin(), LSNs.end()); - } - }); -} - -void ProjectStore::Oplog::CaptureAddedAttachments(std::span<const IoHash> AttachmentHashes) { - m_UpdateCaptureLock.WithExclusiveLock([this, AttachmentHashes]() { + m_OplogLock.WithExclusiveLock([this, AttachmentHashes]() { if (m_CapturedAttachments) { m_CapturedAttachments->reserve(m_CapturedAttachments->size() + AttachmentHashes.size()); @@ -2312,7 +2400,7 @@ ProjectStore::Oplog::CaptureAddedAttachments(std::span<const IoHash> AttachmentH void ProjectStore::Oplog::EnableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_OplogLock.WithExclusiveLock([&]() { if (m_UpdateCaptureRefCounter == 0) { ZEN_ASSERT(!m_CapturedLSNs); @@ -2332,7 +2420,7 @@ ProjectStore::Oplog::EnableUpdateCapture() void ProjectStore::Oplog::DisableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_OplogLock.WithExclusiveLock([&]() { ZEN_ASSERT(m_CapturedLSNs); ZEN_ASSERT(m_CapturedAttachments); ZEN_ASSERT(m_UpdateCaptureRefCounter > 0); @@ -2346,30 +2434,27 @@ ProjectStore::Oplog::DisableUpdateCapture() } void -ProjectStore::Oplog::IterateCapturedLSNs(std::function<bool(const CbObjectView& UpdateOp)>&& Callback) +ProjectStore::Oplog::IterateCapturedLSNsLocked(std::function<bool(const CbObjectView& UpdateOp)>&& Callback) { - m_UpdateCaptureLock.WithExclusiveLock([&]() { - if (m_CapturedLSNs) + if (m_CapturedLSNs) + { + if (!m_Storage) { - if (!m_Storage) - { - return; - } - for (uint32_t UpdatedLSN : *m_CapturedLSNs) + return; + } + for (uint32_t UpdatedLSN : *m_CapturedLSNs) + { + if (const auto AddressEntryIt = m_OpAddressMap.find(UpdatedLSN); AddressEntryIt != m_OpAddressMap.end()) { - if (const auto AddressEntryIt = m_OpAddressMap.find(UpdatedLSN); AddressEntryIt != m_OpAddressMap.end()) - { - Callback(m_Storage->GetOp(AddressEntryIt->second)); - } + Callback(m_Storage->GetOp(AddressEntryIt->second)); } } - }); + } } std::vector<IoHash> -ProjectStore::Oplog::GetCapturedAttachments() +ProjectStore::Oplog::GetCapturedAttachmentsLocked() { - RwLock::SharedLockScope _(m_UpdateCaptureLock); if (m_CapturedAttachments) { return *m_CapturedAttachments; @@ -2377,6 +2462,69 @@ ProjectStore::Oplog::GetCapturedAttachments() return {}; } +std::vector<IoHash> +ProjectStore::Oplog::CheckPendingChunkReferences(std::span<const IoHash> ChunkHashes, const GcClock::Duration& RetainTime) +{ + m_OplogLock.WithExclusiveLock([&]() { + GcClock::TimePoint Now = GcClock::Now(); + if (m_PendingPrepOpAttachmentsRetainEnd < Now) + { + m_PendingPrepOpAttachments.clear(); + } + m_PendingPrepOpAttachments.insert(ChunkHashes.begin(), ChunkHashes.end()); + GcClock::TimePoint NewEndPoint = Now + RetainTime; + if (m_PendingPrepOpAttachmentsRetainEnd < NewEndPoint) + { + m_PendingPrepOpAttachmentsRetainEnd = NewEndPoint; + } + }); + + std::vector<IoHash> MissingChunks; + MissingChunks.reserve(ChunkHashes.size()); + for (const IoHash& FileHash : ChunkHashes) + { + if (!m_CidStore.ContainsChunk(FileHash)) + { + MissingChunks.push_back(FileHash); + } + } + + return MissingChunks; +} + +void +ProjectStore::Oplog::RemovePendingChunkReferences(std::span<const IoHash> ChunkHashes) +{ + m_OplogLock.WithExclusiveLock([&]() { + GcClock::TimePoint Now = GcClock::Now(); + if (m_PendingPrepOpAttachmentsRetainEnd < Now) + { + m_PendingPrepOpAttachments.clear(); + } + else + { + for (const IoHash& Chunk : ChunkHashes) + { + m_PendingPrepOpAttachments.erase(Chunk); + } + } + }); +} + +std::vector<IoHash> +ProjectStore::Oplog::GetPendingChunkReferencesLocked() +{ + std::vector<IoHash> Result; + Result.reserve(m_PendingPrepOpAttachments.size()); + Result.insert(Result.end(), m_PendingPrepOpAttachments.begin(), m_PendingPrepOpAttachments.end()); + GcClock::TimePoint Now = GcClock::Now(); + if (m_PendingPrepOpAttachmentsRetainEnd < Now) + { + m_PendingPrepOpAttachments.clear(); + } + return Result; +} + void ProjectStore::Oplog::AddFileMapping(const RwLock::ExclusiveLockScope&, const Oid& FileId, @@ -2428,7 +2576,7 @@ ProjectStore::Oplog::GetMapping(CbObjectView Core) CbObjectView PackageObj = Field.AsObjectView(); Oid Id = PackageObj["id"sv].AsObjectId(); IoHash Hash = PackageObj["data"sv].AsBinaryAttachment(); - Result.Chunks.emplace_back(OplogEntryMapping::Mapping{Id, Hash}); + Result.Chunks.emplace_back(ChunkMapping{Id, Hash}); ZEN_DEBUG("oplog {}/{}: package data {} -> {}", m_OuterProject->Identifier, m_OplogId, Id, Hash); continue; } @@ -2440,7 +2588,7 @@ ProjectStore::Oplog::GetMapping(CbObjectView Core) CbObjectView BulkObj = Entry.AsObjectView(); Oid Id = BulkObj["id"sv].AsObjectId(); IoHash Hash = BulkObj["data"sv].AsBinaryAttachment(); - Result.Chunks.emplace_back(OplogEntryMapping::Mapping{Id, Hash}); + Result.Chunks.emplace_back(ChunkMapping{Id, Hash}); ZEN_DEBUG("oplog {}/{}: bulkdata {} -> {}", m_OuterProject->Identifier, m_OplogId, Id, Hash); } continue; @@ -2453,7 +2601,7 @@ ProjectStore::Oplog::GetMapping(CbObjectView Core) CbObjectView PackageDataObj = Entry.AsObjectView(); Oid Id = PackageDataObj["id"sv].AsObjectId(); IoHash Hash = PackageDataObj["data"sv].AsBinaryAttachment(); - Result.Chunks.emplace_back(OplogEntryMapping::Mapping{Id, Hash}); + Result.Chunks.emplace_back(ChunkMapping{Id, Hash}); ZEN_DEBUG("oplog {}/{}: package {} -> {}", m_OuterProject->Identifier, m_OplogId, Id, Hash); } continue; @@ -2487,7 +2635,7 @@ ProjectStore::Oplog::GetMapping(CbObjectView Core) continue; } - Result.Files.emplace_back(OplogEntryMapping::FileMapping{Id, Hash, std::string(ServerPath), std::string(ClientPath)}); + Result.Files.emplace_back(FileMapping{Id, Hash, std::string(ServerPath), std::string(ClientPath)}); ZEN_DEBUG("oplog {}/{}: file {} -> {}, ServerPath: {}, ClientPath: {}", m_OuterProject->Identifier, m_OplogId, @@ -2507,7 +2655,7 @@ ProjectStore::Oplog::GetMapping(CbObjectView Core) CbObjectView MetaObj = Entry.AsObjectView(); Oid Id = MetaObj["id"sv].AsObjectId(); IoHash Hash = MetaObj["data"sv].AsBinaryAttachment(); - Result.Meta.emplace_back(OplogEntryMapping::Mapping{Id, Hash}); + Result.Meta.emplace_back(ChunkMapping{Id, Hash}); auto NameString = MetaObj["name"sv].AsString(); ZEN_DEBUG("oplog {}/{}: meta data ({}) {} -> {}", m_OuterProject->Identifier, m_OplogId, NameString, Id, Hash); } @@ -2529,17 +2677,17 @@ ProjectStore::Oplog::RegisterOplogEntry(RwLock::ExclusiveLockScope& OplogLock, using namespace std::literals; // Update chunk id maps - for (const OplogEntryMapping::Mapping& Chunk : OpMapping.Chunks) + for (const ChunkMapping& Chunk : OpMapping.Chunks) { AddChunkMapping(OplogLock, Chunk.Id, Chunk.Hash); } - for (const OplogEntryMapping::FileMapping& File : OpMapping.Files) + for (const FileMapping& File : OpMapping.Files) { AddFileMapping(OplogLock, File.Id, File.Hash, File.ServerPath, File.ClientPath); } - for (const OplogEntryMapping::Mapping& Meta : OpMapping.Meta) + for (const ChunkMapping& Meta : OpMapping.Meta) { AddMetaMapping(OplogLock, Meta.Id, Meta.Hash); } @@ -2637,7 +2785,10 @@ ProjectStore::Oplog::AppendNewOplogEntry(CbObjectView Core) RwLock::ExclusiveLockScope OplogLock(m_OplogLock); const uint32_t EntryId = RegisterOplogEntry(OplogLock, Mapping, OpEntry); - CaptureUpdatedLSNs(std::array<uint32_t, 1u>({EntryId})); + if (m_CapturedLSNs) + { + m_CapturedLSNs->push_back(EntryId); + } m_MetaValid = false; return EntryId; @@ -2676,12 +2827,19 @@ ProjectStore::Oplog::AppendNewOplogEntries(std::span<CbObjectView> Cores) { { RwLock::ExclusiveLockScope OplogLock(m_OplogLock); + if (m_CapturedLSNs) + { + m_CapturedLSNs->reserve(m_CapturedLSNs->size() + OpCount); + } for (size_t OpIndex = 0; OpIndex < OpCount; OpIndex++) { EntryIds[OpIndex] = RegisterOplogEntry(OplogLock, Mappings[OpIndex], OpEntries[OpIndex]); + if (m_CapturedLSNs) + { + m_CapturedLSNs->push_back(EntryIds[OpIndex]); + } } } - CaptureUpdatedLSNs(EntryIds); m_MetaValid = false; } return EntryIds; @@ -2909,12 +3067,10 @@ ProjectStore::Project::NewOplog(std::string_view OplogId, const std::filesystem: Log->Write(); - m_UpdateCaptureLock.WithExclusiveLock([&]() { - if (m_CapturedOplogs) - { - m_CapturedOplogs->push_back(std::string(OplogId)); - } - }); + if (m_CapturedOplogs) + { + m_CapturedOplogs->push_back(std::string(OplogId)); + } return Log; } @@ -3201,7 +3357,7 @@ ProjectStore::Project::PrepareForDelete(std::filesystem::path& OutDeletePath) void ProjectStore::Project::EnableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_ProjectLock.WithExclusiveLock([&]() { if (m_UpdateCaptureRefCounter == 0) { ZEN_ASSERT(!m_CapturedOplogs); @@ -3218,7 +3374,7 @@ ProjectStore::Project::EnableUpdateCapture() void ProjectStore::Project::DisableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_ProjectLock.WithExclusiveLock([&]() { ZEN_ASSERT(m_CapturedOplogs); ZEN_ASSERT(m_UpdateCaptureRefCounter > 0); m_UpdateCaptureRefCounter--; @@ -3230,9 +3386,8 @@ ProjectStore::Project::DisableUpdateCapture() } std::vector<std::string> -ProjectStore::Project::GetCapturedOplogs() +ProjectStore::Project::GetCapturedOplogsLocked() { - RwLock::SharedLockScope _(m_UpdateCaptureLock); if (m_CapturedOplogs) { return *m_CapturedOplogs; @@ -3593,12 +3748,10 @@ ProjectStore::NewProject(const std::filesystem::path& BasePath, Prj->ProjectFilePath = ProjectFilePath; Prj->Write(); - m_UpdateCaptureLock.WithExclusiveLock([&]() { - if (m_CapturedProjects) - { - m_CapturedProjects->push_back(std::string(ProjectId)); - } - }); + if (m_CapturedProjects) + { + m_CapturedProjects->push_back(std::string(ProjectId)); + } return Prj; } @@ -4850,7 +5003,7 @@ ProjectStore::AreDiskWritesAllowed() const void ProjectStore::EnableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_ProjectsLock.WithExclusiveLock([&]() { if (m_UpdateCaptureRefCounter == 0) { ZEN_ASSERT(!m_CapturedProjects); @@ -4867,7 +5020,7 @@ ProjectStore::EnableUpdateCapture() void ProjectStore::DisableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_ProjectsLock.WithExclusiveLock([&]() { ZEN_ASSERT(m_CapturedProjects); ZEN_ASSERT(m_UpdateCaptureRefCounter > 0); m_UpdateCaptureRefCounter--; @@ -4879,9 +5032,8 @@ ProjectStore::DisableUpdateCapture() } std::vector<std::string> -ProjectStore::GetCapturedProjects() +ProjectStore::GetCapturedProjectsLocked() { - RwLock::SharedLockScope _(m_UpdateCaptureLock); if (m_CapturedProjects) { return *m_CapturedProjects; @@ -5198,7 +5350,7 @@ public: AddedOplogs.size()); }); - std::vector<std::string> AddedProjects = m_ProjectStore.GetCapturedProjects(); + std::vector<std::string> AddedProjects = m_ProjectStore.GetCapturedProjectsLocked(); for (const std::string& AddedProject : AddedProjects) { if (auto It = m_ProjectStore.m_Projects.find(AddedProject); It != m_ProjectStore.m_Projects.end()) @@ -5213,8 +5365,9 @@ public: } for (auto& ProjectPair : m_ProjectStore.m_Projects) { - ProjectStore::Project& Project = *ProjectPair.second; - std::vector<std::string> AddedOplogNames(Project.GetCapturedOplogs()); + ProjectStore::Project& Project = *ProjectPair.second; + + std::vector<std::string> AddedOplogNames(Project.GetCapturedOplogsLocked()); for (const std::string& OplogName : AddedOplogNames) { if (auto It = Project.m_Oplogs.find(OplogName); It != Project.m_Oplogs.end()) @@ -5243,7 +5396,12 @@ public: }); Oplog->GetAttachmentsLocked(m_References, Ctx.Settings.StoreProjectAttachmentMetaData); + if (std::vector<IoHash> PendingChunkReferences = Oplog->GetPendingChunkReferencesLocked(); !PendingChunkReferences.empty()) + { + m_References.insert(m_References.end(), PendingChunkReferences.begin(), PendingChunkReferences.end()); + } } + FilterReferences(Ctx, fmt::format("projectstore [LOCKSTATE] '{}'", "projectstore"), m_References); } @@ -5404,12 +5562,16 @@ public: if (auto It = m_Project->m_Oplogs.find(m_OplogId); It != m_Project->m_Oplogs.end()) { ProjectStore::Oplog* Oplog = It->second.get(); - Oplog->IterateCapturedLSNs([&](const CbObjectView& UpdateOp) -> bool { + Oplog->IterateCapturedLSNsLocked([&](const CbObjectView& UpdateOp) -> bool { UpdateOp.IterateAttachments([&](CbFieldView Visitor) { m_AddedReferences.emplace_back(Visitor.AsAttachment()); }); return true; }); - std::vector<IoHash> AddedAttachments = Oplog->GetCapturedAttachments(); + std::vector<IoHash> AddedAttachments = Oplog->GetCapturedAttachmentsLocked(); m_AddedReferences.insert(m_AddedReferences.end(), AddedAttachments.begin(), AddedAttachments.end()); + if (std::vector<IoHash> PendingChunkReferences = Oplog->GetPendingChunkReferencesLocked(); !PendingChunkReferences.empty()) + { + m_AddedReferences.insert(m_AddedReferences.end(), PendingChunkReferences.begin(), PendingChunkReferences.end()); + } } else if (m_Project->LastOplogAccessTime(m_OplogId) > m_OplogAccessTime && ProjectStore::Oplog::ExistsAt(m_OplogBasePath)) { @@ -5551,6 +5713,137 @@ ProjectStore::LockState(GcCtx& Ctx) return Locks; } +class ProjectStoreOplogReferenceValidator : public GcReferenceValidator +{ +public: + ProjectStoreOplogReferenceValidator(ProjectStore& InProjectStore, std::string_view InProject, std::string_view InOplog) + : m_ProjectStore(InProjectStore) + , m_ProjectId(InProject) + , m_OplogId(InOplog) + { + } + + virtual ~ProjectStoreOplogReferenceValidator() {} + + virtual std::string GetGcName(GcCtx&) override { return fmt::format("oplog: '{}/{}'", m_ProjectId, m_OplogId); } + + virtual void Validate(GcCtx& Ctx, GcReferenceValidatorStats& Stats) override + { + ZEN_TRACE_CPU("Store::Validate"); + + auto Log = [&Ctx]() { return Ctx.Logger; }; + + ProjectStore::Oplog::ValidationResult Result; + + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + std::string Status = Result.IsEmpty() ? "OK" : "Missing data"; + ZEN_INFO("GCV2: projectstore [VALIDATE] '{}/{}': Validated in {}. OpCount: {}, MinLSN: {}, MaxLSN: {}, Status: {}", + m_ProjectId, + m_OplogId, + NiceTimeSpanMs(Timer.GetElapsedTimeMs()), + Result.OpCount, + Result.LSNLow, + Result.LSNHigh, + Status); + }); + ProjectStore::Oplog* TempOplog = nullptr; + auto __ = MakeGuard([this, &TempOplog]() { + if (TempOplog != nullptr) + { + delete TempOplog; + } + }); + ProjectStore::Oplog* Oplog = nullptr; + Ref<ProjectStore::Project> Project = m_ProjectStore.OpenProject(m_ProjectId); + if (Project) + { + RwLock::SharedLockScope ___(Project->m_ProjectLock); + if (auto It = Project->m_Oplogs.find(m_OplogId); It != Project->m_Oplogs.end()) + { + Oplog = It->second.get(); + } + else + { + std::filesystem::path OplogBasePath = Project->BasePathForOplog(m_OplogId); + TempOplog = new ProjectStore::Oplog(m_OplogId, Project.Get(), Project->m_CidStore, OplogBasePath, std::filesystem::path{}); + Oplog = TempOplog; + Oplog->Read(); + + if (Ctx.IsCancelledFlag) + { + return; + } + } + + if (Oplog != nullptr) + { + Result = Oplog->Validate(Ctx.IsCancelledFlag); + if (Ctx.IsCancelledFlag) + { + return; + } + Stats.CheckedCount = Result.OpCount; + Stats.MissingChunks = Result.MissingChunks.size(); + Stats.MissingFiles = Result.MissingFiles.size(); + Stats.MissingMetas = Result.MissingMetas.size(); + Stats.MissingAttachments = Result.MissingAttachments.size(); + } + + if (!Result.IsEmpty()) + { + ZEN_WARN("GCV2: projectstore [VALIDATE] '{}/{}': Missing data: Files: {}, Chunks: {}, Metas: {}, Attachments: {}", + m_ProjectId, + m_OplogId, + Result.MissingFiles.size(), + Result.MissingChunks.size(), + Result.MissingMetas.size(), + Result.MissingAttachments.size()); + } + } + } + + ProjectStore& m_ProjectStore; + std::string m_ProjectId; + std::string m_OplogId; +}; + +std::vector<GcReferenceValidator*> +ProjectStore::CreateReferenceValidators(GcCtx& Ctx) +{ + if (Ctx.Settings.SkipCidDelete) + { + return {}; + } + DiscoverProjects(); + + std::vector<std::pair<std::string, std::string>> Oplogs; + { + RwLock::SharedLockScope _(m_ProjectsLock); + for (auto& ProjectPair : m_Projects) + { + ProjectStore::Project& Project = *ProjectPair.second; + std::vector<std::string> OpLogs = Project.ScanForOplogs(); + for (const std::string& OplogName : OpLogs) + { + Oplogs.push_back({Project.Identifier, OplogName}); + } + } + } + std::vector<GcReferenceValidator*> Validators; + Validators.reserve(Oplogs.size()); + for (const std::pair<std::string, std::string>& Oplog : Oplogs) + { + Validators.push_back(new ProjectStoreOplogReferenceValidator(*this, Oplog.first, Oplog.second)); + } + + return Validators; +} + ////////////////////////////////////////////////////////////////////////// Oid @@ -5933,106 +6226,317 @@ TEST_CASE("project.store.gc") } } - SUBCASE("v2") { - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - std::filesystem::remove(Project1FilePath); + std::filesystem::remove(Project1FilePath); - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), - .CollectSmallObjects = true, - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(4u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(7u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(4u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(7u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(!ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - std::filesystem::remove(Project2Oplog1Path); - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), - .CollectSmallObjects = true, - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(3u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + std::filesystem::remove(Project2Oplog1Path); + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(3u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(!ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), - .CollectSmallObjects = true, - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(3u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(3u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(!ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - std::filesystem::remove(Project2FilePath); - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), - .CollectSmallObjects = true, - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(!ProjectStore.OpenProject("proj2"sv)); - } + std::filesystem::remove(Project2FilePath); + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(!ProjectStore.OpenProject("proj1"sv)); + CHECK(!ProjectStore.OpenProject("proj2"sv)); + } +} + +TEST_CASE("project.store.gc.prep") +{ + using namespace std::literals; + using namespace testutils; + + ScopedTemporaryDirectory TempDir; + + auto JobQueue = MakeJobQueue(1, ""sv); + GcManager Gc; + CidStore CidStore(Gc); + CidStoreConfiguration CidConfig = {.RootDirectory = TempDir.Path() / "cas", .TinyValueThreshold = 1024, .HugeValueThreshold = 4096}; + CidStore.Initialize(CidConfig); + + std::filesystem::path BasePath = TempDir.Path() / "projectstore"; + ProjectStore ProjectStore(CidStore, BasePath, Gc, *JobQueue, ProjectStore::Configuration{}); + std::filesystem::path RootDir = TempDir.Path() / "root"; + std::filesystem::path EngineRootDir = TempDir.Path() / "engine"; + + std::filesystem::path Project1RootDir = TempDir.Path() / "game1"; + std::filesystem::path Project1FilePath = TempDir.Path() / "game1" / "game.uproject"; + { + CreateDirectories(Project1FilePath.parent_path()); + BasicFile ProjectFile; + ProjectFile.Open(Project1FilePath, BasicFile::Mode::kTruncate); + } + std::filesystem::path Project1OplogPath = TempDir.Path() / "game1" / "saves" / "cooked" / ".projectstore"; + { + CreateDirectories(Project1OplogPath.parent_path()); + BasicFile OplogFile; + OplogFile.Open(Project1OplogPath, BasicFile::Mode::kTruncate); + } + + std::vector<std::pair<Oid, CompressedBuffer>> OpAttachments = CreateAttachments(std::initializer_list<size_t>{7123, 583, 690, 99}); + std::vector<IoHash> OpChunkHashes; + for (const auto& Chunk : OpAttachments) + { + OpChunkHashes.push_back(Chunk.second.DecodeRawHash()); + } + + { + Ref<ProjectStore::Project> Project1(ProjectStore.NewProject(BasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + Project1RootDir.string(), + Project1FilePath.string())); + ProjectStore::Oplog* Oplog = Project1->NewOplog("oplog1"sv, Project1OplogPath); + Oplog->AppendNewOplogEntry(CreateOplogPackage(Oid::NewOid(), OpAttachments)); + } + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + Project1->DeleteOplog("oplog1"sv); + } + + // Equivalent of a `prep` existance check call + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + + // If a gc comes in between our prep and op write the chunks will be removed + for (auto Attachment : OpAttachments) + { + CHECK(!CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + // Make sure the chunks are stored but not the referencing op + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + ProjectStore::Oplog* Oplog = Project1->NewOplog("oplog1"sv, Project1OplogPath); + Oplog->AppendNewOplogEntry(CreateOplogPackage(Oid::NewOid(), OpAttachments)); + Project1->DeleteOplog("oplog1"sv); + } + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + ProjectStore::Oplog* Oplog = Project1->NewOplog("oplog1"sv, Project1OplogPath); + + // Equivalent of a `prep` call with tracking of ops + CHECK(Oplog->CheckPendingChunkReferences(OpChunkHashes, std::chrono::hours(1)).empty()); + } + + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + + // Attachments should now be retained + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + + // Attachments should now be retained across multiple GCs if retain time is still valud + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + ProjectStore::Oplog* Oplog = Project1->OpenOplog("oplog1"sv, true, true); + Oplog->AppendNewOplogEntry(CreateOplogPackage(Oid::NewOid(), OpAttachments)); + Oplog->RemovePendingChunkReferences(OpChunkHashes); + CHECK(Oplog->GetPendingChunkReferencesLocked().size() == 0); + } + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + Project1->DeleteOplog("oplog1"sv); + } + + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + + for (auto Attachment : OpAttachments) + { + CHECK(!CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + Project1->DeleteOplog("oplog1"sv); + } + { + // Make sure the chunks are stored but not the referencing op + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + ProjectStore::Oplog* Oplog = Project1->NewOplog("oplog1"sv, Project1OplogPath); + Oplog->AppendNewOplogEntry(CreateOplogPackage(Oid::NewOid(), OpAttachments)); + Project1->DeleteOplog("oplog1"sv); + } + + // Caution - putting breakpoints and stepping through this part of the test likely makes it fails due to expiry time of pending chunks + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + ProjectStore::Oplog* Oplog = Project1->NewOplog("oplog1"sv, Project1OplogPath); + + CHECK(Oplog->CheckPendingChunkReferences(OpChunkHashes, std::chrono::milliseconds(100)).empty()); + } + + // This pass they should be retained and while the ops are picked up in GC we are blocked from adding our op + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + Sleep(200); + // This pass they should also be retained since our age retention has kept them alive and they will now be picked up and the retention + // cleared + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + // This pass the retention time has expired and the last GC pass cleared the entries + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + + for (auto Attachment : OpAttachments) + { + CHECK(!CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); } } diff --git a/src/zenserver/projectstore/projectstore.h b/src/zenserver/projectstore/projectstore.h index 49970b677..1619151dd 100644 --- a/src/zenserver/projectstore/projectstore.h +++ b/src/zenserver/projectstore/projectstore.h @@ -161,13 +161,14 @@ public: void AddChunkMappings(const std::unordered_map<Oid, IoHash, Oid::Hasher>& ChunkMappings); - void CaptureUpdatedLSNs(std::span<const uint32_t> LSNs); - void CaptureAddedAttachments(std::span<const IoHash> AttachmentHashes); + void EnableUpdateCapture(); + void DisableUpdateCapture(); + void CaptureAddedAttachments(std::span<const IoHash> AttachmentHashes); + std::vector<IoHash> GetCapturedAttachmentsLocked(); + std::vector<IoHash> CheckPendingChunkReferences(std::span<const IoHash> ChunkHashes, const GcClock::Duration& RetainTime); + void RemovePendingChunkReferences(std::span<const IoHash> ChunkHashes); + std::vector<IoHash> GetPendingChunkReferencesLocked(); - void EnableUpdateCapture(); - void DisableUpdateCapture(); - void IterateCapturedLSNs(std::function<bool(const CbObjectView& UpdateOp)>&& Callback); - std::vector<IoHash> GetCapturedAttachments(); RwLock::SharedLockScope GetGcReferencerLock() { return RwLock::SharedLockScope(m_OplogLock); } uint32_t GetUnusedSpacePercent() const; @@ -180,6 +181,39 @@ public: static std::optional<CbObject> ReadStateFile(const std::filesystem::path& BasePath, std::function<LoggerRef()>&& Log); + struct ChunkMapping + { + Oid Id; + IoHash Hash; + }; + + struct FileMapping + { + Oid Id; + IoHash Hash; // This is either zero or a cid + std::string ServerPath; // If Hash is valid then this should be empty + std::string ClientPath; + }; + + struct ValidationResult + { + uint32_t OpCount = 0; + uint32_t LSNLow = 0; + uint32_t LSNHigh = 0; + std::vector<std::pair<Oid, FileMapping>> MissingFiles; + std::vector<std::pair<Oid, ChunkMapping>> MissingChunks; + std::vector<std::pair<Oid, ChunkMapping>> MissingMetas; + std::vector<std::pair<Oid, IoHash>> MissingAttachments; + std::vector<std::pair<Oid, std::string>> OpKeys; + + bool IsEmpty() const + { + return MissingFiles.empty() && MissingChunks.empty() && MissingMetas.empty() && MissingAttachments.empty(); + } + }; + + ValidationResult Validate(std::atomic_bool& IsCancelledFlag); + private: struct FileMapEntry { @@ -207,10 +241,11 @@ public: OidMap<uint32_t> m_LatestOpMap; // op key -> latest op LSN for key std::atomic<bool> m_MetaValid = false; - mutable RwLock m_UpdateCaptureLock; - uint32_t m_UpdateCaptureRefCounter = 0; - std::unique_ptr<std::vector<uint32_t>> m_CapturedLSNs; - std::unique_ptr<std::vector<IoHash>> m_CapturedAttachments; + uint32_t m_UpdateCaptureRefCounter = 0; + std::unique_ptr<std::vector<uint32_t>> m_CapturedLSNs; + std::unique_ptr<std::vector<IoHash>> m_CapturedAttachments; + std::unordered_set<IoHash, IoHash::Hasher> m_PendingPrepOpAttachments; + GcClock::TimePoint m_PendingPrepOpAttachmentsRetainEnd; RefPtr<OplogStorage> m_Storage; uint64_t m_LogFlushPosition = 0; @@ -225,23 +260,9 @@ public: struct OplogEntryMapping { - struct Mapping - { - Oid Id; - IoHash Hash; - }; - - struct FileMapping - { - Oid Id; - IoHash Hash; // This is either zero or a cid - std::string ServerPath; // If Hash is valid then this should be empty - std::string ClientPath; - }; - - std::vector<Mapping> Chunks; - std::vector<Mapping> Meta; - std::vector<FileMapping> Files; + std::vector<ChunkMapping> Chunks; + std::vector<ChunkMapping> Meta; + std::vector<FileMapping> Files; }; OplogEntryMapping GetMapping(CbObjectView Core); @@ -262,9 +283,11 @@ public: void AddChunkMapping(const RwLock::ExclusiveLockScope& OplogLock, const Oid& ChunkId, const IoHash& Hash); void AddMetaMapping(const RwLock::ExclusiveLockScope& OplogLock, const Oid& ChunkId, const IoHash& Hash); void Compact(RwLock::ExclusiveLockScope& Lock, bool DryRun, bool RetainLSNs, std::string_view LogPrefix); + void IterateCapturedLSNsLocked(std::function<bool(const CbObjectView& UpdateOp)>&& Callback); friend class ProjectStoreOplogReferenceChecker; friend class ProjectStoreReferenceChecker; + friend class ProjectStoreOplogReferenceValidator; }; struct Project : public RefCounted @@ -305,7 +328,7 @@ public: void EnableUpdateCapture(); void DisableUpdateCapture(); - std::vector<std::string> GetCapturedOplogs(); + std::vector<std::string> GetCapturedOplogsLocked(); std::vector<RwLock::SharedLockScope> GetGcReferencerLocks(); @@ -333,7 +356,6 @@ public: std::filesystem::path m_OplogStoragePath; mutable RwLock m_LastAccessTimesLock; mutable tsl::robin_map<std::string, GcClock::Tick> m_LastAccessTimes; - mutable RwLock m_UpdateCaptureLock; uint32_t m_UpdateCaptureRefCounter = 0; std::unique_ptr<std::vector<std::string>> m_CapturedOplogs; @@ -347,11 +369,10 @@ public: friend class ProjectStoreOplogReferenceChecker; friend class ProjectStoreReferenceChecker; + friend class ProjectStoreOplogReferenceValidator; friend class ProjectStoreGcStoreCompactor; }; - // Oplog* OpenProjectOplog(std::string_view ProjectId, std::string_view OplogId); - Ref<Project> OpenProject(std::string_view ProjectId); Ref<Project> NewProject(const std::filesystem::path& BasePath, std::string_view ProjectId, @@ -377,9 +398,10 @@ public: virtual void ScrubStorage(ScrubContext& Ctx) override; virtual GcStorageSize StorageSize() const override; - virtual std::string GetGcName(GcCtx& Ctx) override; - virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; - virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; + virtual std::string GetGcName(GcCtx& Ctx) override; + virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; + virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; + virtual std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) override; virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) override; @@ -454,7 +476,7 @@ public: void EnableUpdateCapture(); void DisableUpdateCapture(); - std::vector<std::string> GetCapturedProjects(); + std::vector<std::string> GetCapturedProjectsLocked(); private: LoggerRef m_Log; @@ -465,8 +487,7 @@ private: const Configuration m_Config; mutable RwLock m_ProjectsLock; std::map<std::string, Ref<Project>> m_Projects; - const DiskWriteBlocker* m_DiskWriteBlocker = nullptr; - mutable RwLock m_UpdateCaptureLock; + const DiskWriteBlocker* m_DiskWriteBlocker = nullptr; uint32_t m_UpdateCaptureRefCounter = 0; std::unique_ptr<std::vector<std::string>> m_CapturedProjects; diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index 4aafb9828..93b639a51 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -3454,6 +3454,12 @@ ZenCacheDiskLayer::CacheBucket::CreateReferenceCheckers(GcCtx& Ctx) return {new DiskBucketReferenceChecker(*this)}; } +std::vector<GcReferenceValidator*> +ZenCacheDiskLayer::CacheBucket::CreateReferenceValidators(GcCtx& /*Ctx*/) +{ + return {}; +} + void ZenCacheDiskLayer::CacheBucket::CompactState(RwLock::ExclusiveLockScope&, std::vector<BucketPayload>& Payloads, @@ -3594,12 +3600,10 @@ ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket) CacheBucket* Result = Bucket.get(); m_Buckets.emplace(BucketName, std::move(Bucket)); - m_UpdateCaptureLock.WithExclusiveLock([&]() { - if (m_CapturedBuckets) - { - m_CapturedBuckets->push_back(std::string(BucketName)); - } - }); + if (m_CapturedBuckets) + { + m_CapturedBuckets->push_back(std::string(BucketName)); + } return Result; } @@ -4176,7 +4180,7 @@ ZenCacheDiskLayer::GetGcReferencerLocks() void ZenCacheDiskLayer::EnableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_Lock.WithExclusiveLock([&]() { if (m_UpdateCaptureRefCounter == 0) { ZEN_ASSERT(!m_CapturedBuckets); @@ -4193,7 +4197,7 @@ ZenCacheDiskLayer::EnableUpdateCapture() void ZenCacheDiskLayer::DisableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_Lock.WithExclusiveLock([&]() { ZEN_ASSERT(m_CapturedBuckets); ZEN_ASSERT(m_UpdateCaptureRefCounter > 0); m_UpdateCaptureRefCounter--; @@ -4205,9 +4209,8 @@ ZenCacheDiskLayer::DisableUpdateCapture() } std::vector<std::string> -ZenCacheDiskLayer::GetCapturedBuckets() +ZenCacheDiskLayer::GetCapturedBucketsLocked() { - RwLock::SharedLockScope _(m_UpdateCaptureLock); if (m_CapturedBuckets) { return *m_CapturedBuckets; diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index 578929198..512f1d7f2 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -364,6 +364,7 @@ ZenCacheNamespace::EnableUpdateCapture() { m_DiskLayer.EnableUpdateCapture(); } + void ZenCacheNamespace::DisableUpdateCapture() { @@ -850,12 +851,10 @@ ZenCacheStore::GetNamespace(std::string_view Namespace) m_BasePath / fmt::format("{}{}", NamespaceDiskPrefix, Namespace), m_Configuration.NamespaceConfig)); - m_UpdateCaptureLock.WithExclusiveLock([&]() { - if (m_CapturedNamespaces) - { - m_CapturedNamespaces->push_back(std::string(Namespace)); - } - }); + if (m_CapturedNamespaces) + { + m_CapturedNamespaces->push_back(std::string(Namespace)); + } return NewNamespace.first->second.get(); } @@ -1039,7 +1038,8 @@ ZenCacheStore::LockState(GcCtx& Ctx) void ZenCacheStore::EnableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + std::vector<ZenCacheNamespace*> Namespaces; + m_NamespacesLock.WithExclusiveLock([&]() { if (m_UpdateCaptureRefCounter == 0) { ZEN_ASSERT(!m_CapturedNamespaces); @@ -1050,21 +1050,24 @@ ZenCacheStore::EnableUpdateCapture() ZEN_ASSERT(m_CapturedNamespaces); } m_UpdateCaptureRefCounter++; + Namespaces.reserve(m_Namespaces.size()); + for (auto& NamespaceIt : m_Namespaces) + { + Namespaces.push_back(NamespaceIt.second.get()); + } }); - for (auto& NamespaceIt : m_Namespaces) + + for (ZenCacheNamespace* Namespace : Namespaces) { - NamespaceIt.second->EnableUpdateCapture(); + Namespace->EnableUpdateCapture(); } } void ZenCacheStore::DisableUpdateCapture() { - for (auto& NamespaceIt : m_Namespaces) - { - NamespaceIt.second->DisableUpdateCapture(); - } - m_UpdateCaptureLock.WithExclusiveLock([&]() { + std::vector<ZenCacheNamespace*> Namespaces; + m_NamespacesLock.WithExclusiveLock([&]() { ZEN_ASSERT(m_CapturedNamespaces); ZEN_ASSERT(m_UpdateCaptureRefCounter > 0); m_UpdateCaptureRefCounter--; @@ -1072,13 +1075,21 @@ ZenCacheStore::DisableUpdateCapture() { m_CapturedNamespaces.reset(); } + Namespaces.reserve(m_Namespaces.size()); + for (auto& NamespaceIt : m_Namespaces) + { + Namespaces.push_back(NamespaceIt.second.get()); + } }); + for (ZenCacheNamespace* Namespace : Namespaces) + { + Namespace->DisableUpdateCapture(); + } } std::vector<std::string> -ZenCacheStore::GetCapturedNamespaces() +ZenCacheStore::GetCapturedNamespacesLocked() { - RwLock::SharedLockScope _(m_UpdateCaptureLock); if (m_CapturedNamespaces) { return *m_CapturedNamespaces; @@ -1149,7 +1160,7 @@ public: AddedBuckets.size()); }); - std::vector<std::string> AddedNamespaces = m_CacheStore.GetCapturedNamespaces(); + std::vector<std::string> AddedNamespaces = m_CacheStore.GetCapturedNamespacesLocked(); for (const std::string& AddedNamespace : AddedNamespaces) { @@ -1165,7 +1176,7 @@ public: for (auto& NamepaceKV : m_CacheStore.m_Namespaces) { ZenCacheNamespace& Namespace = *NamepaceKV.second; - std::vector<std::string> NamespaceAddedBuckets = Namespace.m_DiskLayer.GetCapturedBuckets(); + std::vector<std::string> NamespaceAddedBuckets = Namespace.m_DiskLayer.GetCapturedBucketsLocked(); for (const std::string& AddedBucketName : NamespaceAddedBuckets) { if (auto It = Namespace.m_DiskLayer.m_Buckets.find(AddedBucketName); It != Namespace.m_DiskLayer.m_Buckets.end()) @@ -1244,6 +1255,12 @@ ZenCacheStore::CreateReferenceCheckers(GcCtx& Ctx) return Checkers; } +std::vector<GcReferenceValidator*> +ZenCacheStore::CreateReferenceValidators(GcCtx& /*Ctx*/) +{ + return {}; +} + ////////////////////////////////////////////////////////////////////////// #if ZEN_WITH_TESTS diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index be8fc0148..b2b574799 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -240,15 +240,15 @@ WriteCompactStoreStats(CbObjectWriter& Writer, const GcCompactStoreStats& Stats, void WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, bool HumanReadable) { - if (Stats.RemoveExpiredDataStats.CheckedCount == 0) + Writer << "CreateReferenceValidators" << ToTimeSpan(Stats.CreateReferenceValidatorsMS); + if (Stats.RemoveExpiredDataStats.CheckedCount != 0) { - return; - } - Writer.BeginObject("RemoveExpired"); - { - WriteGcStats(Writer, Stats.RemoveExpiredDataStats, HumanReadable); + Writer.BeginObject("RemoveExpired"); + { + WriteGcStats(Writer, Stats.RemoveExpiredDataStats, HumanReadable); + } + Writer.EndObject(); } - Writer.EndObject(); Writer.BeginObject("Compact"); { @@ -265,15 +265,14 @@ WriteReferencerStats(CbObjectWriter& Writer, const GcReferencerStats& Stats, boo void WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& Stats, bool HumanReadable) { - if (Stats.RemoveUnreferencedDataStats.CheckedCount == 0) - { - return; - } - Writer.BeginObject("RemoveUnreferenced"); + if (Stats.RemoveUnreferencedDataStats.CheckedCount != 0) { - WriteGcStats(Writer, Stats.RemoveUnreferencedDataStats, HumanReadable); + Writer.BeginObject("RemoveUnreferenced"); + { + WriteGcStats(Writer, Stats.RemoveUnreferencedDataStats, HumanReadable); + } + Writer.EndObject(); } - Writer.EndObject(); Writer.BeginObject("Compact"); { @@ -286,6 +285,21 @@ WriteReferenceStoreStats(CbObjectWriter& Writer, const GcReferenceStoreStats& St }; void +WriteReferenceValidatorStats(CbObjectWriter& Writer, const GcReferenceValidatorStats& Stats, bool /*HumanReadable*/) +{ + Writer << "Checked" << Stats.CheckedCount; + Writer << "Missing" << Stats.MissingCount; + if (Stats.MissingCount > 0) + { + Writer << "MissingChunks" << Stats.MissingChunks; + Writer << "MissingFiles" << Stats.MissingFiles; + Writer << "MissingMetas" << Stats.MissingMetas; + Writer << "MissingAttachments" << Stats.MissingAttachments; + } + Writer << "Elapsed" << ToTimeSpan(Stats.ElapsedMS); +}; + +void WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable, bool IncludeDetails) { if (!IncludeDetails) @@ -324,8 +338,15 @@ WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable } Writer.EndObject(); + Writer.BeginObject("ReferenceValidator"); + { + WriteReferenceValidatorStats(Writer, Result.ReferenceValidatorStatSum, HumanReadable); + } + Writer.EndObject(); + Writer << "RemoveExpiredData" << ToTimeSpan(Result.RemoveExpiredDataMS); Writer << "CreateReferenceCheckers" << ToTimeSpan(Result.CreateReferenceCheckersMS); + Writer << "CreateReferenceValidators" << ToTimeSpan(Result.CreateReferenceValidatorsMS); Writer << "PreCacheState" << ToTimeSpan(Result.PreCacheStateMS); Writer << "LockState" << ToTimeSpan(Result.LockStateMS); Writer << "UpdateLockedState" << ToTimeSpan(Result.UpdateLockedStateMS); @@ -333,6 +354,7 @@ WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable Writer << "CreateReferencePruners" << ToTimeSpan(Result.CreateReferencePrunersMS); Writer << "RemoveUnreferencedData" << ToTimeSpan(Result.RemoveUnreferencedDataMS); Writer << "CompactStores" << ToTimeSpan(Result.CompactStoresMS); + Writer << "Validate" << ToTimeSpan(Result.ValidateReferencersMS); Writer << "WriteBlock" << ToTimeSpan(Result.WriteBlockMS); Writer << "Elapsed" << ToTimeSpan(Result.ElapsedMS); @@ -362,6 +384,18 @@ WriteGCResult(CbObjectWriter& Writer, const GcResult& Result, bool HumanReadable } Writer.EndArray(); } + if (!Result.ReferenceValidatorStats.empty()) + { + Writer.BeginArray("ReferenceValidators"); + for (const std::pair<std::string, GcReferenceValidatorStats>& It : Result.ReferenceValidatorStats) + { + Writer.BeginObject(); + Writer << "Name" << It.first; + WriteReferenceValidatorStats(Writer, It.second, HumanReadable); + Writer.EndObject(); + } + Writer.EndArray(); + } }; void @@ -387,7 +421,7 @@ void Sum(GcReferencerStats& Stat) { Stat.ElapsedMS = Stat.RemoveExpiredDataStats.ElapsedMS + Stat.CompactStoreStats.ElapsedMS + Stat.CreateReferenceCheckersMS + - Stat.PreCacheStateMS + Stat.UpdateLockedStateMS; + Stat.CreateReferenceValidatorsMS + Stat.PreCacheStateMS + Stat.UpdateLockedStateMS; } void @@ -397,6 +431,7 @@ Add(GcReferencerStats& Sum, const GcReferencerStats& Sub) Add(Sum.CompactStoreStats, Sub.CompactStoreStats); Sum.CreateReferenceCheckersMS += Sub.CreateReferenceCheckersMS; + Sum.CreateReferenceValidatorsMS += Sub.CreateReferenceValidatorsMS; Sum.PreCacheStateMS += Sub.PreCacheStateMS; Sum.UpdateLockedStateMS += Sub.UpdateLockedStateMS; @@ -420,6 +455,23 @@ Add(GcReferenceStoreStats& Sum, const GcReferenceStoreStats& Sub) Sum.ElapsedMS += Sub.ElapsedMS; } +void +Add(GcReferenceValidatorStats& Sum, const GcReferenceValidatorStats& Sub) +{ + Sum.CheckedCount += Sub.CheckedCount; + Sum.MissingChunks += Sub.MissingChunks; + Sum.MissingFiles += Sub.MissingFiles; + Sum.MissingMetas += Sub.MissingMetas; + Sum.MissingAttachments += Sub.MissingAttachments; + Sum.ElapsedMS += Sub.ElapsedMS; +} + +void +Sum(GcReferenceValidatorStats& Stat) +{ + Stat.MissingCount = Stat.MissingChunks + Stat.MissingFiles + Stat.MissingMetas + Stat.MissingAttachments; +} + GcResult& Sum(GcResult& Stat, bool Cancelled = false) { @@ -435,9 +487,16 @@ Sum(GcResult& Stat, bool Cancelled = false) Sum(SubStat); Add(Stat.ReferenceStoreStatSum, SubStat); } + for (std::pair<std::string, GcReferenceValidatorStats>& ReferenceValidator : Stat.ReferenceValidatorStats) + { + GcReferenceValidatorStats& SubStat = ReferenceValidator.second; + Sum(SubStat); + Add(Stat.ReferenceValidatorStatSum, SubStat); + } Sum(Stat.ReferencerStatSum); Sum(Stat.ReferenceStoreStatSum); + Sum(Stat.ReferenceValidatorStatSum); Add(Stat.CompactStoresStatSum, Stat.ReferencerStatSum.CompactStoreStats); Add(Stat.CompactStoresStatSum, Stat.ReferenceStoreStatSum.CompactStoreStats); @@ -630,18 +689,21 @@ GcManager::CollectGarbage(const GcSettings& Settings) Result.ReferencerStats.resize(m_GcReferencers.size()); + std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers; std::unordered_map<std::unique_ptr<GcStoreCompactor>, GcCompactStoreStats*> StoreCompactors; RwLock StoreCompactorsLock; + std::unordered_map<std::unique_ptr<GcReferenceValidator>, size_t> ReferenceValidators; + RwLock ReferenceValidatorsLock; WorkerThreadPool& PreCachePhaseThreadPool = Settings.SingleThread ? GetSyncWorkerPool() : GetSmallWorkerPool(EWorkloadType::Background); - ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size()); if (!m_GcReferencers.empty()) { if (CheckGCCancel()) { return Sum(Result, true); } + ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size()); ZEN_TRACE_CPU("GcV2::RemoveExpiredData"); Latch WorkLeft(1); @@ -697,9 +759,11 @@ GcManager::CollectGarbage(const GcSettings& Settings) return Sum(Result, true); } + ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size()); + ZEN_TRACE_CPU("GcV2::CreateReferencePruners"); + Result.ReferenceStoreStats.resize(m_GcReferenceStores.size()); - ZEN_INFO("GCV2: Creating reference pruners from {} reference stores", m_GcReferenceStores.size()); std::unordered_map<size_t, std::unique_ptr<GcReferencePruner>> ReferencePruners; if (!m_GcReferenceStores.empty()) { @@ -771,23 +835,100 @@ GcManager::CollectGarbage(const GcSettings& Settings) } ZEN_INFO("GCV2: Creating reference checkers from {} referencers", m_GcReferencers.size()); - std::unordered_map<std::unique_ptr<GcReferenceChecker>, size_t> ReferenceCheckers; - if (!m_GcReferencers.empty()) { ZEN_TRACE_CPU("GcV2::CreateReferenceCheckers"); - ReferenceCheckers.reserve(m_GcReferencers.size()); - Latch WorkLeft(1); - RwLock ReferenceCheckersLock; + if (!m_GcReferencers.empty()) { - SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); + ZEN_TRACE_CPU("GcV2::CreateReferenceCheckers"); + + ReferenceCheckers.reserve(m_GcReferencers.size()); + Latch WorkLeft(1); + RwLock ReferenceCheckersLock; + { + SCOPED_TIMER(Result.CreateReferenceCheckersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); + if (Ctx.Settings.Verbose) { + ZEN_INFO("GCV2: Created {} reference checkers using {} referencers in {}", + ReferenceCheckers.size(), + m_GcReferencers.size(), + NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count())); + }); + // Lock all reference owners from changing the reference data and get access to check for referenced data + for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) + { + if (CheckGCCancel()) + { + WorkLeft.CountDown(); + WorkLeft.Wait(); + return Sum(Result, true); + } + + GcReferencer* Referencer = m_GcReferencers[Index]; + std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + PreCachePhaseThreadPool.ScheduleWork( + [this, &Ctx, &WorkLeft, Referencer, Index, Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // The Referencer will create a reference checker that guarantees that the references do not change + // as long as it lives + std::vector<GcReferenceChecker*> Checkers; + try + { + { + SCOPED_TIMER(Stats->second.CreateReferenceCheckersMS = + std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checkers = Referencer->CreateReferenceCheckers(Ctx); + } + if (!Checkers.empty()) + { + RwLock::ExclusiveLockScope __(ReferenceCheckersLock); + for (auto& Checker : Checkers) + { + ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index); + Checker = nullptr; + } + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'", + Referencer->GetGcName(Ctx), + Ex.what()); + SetCancelGC(true); + while (!Checkers.empty()) + { + delete Checkers.back(); + Checkers.pop_back(); + } + } + }); + } + WorkLeft.CountDown(); + WorkLeft.Wait(); + } + } + } + + if (CheckGCCancel()) + { + return Sum(Result, true); + } + + if (!m_GcReferencers.empty() && Settings.EnableValidation) + { + ZEN_INFO("GCV2: Creating reference validators from {} referencers", m_GcReferencers.size()); + ZEN_TRACE_CPU("GcV2::CreateReferenceValidators"); + + ReferenceValidators.reserve(m_GcReferencers.size()); + Latch WorkLeft(1); + { + SCOPED_TIMER(Result.CreateReferenceValidatorsMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { - ZEN_INFO("GCV2: Created {} reference checkers using {} referencers in {}", - ReferenceCheckers.size(), + ZEN_INFO("GCV2: Created {} reference validators using {} referencers in {}", + ReferenceValidators.size(), m_GcReferencers.size(), - NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count())); + NiceTimeSpanMs(Result.CreateReferenceValidatorsMS.count())); }); - // Lock all reference owners from changing the reference data and get access to check for referenced data for (size_t Index = 0; Index < m_GcReferencers.size(); Index++) { if (CheckGCCancel()) @@ -797,100 +938,106 @@ GcManager::CollectGarbage(const GcSettings& Settings) return Sum(Result, true); } - GcReferencer* Referencer = m_GcReferencers[Index]; - std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index]; + GcReferencer* Referencer = m_GcReferencers[Index]; + std::pair<std::string, GcReferencerStats>* ReferemcerStats = &Result.ReferencerStats[Index]; WorkLeft.AddCount(1); - PreCachePhaseThreadPool.ScheduleWork( - [this, &Ctx, &WorkLeft, Referencer, Index, Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // The Referencer will create a reference checker that guarantees that the references do not change as - // long as it lives - std::vector<GcReferenceChecker*> Checkers; - try + PreCachePhaseThreadPool.ScheduleWork([this, + &Ctx, + &WorkLeft, + Referencer, + Index, + Result = &Result, + ReferemcerStats, + &ReferenceValidatorsLock, + &ReferenceValidators]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + std::vector<GcReferenceValidator*> Validators; + try + { { - { - SCOPED_TIMER(Stats->second.CreateReferenceCheckersMS = - std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Checkers = Referencer->CreateReferenceCheckers(Ctx); - } - if (!Checkers.empty()) - { - RwLock::ExclusiveLockScope __(ReferenceCheckersLock); - for (auto& Checker : Checkers) - { - ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index); - Checker = nullptr; - } - } + SCOPED_TIMER(ReferemcerStats->second.CreateReferenceValidatorsMS = + std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Validators = Referencer->CreateReferenceValidators(Ctx); } - catch (const std::exception& Ex) + if (!Validators.empty()) { - ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'", - Referencer->GetGcName(Ctx), - Ex.what()); - SetCancelGC(true); - while (!Checkers.empty()) + RwLock::ExclusiveLockScope __(ReferenceValidatorsLock); + for (auto& ReferenceValidator : Validators) { - delete Checkers.back(); - Checkers.pop_back(); + size_t ReferencesStatsIndex = Result->ReferenceValidatorStats.size(); + Result->ReferenceValidatorStats.push_back({ReferenceValidator->GetGcName(Ctx), {}}); + ReferenceValidators.insert_or_assign(std::unique_ptr<GcReferenceValidator>(ReferenceValidator), + ReferencesStatsIndex); + ReferenceValidator = nullptr; } } - }); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("GCV2: Failed creating reference validators for {}. Reason: '{}'", + Referencer->GetGcName(Ctx), + Ex.what()); + SetCancelGC(true); + while (!Validators.empty()) + { + delete Validators.back(); + Validators.pop_back(); + } + } + }); } WorkLeft.CountDown(); WorkLeft.Wait(); } } + if (!ReferenceCheckers.empty()) { - ZEN_INFO("GCV2: Precaching state for {} reference checkers", ReferenceCheckers.size()); - if (!ReferenceCheckers.empty()) + if (CheckGCCancel()) { - if (CheckGCCancel()) - { - return Sum(Result, true); - } - ZEN_TRACE_CPU("GcV2::PreCache"); + return Sum(Result, true); + } + ZEN_INFO("GCV2: Precaching state for {} reference checkers", ReferenceCheckers.size()); + ZEN_TRACE_CPU("GcV2::PreCache"); - Latch WorkLeft(1); + Latch WorkLeft(1); + { + SCOPED_TIMER(Result.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); + if (Ctx.Settings.Verbose) { + ZEN_INFO("GCV2: Precached state using {} reference checkers in {}", + ReferenceCheckers.size(), + NiceTimeSpanMs(Result.PreCacheStateMS.count())); + }); + for (auto& It : ReferenceCheckers) { - SCOPED_TIMER(Result.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); - if (Ctx.Settings.Verbose) { - ZEN_INFO("GCV2: Precached state using {} reference checkers in {}", - ReferenceCheckers.size(), - NiceTimeSpanMs(Result.PreCacheStateMS.count())); - }); - for (auto& It : ReferenceCheckers) + if (CheckGCCancel()) { - if (CheckGCCancel()) + WorkLeft.CountDown(); + WorkLeft.Wait(); + return Sum(Result, true); + } + + GcReferenceChecker* Checker = It.first.get(); + size_t Index = It.second; + std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index]; + WorkLeft.AddCount(1); + PreCachePhaseThreadPool.ScheduleWork([this, &Ctx, Checker, Index, Stats, &WorkLeft]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + try { - WorkLeft.CountDown(); - WorkLeft.Wait(); - return Sum(Result, true); + SCOPED_TIMER(Stats->second.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checker->PreCache(Ctx); } - - GcReferenceChecker* Checker = It.first.get(); - size_t Index = It.second; - std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index]; - WorkLeft.AddCount(1); - PreCachePhaseThreadPool.ScheduleWork([this, &Ctx, Checker, Index, Stats, &WorkLeft]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - try - { - SCOPED_TIMER(Stats->second.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Checker->PreCache(Ctx); - } - catch (const std::exception& Ex) - { - ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what()); - SetCancelGC(true); - } - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); + catch (const std::exception& Ex) + { + ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what()); + SetCancelGC(true); + } + }); } + WorkLeft.CountDown(); + WorkLeft.Wait(); } } @@ -898,8 +1045,7 @@ GcManager::CollectGarbage(const GcSettings& Settings) Settings.SingleThread ? GetSyncWorkerPool() : GetMediumWorkerPool(EWorkloadType::Background); std::vector<RwLock::SharedLockScope> LockerScopes; - SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS); - ZEN_INFO("GCV2: Writes blocked for {}", NiceTimeSpanMs(ElapsedMS))); + SCOPED_TIMER(uint64_t ElapsedMS = Timer.GetElapsedTimeMs(); Result.WriteBlockMS = std::chrono::milliseconds(ElapsedMS);); { if (!ReferenceCheckers.empty()) { @@ -984,100 +1130,98 @@ GcManager::CollectGarbage(const GcSettings& Settings) } } } + + if (CheckGCCancel()) { - ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size()); - if (CheckGCCancel()) - { - return Sum(Result, true); - } - { - const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::span<IoHash> { - std::span<IoHash> UnusedCids(References); - ZEN_ASSERT(UnusedCids.empty() || UnusedCids[0] != IoHash::Zero); - for (const auto& It : ReferenceCheckers) + return Sum(Result, true); + } + ZEN_INFO("GCV2: Removing unreferenced data for {} reference pruners", ReferencePruners.size()); + { + ZEN_TRACE_CPU("GcV2::RemoveUnreferencedData"); + + const auto GetUnusedReferences = [&ReferenceCheckers, &Ctx](std::span<IoHash> References) -> std::span<IoHash> { + std::span<IoHash> UnusedCids(References); + ZEN_ASSERT(UnusedCids.empty() || UnusedCids[0] != IoHash::Zero); + for (const auto& It : ReferenceCheckers) + { + GcReferenceChecker* ReferenceChecker = It.first.get(); + UnusedCids = ReferenceChecker->GetUnusedReferences(Ctx, UnusedCids); + if (UnusedCids.empty()) { - GcReferenceChecker* ReferenceChecker = It.first.get(); - UnusedCids = ReferenceChecker->GetUnusedReferences(Ctx, UnusedCids); - if (UnusedCids.empty()) - { - return {}; - } + return {}; } - return UnusedCids; - }; - - // checking all Cids agains references in cache - // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight - // operation that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors + } + return UnusedCids; + }; - ZEN_TRACE_CPU("GcV2::RemoveUnreferencedData"); + // checking all Cids agains references in cache + // Ask stores to remove data that the ReferenceCheckers says are not referenced - this should be a lightweight + // operation that only updates in-memory index, actual disk changes should be done by the ReferenceStoreCompactors - Latch WorkLeft(1); + Latch WorkLeft(1); + { + SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); + if (Ctx.Settings.Verbose) { + ZEN_INFO("GCV2: Removed unused data using {} pruners in {}", + ReferencePruners.size(), + NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count())); + }); + for (auto& It : ReferencePruners) { - SCOPED_TIMER(Result.RemoveUnreferencedDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); - if (Ctx.Settings.Verbose) { - ZEN_INFO("GCV2: Removed unused data using {} pruners in {}", - ReferencePruners.size(), - NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count())); - }); - for (auto& It : ReferencePruners) + if (CheckGCCancel()) { - if (CheckGCCancel()) - { - WorkLeft.CountDown(); - WorkLeft.Wait(); - return Sum(Result, true); - } + WorkLeft.CountDown(); + WorkLeft.Wait(); + return Sum(Result, true); + } - GcReferencePruner* Pruner = It.second.get(); - size_t Index = It.first; - GcReferenceStoreStats* Stats = &Result.ReferenceStoreStats[Index].second; - WorkLeft.AddCount(1); - LockedPhaseThreadPool.ScheduleWork( - [this, &Ctx, Pruner, Stats, &WorkLeft, &GetUnusedReferences, &StoreCompactorsLock, &StoreCompactors]() { - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are - // referenced or not. - try + GcReferencePruner* Pruner = It.second.get(); + size_t Index = It.first; + GcReferenceStoreStats* Stats = &Result.ReferenceStoreStats[Index].second; + WorkLeft.AddCount(1); + LockedPhaseThreadPool.ScheduleWork( + [this, &Ctx, Pruner, Stats, &WorkLeft, &GetUnusedReferences, &StoreCompactorsLock, &StoreCompactors]() { + auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); + // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are + // referenced or not. + try + { + std::unique_ptr<GcStoreCompactor> StoreCompactor; { - std::unique_ptr<GcStoreCompactor> StoreCompactor; - { - SCOPED_TIMER(Stats->RemoveUnreferencedDataStats.ElapsedMS = - std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - StoreCompactor = std::unique_ptr<GcStoreCompactor>( - Pruner->RemoveUnreferencedData(Ctx, - Stats->RemoveUnreferencedDataStats, - GetUnusedReferences)); - } - if (StoreCompactor) - { - RwLock::ExclusiveLockScope __(StoreCompactorsLock); - StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->CompactStoreStats); - } + SCOPED_TIMER(Stats->RemoveUnreferencedDataStats.ElapsedMS = + std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + StoreCompactor = std::unique_ptr<GcStoreCompactor>( + Pruner->RemoveUnreferencedData(Ctx, + Stats->RemoveUnreferencedDataStats, + GetUnusedReferences)); } - catch (const std::exception& Ex) + if (StoreCompactor) { - ZEN_ERROR("GCV2: Failed removing unused data for {}. Reason: '{}'", - Pruner->GetGcName(Ctx), - Ex.what()); - SetCancelGC(true); + RwLock::ExclusiveLockScope __(StoreCompactorsLock); + StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->CompactStoreStats); } - }); - } - WorkLeft.CountDown(); - WorkLeft.Wait(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("GCV2: Failed removing unused data for {}. Reason: '{}'", + Pruner->GetGcName(Ctx), + Ex.what()); + SetCancelGC(true); + } + }); } + WorkLeft.CountDown(); + WorkLeft.Wait(); } - // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed - LockerScopes.clear(); - ReferenceCheckers.clear(); - ReferencePruners.clear(); } + // Let the GcReferencers add new data, we will only change on-disk data at this point, adding new data is allowed + LockerScopes.clear(); + ReferenceCheckers.clear(); + ReferencePruners.clear(); } } - ZEN_INFO("GCV2: Compacting using {} store compactors", StoreCompactors.size()); if (!StoreCompactors.empty()) { if (CheckGCCancel()) @@ -1085,6 +1229,7 @@ GcManager::CollectGarbage(const GcSettings& Settings) return Sum(Result, true); } + ZEN_INFO("GCV2: Compacting using {} store compactors", StoreCompactors.size()); ZEN_TRACE_CPU("GcV2::CompactStores"); auto ClaimDiskReserve = [&]() -> uint64_t { @@ -1129,6 +1274,47 @@ GcManager::CollectGarbage(const GcSettings& Settings) } StoreCompactors.clear(); } + + if (!ReferenceValidators.empty()) + { + if (CheckGCCancel()) + { + return Sum(Result, true); + } + + ZEN_INFO("GCV2: Validating using {} reference validators", ReferenceValidators.size()); + ZEN_TRACE_CPU("GcV2::ValidateReferences"); + + // Remove the stuff we deemed unreferenced from disk - may be heavy operation + // Don't do in parallel, we don't want to steal CPU/Disk from regular operation + { + SCOPED_TIMER(Result.ValidateReferencersMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { + ZEN_INFO("GCV2: Compacted {} stores in {}", StoreCompactors.size(), NiceTimeSpanMs(Result.CompactStoresMS.count())); + }); + for (auto& It : ReferenceValidators) + { + if (CheckGCCancel()) + { + return Sum(Result, true); + } + + GcReferenceValidator* ReferenceValidator = It.first.get(); + GcReferenceValidatorStats& Stats = Result.ReferenceValidatorStats[It.second].second; + try + { + // Go through all the ReferenceCheckers to see if the list of Cids the collector selected are referenced or + SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + ReferenceValidator->Validate(Ctx, Stats); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("GCV2: Failed validating referencer {}. Reason: '{}'", ReferenceValidator->GetGcName(Ctx), Ex.what()); + SetCancelGC(true); + } + } + } + ReferenceValidators.clear(); + } } return Sum(Result); @@ -1521,6 +1707,7 @@ GcScheduler::AppendGCLog(std::string_view Id, GcClock::TimePoint StartTime, cons Writer << "AttachmentRangeMax"sv << Settings.AttachmentRangeMin; Writer << "ForceStoreCacheAttachmentMetaData"sv << Settings.StoreCacheAttachmentMetaData; Writer << "ForceStoreProjectAttachmentMetaData"sv << Settings.StoreProjectAttachmentMetaData; + Writer << "EnableValidation"sv << Settings.EnableValidation; } Writer.EndObject(); @@ -1719,6 +1906,7 @@ GcScheduler::SchedulerThread() IoHash AttachmentRangeMax = IoHash::Max; bool StoreCacheAttachmentMetaData = m_Config.StoreCacheAttachmentMetaData; bool StoreProjectAttachmentMetaData = m_Config.StoreProjectAttachmentMetaData; + bool EnableValidation = m_Config.EnableValidation; uint8_t NextAttachmentPassIndex = ComputeAttachmentRange(m_AttachmentPassIndex, m_Config.AttachmentPassCount, AttachmentRangeMin, AttachmentRangeMax); @@ -1774,6 +1962,10 @@ GcScheduler::SchedulerThread() { StoreProjectAttachmentMetaData = TriggerParams.StoreProjectAttachmentMetaData.value(); } + if (TriggerParams.EnableValidation.has_value()) + { + EnableValidation = TriggerParams.EnableValidation.value(); + } DoGc = true; } @@ -2002,6 +2194,7 @@ GcScheduler::SchedulerThread() AttachmentRangeMax, StoreCacheAttachmentMetaData, StoreProjectAttachmentMetaData, + EnableValidation, SilenceErrors); if (!GcSuccess) { @@ -2105,6 +2298,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, const IoHash& AttachmentRangeMax, bool StoreCacheAttachmentMetaData, bool StoreProjectAttachmentMetaData, + bool EnableValidation, bool SilenceErrors) { ZEN_TRACE_CPU("GcScheduler::CollectGarbage"); @@ -2184,25 +2378,26 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, .AttachmentRangeMin = AttachmentRangeMin, .AttachmentRangeMax = AttachmentRangeMax, .StoreCacheAttachmentMetaData = StoreCacheAttachmentMetaData, - .StoreProjectAttachmentMetaData = StoreProjectAttachmentMetaData}; + .StoreProjectAttachmentMetaData = StoreProjectAttachmentMetaData, + .EnableValidation = EnableValidation}; auto AppendSettings = [](StringBuilderBase& SB, const GcSettings& Settings) { SB.Append( - fmt::format(" GC small objects: {}\n", Settings.CollectSmallObjects ? "yes"sv : "no"sv)); - SB.Append(fmt::format(" GC Cid store: {}\n", Settings.SkipCidDelete ? "no"sv : "yes"sv)); + fmt::format(" GC small objects: {}\n", Settings.CollectSmallObjects ? "yes"sv : "no"sv)); + SB.Append(fmt::format(" GC Cid store: {}\n", Settings.SkipCidDelete ? "no"sv : "yes"sv)); if (!Settings.SkipCidDelete) { if (Settings.AttachmentRangeMin != IoHash::Zero || Settings.AttachmentRangeMax != IoHash::Max) { - SB.Append(fmt::format(" Attachment range: {}-{}\n", + SB.Append(fmt::format(" Attachment range: {}-{}\n", Settings.AttachmentRangeMin, Settings.AttachmentRangeMax)); } - SB.Append(fmt::format(" Cache attachment meta: {}\n", Settings.StoreCacheAttachmentMetaData)); - SB.Append(fmt::format(" Project attachment meta: {}\n", Settings.StoreProjectAttachmentMetaData)); + SB.Append(fmt::format(" Cache attachment meta: {}\n", Settings.StoreCacheAttachmentMetaData)); + SB.Append(fmt::format(" Project attachment meta: {}\n", Settings.StoreProjectAttachmentMetaData)); + SB.Append(fmt::format(" Enable validation: {}\n", Settings.EnableValidation)); } - SB.Append(fmt::format(" Cache cutoff time: {}\n", Settings.CacheExpireTime)); - SB.Append(fmt::format(" Project store cutoff time: {}", Settings.ProjectStoreExpireTime)); + SB.Append(fmt::format(" Cache cutoff time: {}\n", Settings.CacheExpireTime)); }; { @@ -2224,7 +2419,36 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, { SB.Append(fmt::format("COMPLETED '{}' in {}\n", GcId, NiceTimeSpanMs(Result.ElapsedMS.count()))); AppendSettings(SB, Settings); - SB.Append("\n\n"); + SB.Append("\n"); + SB.Append( + fmt::format(" Remove Expired Data: {}\n", NiceTimeSpanMs(Result.RemoveExpiredDataMS.count()))); + if (!Settings.SkipCidDelete) + { + SB.Append(fmt::format(" Create Reference Pruners: {}\n", + NiceTimeSpanMs(Result.CreateReferencePrunersMS.count()))); + SB.Append(fmt::format(" Create Reference Checkers: {}\n", + NiceTimeSpanMs(Result.CreateReferenceCheckersMS.count()))); + if (EnableValidation) + { + SB.Append(fmt::format(" Create Reference Validators: {}\n", + NiceTimeSpanMs(Result.CreateReferenceValidatorsMS.count()))); + } + SB.Append( + fmt::format(" Precache State: {}\n", NiceTimeSpanMs(Result.PreCacheStateMS.count()))); + SB.Append( + fmt::format(" Writes blocked: {}\n", NiceTimeSpanMs(Result.WriteBlockMS.count()))); + SB.Append( + fmt::format(" Lock State: {}\n", NiceTimeSpanMs(Result.LockStateMS.count()))); + SB.Append(fmt::format(" Update Lock State: {}\n", + NiceTimeSpanMs(Result.UpdateLockedStateMS.count()))); + SB.Append(fmt::format(" Remove Unreferenced: {}\n", + NiceTimeSpanMs(Result.RemoveUnreferencedDataMS.count()))); + SB.Append( + fmt::format(" Compacting Stores: {}\n", NiceTimeSpanMs(Result.CompactStoresMS.count()))); + SB.Append(fmt::format(" Reference Validation: {}\n", + NiceTimeSpanMs(Result.ValidateReferencersMS.count()))); + SB.Append("\n"); + } SB.Append(fmt::format(" Found {} expired items out of {}, deleted {}\n", Result.ReferencerStatSum.RemoveExpiredDataStats.FoundCount, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount, @@ -2235,6 +2459,12 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.FoundCount, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount)); + if (EnableValidation) + { + SB.Append(fmt::format(" Validated {} items and found {} missing references\n", + Result.ReferenceValidatorStatSum.CheckedCount, + Result.ReferenceValidatorStatSum.MissingCount)); + } } SB.Append(fmt::format(" Freed {} on disk and {} of memory\n", NiceBytes(Result.CompactStoresStatSum.RemovedDisk), diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index 4b7cf6101..711b96c8f 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -199,7 +199,7 @@ public: void EnableUpdateCapture(); void DisableUpdateCapture(); - std::vector<std::string> GetCapturedBuckets(); + std::vector<std::string> GetCapturedBucketsLocked(); #if ZEN_WITH_TESTS void SetAccessTime(std::string_view Bucket, const IoHash& HashKey, GcClock::TimePoint Time); @@ -367,9 +367,10 @@ public: std::atomic_uint64_t m_StandaloneSize{}; std::atomic_uint64_t m_MemCachedSize{}; - virtual std::string GetGcName(GcCtx& Ctx) override; - virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; - virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; + virtual std::string GetGcName(GcCtx& Ctx) override; + virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; + virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; + virtual std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) override; void BuildPath(PathBuilderBase& Path, const IoHash& HashKey) const; void PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value, std::span<IoHash> References); @@ -491,7 +492,6 @@ private: mutable RwLock m_Lock; std::unordered_map<std::string, std::unique_ptr<CacheBucket>> m_Buckets; std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets; - mutable RwLock m_UpdateCaptureLock; uint32_t m_UpdateCaptureRefCounter = 0; std::unique_ptr<std::vector<std::string>> m_CapturedBuckets; diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h index dcdca71c6..82fec9b0e 100644 --- a/src/zenstore/include/zenstore/cache/structuredcachestore.h +++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h @@ -279,13 +279,14 @@ public: virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) override; - virtual std::string GetGcName(GcCtx& Ctx) override; - virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; - virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; + virtual std::string GetGcName(GcCtx& Ctx) override; + virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; + virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; + virtual std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) override; void EnableUpdateCapture(); void DisableUpdateCapture(); - std::vector<std::string> GetCapturedNamespaces(); + std::vector<std::string> GetCapturedNamespacesLocked(); bool GetContentStats(std::string_view Namespace, std::string_view BucketName, CacheContentStats& OutContentStats) const; @@ -301,7 +302,6 @@ private: mutable RwLock m_NamespacesLock; NamespaceMap m_Namespaces; std::vector<std::unique_ptr<ZenCacheNamespace>> m_DroppedNamespaces; - mutable RwLock m_UpdateCaptureLock; uint32_t m_UpdateCaptureRefCounter = 0; std::unique_ptr<std::vector<std::string>> m_CapturedNamespaces; diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index e191a0930..3daae0a93 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -68,6 +68,7 @@ struct GcSettings IoHash AttachmentRangeMax = IoHash::Max; bool StoreCacheAttachmentMetaData = false; bool StoreProjectAttachmentMetaData = false; + bool EnableValidation = true; }; struct GcCompactStoreStats @@ -76,6 +77,18 @@ struct GcCompactStoreStats std::chrono::milliseconds ElapsedMS = {}; }; +struct GcReferenceValidatorStats +{ + std::uint64_t CheckedCount = 0; + std::uint64_t MissingChunks = 0; + std::uint64_t MissingFiles = 0; + std::uint64_t MissingMetas = 0; + std::uint64_t MissingAttachments = 0; + + std::uint64_t MissingCount = 0; + std::chrono::milliseconds ElapsedMS = {}; +}; + struct GcStats { std::uint64_t CheckedCount = 0; @@ -90,10 +103,11 @@ struct GcReferencerStats GcStats RemoveExpiredDataStats; GcCompactStoreStats CompactStoreStats; - std::chrono::milliseconds CreateReferenceCheckersMS = {}; - std::chrono::milliseconds PreCacheStateMS = {}; - std::chrono::milliseconds UpdateLockedStateMS = {}; - std::chrono::milliseconds ElapsedMS = {}; + std::chrono::milliseconds CreateReferenceCheckersMS = {}; + std::chrono::milliseconds CreateReferenceValidatorsMS = {}; + std::chrono::milliseconds PreCacheStateMS = {}; + std::chrono::milliseconds UpdateLockedStateMS = {}; + std::chrono::milliseconds ElapsedMS = {}; }; struct GcReferenceStoreStats @@ -107,23 +121,27 @@ struct GcReferenceStoreStats struct GcResult { - std::vector<std::pair<std::string, GcReferencerStats>> ReferencerStats; - std::vector<std::pair<std::string, GcReferenceStoreStats>> ReferenceStoreStats; + std::vector<std::pair<std::string, GcReferencerStats>> ReferencerStats; + std::vector<std::pair<std::string, GcReferenceStoreStats>> ReferenceStoreStats; + std::vector<std::pair<std::string, GcReferenceValidatorStats>> ReferenceValidatorStats; - GcReferencerStats ReferencerStatSum; - GcReferenceStoreStats ReferenceStoreStatSum; - GcCompactStoreStats CompactStoresStatSum; + GcReferencerStats ReferencerStatSum; + GcReferenceStoreStats ReferenceStoreStatSum; + GcCompactStoreStats CompactStoresStatSum; + GcReferenceValidatorStats ReferenceValidatorStatSum; // Wall times, not sum of each - std::chrono::milliseconds RemoveExpiredDataMS = {}; - std::chrono::milliseconds CreateReferenceCheckersMS = {}; - std::chrono::milliseconds PreCacheStateMS = {}; - std::chrono::milliseconds LockStateMS = {}; - std::chrono::milliseconds UpdateLockedStateMS = {}; + std::chrono::milliseconds RemoveExpiredDataMS = {}; + std::chrono::milliseconds CreateReferenceCheckersMS = {}; + std::chrono::milliseconds CreateReferenceValidatorsMS = {}; + std::chrono::milliseconds PreCacheStateMS = {}; + std::chrono::milliseconds LockStateMS = {}; + std::chrono::milliseconds UpdateLockedStateMS = {}; std::chrono::milliseconds CreateReferencePrunersMS = {}; std::chrono::milliseconds RemoveUnreferencedDataMS = {}; std::chrono::milliseconds CompactStoresMS = {}; + std::chrono::milliseconds ValidateReferencersMS = {}; std::chrono::milliseconds WriteBlockMS = {}; @@ -167,6 +185,17 @@ public: virtual std::string GetGcName(GcCtx& Ctx) = 0; }; +class GcReferenceValidator +{ +public: + virtual ~GcReferenceValidator() = default; + + virtual std::string GetGcName(GcCtx&) = 0; + + // Validate that that GC did not remove anything needed by this reference checker + virtual void Validate(GcCtx& Ctx, GcReferenceValidatorStats& Stats) = 0; +}; + /** * @brief An interface to check if a set of Cids are referenced * @@ -240,6 +269,10 @@ public: // Create 0-n GcReferenceChecker for this GcReferencer. Caller will manage lifetime of // returned instances virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) = 0; + + // Create 0-n GcReferenceValidator for this GcReferencer. Caller will manage lifetime of + // returned instances + virtual std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) = 0; }; /** @@ -392,6 +425,7 @@ struct GcSchedulerConfig uint16_t AttachmentPassCount = 1; bool StoreCacheAttachmentMetaData = false; bool StoreProjectAttachmentMetaData = false; + bool EnableValidation = true; }; struct GcSchedulerState @@ -471,8 +505,9 @@ public: std::optional<bool> SingleThreaded; std::optional<IoHash> AttachmentRangeMin; std::optional<IoHash> AttachmentRangeMax; - std::optional<bool> StoreCacheAttachmentMetaData = false; - std::optional<bool> StoreProjectAttachmentMetaData = false; + std::optional<bool> StoreCacheAttachmentMetaData; + std::optional<bool> StoreProjectAttachmentMetaData; + std::optional<bool> EnableValidation; }; bool TriggerGc(const TriggerGcParams& Params); @@ -504,6 +539,7 @@ private: const IoHash& AttachmentRangeMax, bool StoreCacheAttachmentMetaData, bool StoreProjectAttachmentMetaData, + bool EnableValidation, bool SilenceErrors); void ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds TimeSlice); LoggerRef Log() { return m_Log; } |