diff options
Diffstat (limited to 'src/zenserver')
| -rw-r--r-- | src/zenserver/admin/admin.cpp | 5 | ||||
| -rw-r--r-- | src/zenserver/config.cpp | 8 | ||||
| -rw-r--r-- | src/zenserver/config.h | 1 | ||||
| -rw-r--r-- | src/zenserver/projectstore/httpprojectstore.cpp | 157 | ||||
| -rw-r--r-- | src/zenserver/projectstore/httpprojectstore.h | 1 | ||||
| -rw-r--r-- | src/zenserver/projectstore/projectstore.cpp | 824 | ||||
| -rw-r--r-- | src/zenserver/projectstore/projectstore.h | 95 |
7 files changed, 878 insertions, 213 deletions
diff --git a/src/zenserver/admin/admin.cpp b/src/zenserver/admin/admin.cpp index ea830923f..847ed5a50 100644 --- a/src/zenserver/admin/admin.cpp +++ b/src/zenserver/admin/admin.cpp @@ -454,6 +454,11 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, GcParams.StoreProjectAttachmentMetaData = Param == "true"sv; } + if (auto Param = Params.GetValue("enablevalidation"); Param.empty() == false) + { + GcParams.EnableValidation = Param == "true"sv; + } + const bool Started = m_GcScheduler.TriggerGc(GcParams); CbObjectWriter Response; diff --git a/src/zenserver/config.cpp b/src/zenserver/config.cpp index 2fd9bbaf3..bedab7049 100644 --- a/src/zenserver/config.cpp +++ b/src/zenserver/config.cpp @@ -485,6 +485,7 @@ ParseConfigFile(const std::filesystem::path& Path, ServerOptions.GcConfig.StoreProjectAttachmentMetaData, "gc-projectstore-attachment-store"); LuaOptions.AddOption("gc.attachment.passes"sv, ServerOptions.GcConfig.AttachmentPassCount, "gc-attachment-passes"sv); + LuaOptions.AddOption("gc.validation"sv, ServerOptions.GcConfig.EnableValidation, "gc-validation"); ////// gc LuaOptions.AddOption("gc.cache.maxdurationseconds"sv, ServerOptions.GcConfig.Cache.MaxDurationSeconds, "gc-cache-duration-seconds"sv); @@ -890,6 +891,13 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) options.add_option("gc", "", + "gc-validation", + "Enable validation of references after full GC.", + cxxopts::value<bool>(ServerOptions.GcConfig.EnableValidation)->default_value("true"), + ""); + + options.add_option("gc", + "", "gc-enabled", "Whether garbage collection is enabled or not.", cxxopts::value<bool>(ServerOptions.GcConfig.Enabled)->default_value("true"), diff --git a/src/zenserver/config.h b/src/zenserver/config.h index 3e01cac99..5c56695f3 100644 --- a/src/zenserver/config.h +++ b/src/zenserver/config.h @@ -80,6 +80,7 @@ struct ZenGcConfig uint16_t AttachmentPassCount = 1; bool StoreCacheAttachmentMetaData = false; bool StoreProjectAttachmentMetaData = false; + bool EnableValidation = true; }; struct ZenOpenIdProviderConfig diff --git a/src/zenserver/projectstore/httpprojectstore.cpp b/src/zenserver/projectstore/httpprojectstore.cpp index 1b45e66f3..710cbe5a7 100644 --- a/src/zenserver/projectstore/httpprojectstore.cpp +++ b/src/zenserver/projectstore/httpprojectstore.cpp @@ -292,6 +292,11 @@ HttpProjectService::HttpProjectService(CidStore& Store, ProjectStore* Projects, HttpVerb::kPost); m_Router.RegisterRoute( + "{project}/oplog/{log}/validate", + [this](HttpRouterRequest& Req) { HandleOplogValidateRequest(Req); }, + HttpVerb::kPost); + + m_Router.RegisterRoute( "{project}/oplog/{log}/{op}", [this](HttpRouterRequest& Req) { HandleOpLogOpRequest(Req); }, HttpVerb::kGet); @@ -962,28 +967,25 @@ HttpProjectService::HandleOplogOpPrepRequest(HttpRouterRequest& Req) IoBuffer Payload = HttpReq.ReadPayload(); CbObject RequestObject = LoadCompactBinaryObject(Payload); - std::vector<IoHash> NeedList; - - for (auto Entry : RequestObject["have"sv]) + std::vector<IoHash> ChunkList; + CbArrayView HaveList = RequestObject["have"sv].AsArrayView(); + ChunkList.reserve(HaveList.Num()); + for (auto& Entry : HaveList) { - const IoHash FileHash = Entry.AsHash(); - - if (!m_CidStore.ContainsChunk(FileHash)) - { - ZEN_DEBUG("prep - NEED: {}", FileHash); - - NeedList.push_back(FileHash); - } + ChunkList.push_back(Entry.AsHash()); } + std::vector<IoHash> NeedList = FoundLog->CheckPendingChunkReferences(ChunkList, std::chrono::minutes(2)); + CbObjectWriter Cbo; Cbo.BeginArray("need"); - - for (const IoHash& Hash : NeedList) { - Cbo << Hash; + for (const IoHash& Hash : NeedList) + { + ZEN_DEBUG("prep - NEED: {}", Hash); + Cbo << Hash; + } } - Cbo.EndArray(); CbObject Response = Cbo.Save(); @@ -1043,9 +1045,12 @@ HttpProjectService::HandleOplogOpNewRequest(HttpRouterRequest& Req) bool IsValid = true; std::vector<IoHash> MissingChunks; + std::vector<IoHash> ReferencedChunks; CbPackage::AttachmentResolver Resolver = [&](const IoHash& Hash) -> SharedBuffer { - Oplog.CaptureAddedAttachments(std::vector<IoHash>{Hash}); + // We want to add all chunks here so we can properly clear them from the 'prep' call where we retained them earlier + ReferencedChunks.push_back(Hash); + if (m_CidStore.ContainsChunk(Hash)) { // Return null attachment as we already have it, no point in reading it and storing it again @@ -1146,12 +1151,132 @@ HttpProjectService::HandleOplogOpNewRequest(HttpRouterRequest& Req) } m_ProjectStats.ChunkWriteCount += AttachmentCount; + // Once we stored the op, we no longer need to retain any chunks this op references + FoundLog->RemovePendingChunkReferences(ReferencedChunks); + m_ProjectStats.OpWriteCount++; ZEN_DEBUG("'{}/{}' op #{} ({}) - '{}'", ProjectId, OplogId, OpLsn, NiceBytes(Payload.Size()), Core["key"sv].AsString()); HttpReq.WriteResponse(HttpResponseCode::Created); } void +HttpProjectService::HandleOplogValidateRequest(HttpRouterRequest& Req) +{ + ZEN_TRACE_CPU("ProjectService::OplogOpNew"); + + using namespace std::literals; + + HttpServerRequest& HttpReq = Req.ServerRequest(); + + if (!m_ProjectStore->AreDiskWritesAllowed()) + { + return HttpReq.WriteResponse(HttpResponseCode::InsufficientStorage); + } + + const auto& ProjectId = Req.GetCapture(1); + const auto& OplogId = Req.GetCapture(2); + + Ref<ProjectStore::Project> Project = m_ProjectStore->OpenProject(ProjectId); + if (!Project) + { + return HttpReq.WriteResponse(HttpResponseCode::NotFound, ZenContentType::kText, fmt::format("Project '{}' not found", ProjectId)); + } + Project->TouchProject(); + + ProjectStore::Oplog* FoundLog = Project->OpenOplog(OplogId, /*AllowCompact*/ true, /*VerifyPathOnDisk*/ false); + if (!FoundLog) + { + return HttpReq.WriteResponse(HttpResponseCode::NotFound, + ZenContentType::kText, + fmt::format("Oplog '{}' not found in project '{}'", OplogId, ProjectId)); + } + Project->TouchOplog(OplogId); + + ProjectStore::Oplog& Oplog = *FoundLog; + + std::atomic_bool CancelFlag = false; + ProjectStore::Oplog::ValidationResult Result = Oplog.Validate(CancelFlag); + tsl::robin_map<Oid, std::string, Oid::Hasher> KeyNameLookup; + KeyNameLookup.reserve(Result.OpKeys.size()); + for (const auto& It : Result.OpKeys) + { + KeyNameLookup.insert_or_assign(It.first, It.second); + } + CbObjectWriter Writer; + Writer << "HasMissingData" << !Result.IsEmpty(); + Writer << "OpCount" << Result.OpCount; + Writer << "LSNLow" << Result.LSNLow; + Writer << "LSNHigh" << Result.LSNHigh; + if (!Result.MissingFiles.empty()) + { + Writer.BeginArray("MissingFiles"); + for (const auto& MissingFile : Result.MissingFiles) + { + Writer.BeginObject(); + { + Writer << "Key" << MissingFile.first; + Writer << "KeyName" << KeyNameLookup[MissingFile.first]; + Writer << "Id" << MissingFile.second.Id; + Writer << "Hash" << MissingFile.second.Hash; + Writer << "ServerPath" << MissingFile.second.ServerPath; + Writer << "ClientPath" << MissingFile.second.ClientPath; + } + Writer.EndObject(); + } + Writer.EndArray(); + } + if (!Result.MissingChunks.empty()) + { + Writer.BeginArray("MissingChunks"); + for (const auto& MissingChunk : Result.MissingChunks) + { + Writer.BeginObject(); + { + Writer << "Key" << MissingChunk.first; + Writer << "KeyName" << KeyNameLookup[MissingChunk.first]; + Writer << "Id" << MissingChunk.second.Id; + Writer << "Hash" << MissingChunk.second.Hash; + } + Writer.EndObject(); + } + Writer.EndArray(); + } + if (!Result.MissingMetas.empty()) + { + Writer.BeginArray("MissingMetas"); + for (const auto& MissingMeta : Result.MissingMetas) + { + Writer.BeginObject(); + { + Writer << "Key" << MissingMeta.first; + Writer << "KeyName" << KeyNameLookup[MissingMeta.first]; + Writer << "Id" << MissingMeta.second.Id; + Writer << "Hash" << MissingMeta.second.Hash; + } + Writer.EndObject(); + } + Writer.EndArray(); + } + if (!Result.MissingAttachments.empty()) + { + Writer.BeginArray("MissingAttachments"); + for (const auto& MissingMeta : Result.MissingAttachments) + { + Writer.BeginObject(); + { + Writer << "Key" << MissingMeta.first; + Writer << "KeyName" << KeyNameLookup[MissingMeta.first]; + Writer << "Hash" << MissingMeta.second; + } + Writer.EndObject(); + } + Writer.EndArray(); + } + CbObject Response = Writer.Save(); + HttpReq.WriteResponse(HttpResponseCode::OK, Response); +} + +void HttpProjectService::HandleOpLogOpRequest(HttpRouterRequest& Req) { ZEN_TRACE_CPU("ProjectService::OplogOp"); diff --git a/src/zenserver/projectstore/httpprojectstore.h b/src/zenserver/projectstore/httpprojectstore.h index 9990ee264..13810bd66 100644 --- a/src/zenserver/projectstore/httpprojectstore.h +++ b/src/zenserver/projectstore/httpprojectstore.h @@ -70,6 +70,7 @@ private: void HandleChunkByCidRequest(HttpRouterRequest& Req); void HandleOplogOpPrepRequest(HttpRouterRequest& Req); void HandleOplogOpNewRequest(HttpRouterRequest& Req); + void HandleOplogValidateRequest(HttpRouterRequest& Req); void HandleOpLogOpRequest(HttpRouterRequest& Req); void HandleOpLogRequest(HttpRouterRequest& Req); void HandleOpLogEntriesRequest(HttpRouterRequest& Req); diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index 7e03432d6..1b48a542c 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -949,6 +949,7 @@ ProjectStore::Oplog::Oplog(std::string_view Id, , m_BasePath(BasePath) , m_MarkerPath(MarkerPath) , m_MetaValid(false) +, m_PendingPrepOpAttachmentsRetainEnd(GcClock::Now()) { using namespace std::literals; @@ -1129,6 +1130,10 @@ bool ProjectStore::Oplog::PrepareForDelete(std::filesystem::path& OutRemoveDirectory) { RwLock::ExclusiveLockScope _(m_OplogLock); + m_UpdateCaptureRefCounter = 0; + m_CapturedLSNs.reset(); + m_CapturedAttachments.reset(); + m_PendingPrepOpAttachments.clear(); m_ChunkMap.clear(); m_MetaMap.clear(); m_FileMap.clear(); @@ -1191,12 +1196,12 @@ ProjectStore::Oplog::Read() const OplogEntryMapping OpMapping = GetMapping(Op); // Update chunk id maps - for (const OplogEntryMapping::Mapping& Chunk : OpMapping.Chunks) + for (const ChunkMapping& Chunk : OpMapping.Chunks) { m_ChunkMap.insert_or_assign(Chunk.Id, Chunk.Hash); } - for (const OplogEntryMapping::FileMapping& File : OpMapping.Files) + for (const FileMapping& File : OpMapping.Files) { if (File.Hash != IoHash::Zero) { @@ -1207,7 +1212,7 @@ ProjectStore::Oplog::Read() FileMapEntry{.ServerPath = File.Hash == IoHash::Zero ? File.ServerPath : std::string(), .ClientPath = File.ClientPath}); } - for (const OplogEntryMapping::Mapping& Meta : OpMapping.Meta) + for (const ChunkMapping& Meta : OpMapping.Meta) { m_MetaMap.insert_or_assign(Meta.Id, Meta.Hash); } @@ -1317,6 +1322,101 @@ ProjectStore::Oplog::ReadStateFile(const std::filesystem::path& BasePath, std::f return {}; } +ProjectStore::Oplog::ValidationResult +ProjectStore::Oplog::Validate(std::atomic_bool& IsCancelledFlag) +{ + using namespace std::literals; + + ValidationResult Result; + + std::vector<Oid> KeyHashes; + std::vector<std::string> Keys; + std::vector<std::vector<IoHash>> Attachments; + std::vector<OplogEntryMapping> Mappings; + + IterateOplogWithKey([&](uint32_t LSN, const Oid& Key, CbObjectView OpView) { + Result.LSNLow = Min(Result.LSNLow, LSN); + Result.LSNHigh = Max(Result.LSNHigh, LSN); + KeyHashes.push_back(Key); + Keys.emplace_back(std::string(OpView["key"sv].AsString())); + + std::vector<IoHash> OpAttachments; + OpView.IterateAttachments([&OpAttachments](CbFieldView Attachment) { OpAttachments.push_back(Attachment.AsAttachment()); }); + Attachments.emplace_back(std::move(OpAttachments)); + + Mappings.push_back(GetMapping(OpView)); + }); + + Result.OpCount = gsl::narrow<uint32_t>(Keys.size()); + for (uint32_t OpIndex = 0; !IsCancelledFlag && OpIndex < Result.OpCount; OpIndex++) + { + const Oid& KeyHash = KeyHashes[OpIndex]; + const std::string& Key = Keys[OpIndex]; + const OplogEntryMapping& Mapping(Mappings[OpIndex]); + bool HasMissingEntries = false; + for (const ChunkMapping& Chunk : Mapping.Chunks) + { + if (!m_CidStore.ContainsChunk(Chunk.Hash)) + { + Result.MissingChunks.push_back({KeyHash, Chunk}); + HasMissingEntries = true; + } + } + for (const ChunkMapping& Meta : Mapping.Meta) + { + if (!m_CidStore.ContainsChunk(Meta.Hash)) + { + Result.MissingMetas.push_back({KeyHash, Meta}); + HasMissingEntries = true; + } + } + for (const FileMapping& File : Mapping.Files) + { + if (File.Hash == IoHash::Zero) + { + std::filesystem::path FilePath = m_OuterProject->RootDir / File.ServerPath; + if (!std::filesystem::is_regular_file(FilePath)) + { + Result.MissingFiles.push_back({KeyHash, File}); + HasMissingEntries = true; + } + } + else + { + if (!m_CidStore.ContainsChunk(File.Hash)) + { + Result.MissingFiles.push_back({KeyHash, File}); + HasMissingEntries = true; + } + } + } + const std::vector<IoHash>& OpAttachments = Attachments[OpIndex]; + for (const IoHash& Attachment : OpAttachments) + { + if (!m_CidStore.ContainsChunk(Attachment)) + { + Result.MissingAttachments.push_back({KeyHash, Attachment}); + HasMissingEntries = true; + } + } + if (HasMissingEntries) + { + Result.OpKeys.push_back({KeyHash, Key}); + } + } + + { + // Check if we were deleted while we were checking the references without a lock... + RwLock::SharedLockScope _(m_OplogLock); + if (!m_Storage) + { + Result = {}; + } + } + + return Result; +} + void ProjectStore::Oplog::WriteIndexSnapshot() { @@ -2286,21 +2386,9 @@ ProjectStore::Oplog::AddChunkMappings(const std::unordered_map<Oid, IoHash, Oid: } void -ProjectStore::Oplog::CaptureUpdatedLSNs(std::span<const uint32_t> LSNs) -{ - m_UpdateCaptureLock.WithExclusiveLock([&]() { - if (m_CapturedLSNs) - { - m_CapturedLSNs->reserve(m_CapturedLSNs->size() + LSNs.size()); - m_CapturedLSNs->insert(m_CapturedLSNs->end(), LSNs.begin(), LSNs.end()); - } - }); -} - -void ProjectStore::Oplog::CaptureAddedAttachments(std::span<const IoHash> AttachmentHashes) { - m_UpdateCaptureLock.WithExclusiveLock([this, AttachmentHashes]() { + m_OplogLock.WithExclusiveLock([this, AttachmentHashes]() { if (m_CapturedAttachments) { m_CapturedAttachments->reserve(m_CapturedAttachments->size() + AttachmentHashes.size()); @@ -2312,7 +2400,7 @@ ProjectStore::Oplog::CaptureAddedAttachments(std::span<const IoHash> AttachmentH void ProjectStore::Oplog::EnableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_OplogLock.WithExclusiveLock([&]() { if (m_UpdateCaptureRefCounter == 0) { ZEN_ASSERT(!m_CapturedLSNs); @@ -2332,7 +2420,7 @@ ProjectStore::Oplog::EnableUpdateCapture() void ProjectStore::Oplog::DisableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_OplogLock.WithExclusiveLock([&]() { ZEN_ASSERT(m_CapturedLSNs); ZEN_ASSERT(m_CapturedAttachments); ZEN_ASSERT(m_UpdateCaptureRefCounter > 0); @@ -2346,30 +2434,27 @@ ProjectStore::Oplog::DisableUpdateCapture() } void -ProjectStore::Oplog::IterateCapturedLSNs(std::function<bool(const CbObjectView& UpdateOp)>&& Callback) +ProjectStore::Oplog::IterateCapturedLSNsLocked(std::function<bool(const CbObjectView& UpdateOp)>&& Callback) { - m_UpdateCaptureLock.WithExclusiveLock([&]() { - if (m_CapturedLSNs) + if (m_CapturedLSNs) + { + if (!m_Storage) { - if (!m_Storage) - { - return; - } - for (uint32_t UpdatedLSN : *m_CapturedLSNs) + return; + } + for (uint32_t UpdatedLSN : *m_CapturedLSNs) + { + if (const auto AddressEntryIt = m_OpAddressMap.find(UpdatedLSN); AddressEntryIt != m_OpAddressMap.end()) { - if (const auto AddressEntryIt = m_OpAddressMap.find(UpdatedLSN); AddressEntryIt != m_OpAddressMap.end()) - { - Callback(m_Storage->GetOp(AddressEntryIt->second)); - } + Callback(m_Storage->GetOp(AddressEntryIt->second)); } } - }); + } } std::vector<IoHash> -ProjectStore::Oplog::GetCapturedAttachments() +ProjectStore::Oplog::GetCapturedAttachmentsLocked() { - RwLock::SharedLockScope _(m_UpdateCaptureLock); if (m_CapturedAttachments) { return *m_CapturedAttachments; @@ -2377,6 +2462,69 @@ ProjectStore::Oplog::GetCapturedAttachments() return {}; } +std::vector<IoHash> +ProjectStore::Oplog::CheckPendingChunkReferences(std::span<const IoHash> ChunkHashes, const GcClock::Duration& RetainTime) +{ + m_OplogLock.WithExclusiveLock([&]() { + GcClock::TimePoint Now = GcClock::Now(); + if (m_PendingPrepOpAttachmentsRetainEnd < Now) + { + m_PendingPrepOpAttachments.clear(); + } + m_PendingPrepOpAttachments.insert(ChunkHashes.begin(), ChunkHashes.end()); + GcClock::TimePoint NewEndPoint = Now + RetainTime; + if (m_PendingPrepOpAttachmentsRetainEnd < NewEndPoint) + { + m_PendingPrepOpAttachmentsRetainEnd = NewEndPoint; + } + }); + + std::vector<IoHash> MissingChunks; + MissingChunks.reserve(ChunkHashes.size()); + for (const IoHash& FileHash : ChunkHashes) + { + if (!m_CidStore.ContainsChunk(FileHash)) + { + MissingChunks.push_back(FileHash); + } + } + + return MissingChunks; +} + +void +ProjectStore::Oplog::RemovePendingChunkReferences(std::span<const IoHash> ChunkHashes) +{ + m_OplogLock.WithExclusiveLock([&]() { + GcClock::TimePoint Now = GcClock::Now(); + if (m_PendingPrepOpAttachmentsRetainEnd < Now) + { + m_PendingPrepOpAttachments.clear(); + } + else + { + for (const IoHash& Chunk : ChunkHashes) + { + m_PendingPrepOpAttachments.erase(Chunk); + } + } + }); +} + +std::vector<IoHash> +ProjectStore::Oplog::GetPendingChunkReferencesLocked() +{ + std::vector<IoHash> Result; + Result.reserve(m_PendingPrepOpAttachments.size()); + Result.insert(Result.end(), m_PendingPrepOpAttachments.begin(), m_PendingPrepOpAttachments.end()); + GcClock::TimePoint Now = GcClock::Now(); + if (m_PendingPrepOpAttachmentsRetainEnd < Now) + { + m_PendingPrepOpAttachments.clear(); + } + return Result; +} + void ProjectStore::Oplog::AddFileMapping(const RwLock::ExclusiveLockScope&, const Oid& FileId, @@ -2428,7 +2576,7 @@ ProjectStore::Oplog::GetMapping(CbObjectView Core) CbObjectView PackageObj = Field.AsObjectView(); Oid Id = PackageObj["id"sv].AsObjectId(); IoHash Hash = PackageObj["data"sv].AsBinaryAttachment(); - Result.Chunks.emplace_back(OplogEntryMapping::Mapping{Id, Hash}); + Result.Chunks.emplace_back(ChunkMapping{Id, Hash}); ZEN_DEBUG("oplog {}/{}: package data {} -> {}", m_OuterProject->Identifier, m_OplogId, Id, Hash); continue; } @@ -2440,7 +2588,7 @@ ProjectStore::Oplog::GetMapping(CbObjectView Core) CbObjectView BulkObj = Entry.AsObjectView(); Oid Id = BulkObj["id"sv].AsObjectId(); IoHash Hash = BulkObj["data"sv].AsBinaryAttachment(); - Result.Chunks.emplace_back(OplogEntryMapping::Mapping{Id, Hash}); + Result.Chunks.emplace_back(ChunkMapping{Id, Hash}); ZEN_DEBUG("oplog {}/{}: bulkdata {} -> {}", m_OuterProject->Identifier, m_OplogId, Id, Hash); } continue; @@ -2453,7 +2601,7 @@ ProjectStore::Oplog::GetMapping(CbObjectView Core) CbObjectView PackageDataObj = Entry.AsObjectView(); Oid Id = PackageDataObj["id"sv].AsObjectId(); IoHash Hash = PackageDataObj["data"sv].AsBinaryAttachment(); - Result.Chunks.emplace_back(OplogEntryMapping::Mapping{Id, Hash}); + Result.Chunks.emplace_back(ChunkMapping{Id, Hash}); ZEN_DEBUG("oplog {}/{}: package {} -> {}", m_OuterProject->Identifier, m_OplogId, Id, Hash); } continue; @@ -2487,7 +2635,7 @@ ProjectStore::Oplog::GetMapping(CbObjectView Core) continue; } - Result.Files.emplace_back(OplogEntryMapping::FileMapping{Id, Hash, std::string(ServerPath), std::string(ClientPath)}); + Result.Files.emplace_back(FileMapping{Id, Hash, std::string(ServerPath), std::string(ClientPath)}); ZEN_DEBUG("oplog {}/{}: file {} -> {}, ServerPath: {}, ClientPath: {}", m_OuterProject->Identifier, m_OplogId, @@ -2507,7 +2655,7 @@ ProjectStore::Oplog::GetMapping(CbObjectView Core) CbObjectView MetaObj = Entry.AsObjectView(); Oid Id = MetaObj["id"sv].AsObjectId(); IoHash Hash = MetaObj["data"sv].AsBinaryAttachment(); - Result.Meta.emplace_back(OplogEntryMapping::Mapping{Id, Hash}); + Result.Meta.emplace_back(ChunkMapping{Id, Hash}); auto NameString = MetaObj["name"sv].AsString(); ZEN_DEBUG("oplog {}/{}: meta data ({}) {} -> {}", m_OuterProject->Identifier, m_OplogId, NameString, Id, Hash); } @@ -2529,17 +2677,17 @@ ProjectStore::Oplog::RegisterOplogEntry(RwLock::ExclusiveLockScope& OplogLock, using namespace std::literals; // Update chunk id maps - for (const OplogEntryMapping::Mapping& Chunk : OpMapping.Chunks) + for (const ChunkMapping& Chunk : OpMapping.Chunks) { AddChunkMapping(OplogLock, Chunk.Id, Chunk.Hash); } - for (const OplogEntryMapping::FileMapping& File : OpMapping.Files) + for (const FileMapping& File : OpMapping.Files) { AddFileMapping(OplogLock, File.Id, File.Hash, File.ServerPath, File.ClientPath); } - for (const OplogEntryMapping::Mapping& Meta : OpMapping.Meta) + for (const ChunkMapping& Meta : OpMapping.Meta) { AddMetaMapping(OplogLock, Meta.Id, Meta.Hash); } @@ -2637,7 +2785,10 @@ ProjectStore::Oplog::AppendNewOplogEntry(CbObjectView Core) RwLock::ExclusiveLockScope OplogLock(m_OplogLock); const uint32_t EntryId = RegisterOplogEntry(OplogLock, Mapping, OpEntry); - CaptureUpdatedLSNs(std::array<uint32_t, 1u>({EntryId})); + if (m_CapturedLSNs) + { + m_CapturedLSNs->push_back(EntryId); + } m_MetaValid = false; return EntryId; @@ -2676,12 +2827,19 @@ ProjectStore::Oplog::AppendNewOplogEntries(std::span<CbObjectView> Cores) { { RwLock::ExclusiveLockScope OplogLock(m_OplogLock); + if (m_CapturedLSNs) + { + m_CapturedLSNs->reserve(m_CapturedLSNs->size() + OpCount); + } for (size_t OpIndex = 0; OpIndex < OpCount; OpIndex++) { EntryIds[OpIndex] = RegisterOplogEntry(OplogLock, Mappings[OpIndex], OpEntries[OpIndex]); + if (m_CapturedLSNs) + { + m_CapturedLSNs->push_back(EntryIds[OpIndex]); + } } } - CaptureUpdatedLSNs(EntryIds); m_MetaValid = false; } return EntryIds; @@ -2909,12 +3067,10 @@ ProjectStore::Project::NewOplog(std::string_view OplogId, const std::filesystem: Log->Write(); - m_UpdateCaptureLock.WithExclusiveLock([&]() { - if (m_CapturedOplogs) - { - m_CapturedOplogs->push_back(std::string(OplogId)); - } - }); + if (m_CapturedOplogs) + { + m_CapturedOplogs->push_back(std::string(OplogId)); + } return Log; } @@ -3201,7 +3357,7 @@ ProjectStore::Project::PrepareForDelete(std::filesystem::path& OutDeletePath) void ProjectStore::Project::EnableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_ProjectLock.WithExclusiveLock([&]() { if (m_UpdateCaptureRefCounter == 0) { ZEN_ASSERT(!m_CapturedOplogs); @@ -3218,7 +3374,7 @@ ProjectStore::Project::EnableUpdateCapture() void ProjectStore::Project::DisableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_ProjectLock.WithExclusiveLock([&]() { ZEN_ASSERT(m_CapturedOplogs); ZEN_ASSERT(m_UpdateCaptureRefCounter > 0); m_UpdateCaptureRefCounter--; @@ -3230,9 +3386,8 @@ ProjectStore::Project::DisableUpdateCapture() } std::vector<std::string> -ProjectStore::Project::GetCapturedOplogs() +ProjectStore::Project::GetCapturedOplogsLocked() { - RwLock::SharedLockScope _(m_UpdateCaptureLock); if (m_CapturedOplogs) { return *m_CapturedOplogs; @@ -3593,12 +3748,10 @@ ProjectStore::NewProject(const std::filesystem::path& BasePath, Prj->ProjectFilePath = ProjectFilePath; Prj->Write(); - m_UpdateCaptureLock.WithExclusiveLock([&]() { - if (m_CapturedProjects) - { - m_CapturedProjects->push_back(std::string(ProjectId)); - } - }); + if (m_CapturedProjects) + { + m_CapturedProjects->push_back(std::string(ProjectId)); + } return Prj; } @@ -4850,7 +5003,7 @@ ProjectStore::AreDiskWritesAllowed() const void ProjectStore::EnableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_ProjectsLock.WithExclusiveLock([&]() { if (m_UpdateCaptureRefCounter == 0) { ZEN_ASSERT(!m_CapturedProjects); @@ -4867,7 +5020,7 @@ ProjectStore::EnableUpdateCapture() void ProjectStore::DisableUpdateCapture() { - m_UpdateCaptureLock.WithExclusiveLock([&]() { + m_ProjectsLock.WithExclusiveLock([&]() { ZEN_ASSERT(m_CapturedProjects); ZEN_ASSERT(m_UpdateCaptureRefCounter > 0); m_UpdateCaptureRefCounter--; @@ -4879,9 +5032,8 @@ ProjectStore::DisableUpdateCapture() } std::vector<std::string> -ProjectStore::GetCapturedProjects() +ProjectStore::GetCapturedProjectsLocked() { - RwLock::SharedLockScope _(m_UpdateCaptureLock); if (m_CapturedProjects) { return *m_CapturedProjects; @@ -5198,7 +5350,7 @@ public: AddedOplogs.size()); }); - std::vector<std::string> AddedProjects = m_ProjectStore.GetCapturedProjects(); + std::vector<std::string> AddedProjects = m_ProjectStore.GetCapturedProjectsLocked(); for (const std::string& AddedProject : AddedProjects) { if (auto It = m_ProjectStore.m_Projects.find(AddedProject); It != m_ProjectStore.m_Projects.end()) @@ -5213,8 +5365,9 @@ public: } for (auto& ProjectPair : m_ProjectStore.m_Projects) { - ProjectStore::Project& Project = *ProjectPair.second; - std::vector<std::string> AddedOplogNames(Project.GetCapturedOplogs()); + ProjectStore::Project& Project = *ProjectPair.second; + + std::vector<std::string> AddedOplogNames(Project.GetCapturedOplogsLocked()); for (const std::string& OplogName : AddedOplogNames) { if (auto It = Project.m_Oplogs.find(OplogName); It != Project.m_Oplogs.end()) @@ -5243,7 +5396,12 @@ public: }); Oplog->GetAttachmentsLocked(m_References, Ctx.Settings.StoreProjectAttachmentMetaData); + if (std::vector<IoHash> PendingChunkReferences = Oplog->GetPendingChunkReferencesLocked(); !PendingChunkReferences.empty()) + { + m_References.insert(m_References.end(), PendingChunkReferences.begin(), PendingChunkReferences.end()); + } } + FilterReferences(Ctx, fmt::format("projectstore [LOCKSTATE] '{}'", "projectstore"), m_References); } @@ -5404,12 +5562,16 @@ public: if (auto It = m_Project->m_Oplogs.find(m_OplogId); It != m_Project->m_Oplogs.end()) { ProjectStore::Oplog* Oplog = It->second.get(); - Oplog->IterateCapturedLSNs([&](const CbObjectView& UpdateOp) -> bool { + Oplog->IterateCapturedLSNsLocked([&](const CbObjectView& UpdateOp) -> bool { UpdateOp.IterateAttachments([&](CbFieldView Visitor) { m_AddedReferences.emplace_back(Visitor.AsAttachment()); }); return true; }); - std::vector<IoHash> AddedAttachments = Oplog->GetCapturedAttachments(); + std::vector<IoHash> AddedAttachments = Oplog->GetCapturedAttachmentsLocked(); m_AddedReferences.insert(m_AddedReferences.end(), AddedAttachments.begin(), AddedAttachments.end()); + if (std::vector<IoHash> PendingChunkReferences = Oplog->GetPendingChunkReferencesLocked(); !PendingChunkReferences.empty()) + { + m_AddedReferences.insert(m_AddedReferences.end(), PendingChunkReferences.begin(), PendingChunkReferences.end()); + } } else if (m_Project->LastOplogAccessTime(m_OplogId) > m_OplogAccessTime && ProjectStore::Oplog::ExistsAt(m_OplogBasePath)) { @@ -5551,6 +5713,137 @@ ProjectStore::LockState(GcCtx& Ctx) return Locks; } +class ProjectStoreOplogReferenceValidator : public GcReferenceValidator +{ +public: + ProjectStoreOplogReferenceValidator(ProjectStore& InProjectStore, std::string_view InProject, std::string_view InOplog) + : m_ProjectStore(InProjectStore) + , m_ProjectId(InProject) + , m_OplogId(InOplog) + { + } + + virtual ~ProjectStoreOplogReferenceValidator() {} + + virtual std::string GetGcName(GcCtx&) override { return fmt::format("oplog: '{}/{}'", m_ProjectId, m_OplogId); } + + virtual void Validate(GcCtx& Ctx, GcReferenceValidatorStats& Stats) override + { + ZEN_TRACE_CPU("Store::Validate"); + + auto Log = [&Ctx]() { return Ctx.Logger; }; + + ProjectStore::Oplog::ValidationResult Result; + + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + std::string Status = Result.IsEmpty() ? "OK" : "Missing data"; + ZEN_INFO("GCV2: projectstore [VALIDATE] '{}/{}': Validated in {}. OpCount: {}, MinLSN: {}, MaxLSN: {}, Status: {}", + m_ProjectId, + m_OplogId, + NiceTimeSpanMs(Timer.GetElapsedTimeMs()), + Result.OpCount, + Result.LSNLow, + Result.LSNHigh, + Status); + }); + ProjectStore::Oplog* TempOplog = nullptr; + auto __ = MakeGuard([this, &TempOplog]() { + if (TempOplog != nullptr) + { + delete TempOplog; + } + }); + ProjectStore::Oplog* Oplog = nullptr; + Ref<ProjectStore::Project> Project = m_ProjectStore.OpenProject(m_ProjectId); + if (Project) + { + RwLock::SharedLockScope ___(Project->m_ProjectLock); + if (auto It = Project->m_Oplogs.find(m_OplogId); It != Project->m_Oplogs.end()) + { + Oplog = It->second.get(); + } + else + { + std::filesystem::path OplogBasePath = Project->BasePathForOplog(m_OplogId); + TempOplog = new ProjectStore::Oplog(m_OplogId, Project.Get(), Project->m_CidStore, OplogBasePath, std::filesystem::path{}); + Oplog = TempOplog; + Oplog->Read(); + + if (Ctx.IsCancelledFlag) + { + return; + } + } + + if (Oplog != nullptr) + { + Result = Oplog->Validate(Ctx.IsCancelledFlag); + if (Ctx.IsCancelledFlag) + { + return; + } + Stats.CheckedCount = Result.OpCount; + Stats.MissingChunks = Result.MissingChunks.size(); + Stats.MissingFiles = Result.MissingFiles.size(); + Stats.MissingMetas = Result.MissingMetas.size(); + Stats.MissingAttachments = Result.MissingAttachments.size(); + } + + if (!Result.IsEmpty()) + { + ZEN_WARN("GCV2: projectstore [VALIDATE] '{}/{}': Missing data: Files: {}, Chunks: {}, Metas: {}, Attachments: {}", + m_ProjectId, + m_OplogId, + Result.MissingFiles.size(), + Result.MissingChunks.size(), + Result.MissingMetas.size(), + Result.MissingAttachments.size()); + } + } + } + + ProjectStore& m_ProjectStore; + std::string m_ProjectId; + std::string m_OplogId; +}; + +std::vector<GcReferenceValidator*> +ProjectStore::CreateReferenceValidators(GcCtx& Ctx) +{ + if (Ctx.Settings.SkipCidDelete) + { + return {}; + } + DiscoverProjects(); + + std::vector<std::pair<std::string, std::string>> Oplogs; + { + RwLock::SharedLockScope _(m_ProjectsLock); + for (auto& ProjectPair : m_Projects) + { + ProjectStore::Project& Project = *ProjectPair.second; + std::vector<std::string> OpLogs = Project.ScanForOplogs(); + for (const std::string& OplogName : OpLogs) + { + Oplogs.push_back({Project.Identifier, OplogName}); + } + } + } + std::vector<GcReferenceValidator*> Validators; + Validators.reserve(Oplogs.size()); + for (const std::pair<std::string, std::string>& Oplog : Oplogs) + { + Validators.push_back(new ProjectStoreOplogReferenceValidator(*this, Oplog.first, Oplog.second)); + } + + return Validators; +} + ////////////////////////////////////////////////////////////////////////// Oid @@ -5933,106 +6226,317 @@ TEST_CASE("project.store.gc") } } - SUBCASE("v2") { - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - std::filesystem::remove(Project1FilePath); + std::filesystem::remove(Project1FilePath); - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(5u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), - .CollectSmallObjects = true, - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(4u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(7u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(4u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(21u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(7u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(!ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - std::filesystem::remove(Project2Oplog1Path); - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), - .CollectSmallObjects = true, - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(3u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + std::filesystem::remove(Project2Oplog1Path); + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(3u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(!ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), - .CollectSmallObjects = true, - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(3u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(ProjectStore.OpenProject("proj2"sv)); - } + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(3u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(0u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(!ProjectStore.OpenProject("proj1"sv)); + CHECK(ProjectStore.OpenProject("proj2"sv)); + } - std::filesystem::remove(Project2FilePath); - { - GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), - .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), - .CollectSmallObjects = true, - .IsDeleteMode = true}; - GcResult Result = Gc.CollectGarbage(Settings); - CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); - CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); - CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); - CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); - CHECK(!ProjectStore.OpenProject("proj1"sv)); - CHECK(!ProjectStore.OpenProject("proj2"sv)); - } + std::filesystem::remove(Project2FilePath); + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), + .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.CheckedCount); + CHECK_EQ(1u, Result.ReferencerStatSum.RemoveExpiredDataStats.DeletedCount); + CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.CheckedCount); + CHECK_EQ(14u, Result.ReferenceStoreStatSum.RemoveUnreferencedDataStats.DeletedCount); + CHECK(!ProjectStore.OpenProject("proj1"sv)); + CHECK(!ProjectStore.OpenProject("proj2"sv)); + } +} + +TEST_CASE("project.store.gc.prep") +{ + using namespace std::literals; + using namespace testutils; + + ScopedTemporaryDirectory TempDir; + + auto JobQueue = MakeJobQueue(1, ""sv); + GcManager Gc; + CidStore CidStore(Gc); + CidStoreConfiguration CidConfig = {.RootDirectory = TempDir.Path() / "cas", .TinyValueThreshold = 1024, .HugeValueThreshold = 4096}; + CidStore.Initialize(CidConfig); + + std::filesystem::path BasePath = TempDir.Path() / "projectstore"; + ProjectStore ProjectStore(CidStore, BasePath, Gc, *JobQueue, ProjectStore::Configuration{}); + std::filesystem::path RootDir = TempDir.Path() / "root"; + std::filesystem::path EngineRootDir = TempDir.Path() / "engine"; + + std::filesystem::path Project1RootDir = TempDir.Path() / "game1"; + std::filesystem::path Project1FilePath = TempDir.Path() / "game1" / "game.uproject"; + { + CreateDirectories(Project1FilePath.parent_path()); + BasicFile ProjectFile; + ProjectFile.Open(Project1FilePath, BasicFile::Mode::kTruncate); + } + std::filesystem::path Project1OplogPath = TempDir.Path() / "game1" / "saves" / "cooked" / ".projectstore"; + { + CreateDirectories(Project1OplogPath.parent_path()); + BasicFile OplogFile; + OplogFile.Open(Project1OplogPath, BasicFile::Mode::kTruncate); + } + + std::vector<std::pair<Oid, CompressedBuffer>> OpAttachments = CreateAttachments(std::initializer_list<size_t>{7123, 583, 690, 99}); + std::vector<IoHash> OpChunkHashes; + for (const auto& Chunk : OpAttachments) + { + OpChunkHashes.push_back(Chunk.second.DecodeRawHash()); + } + + { + Ref<ProjectStore::Project> Project1(ProjectStore.NewProject(BasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + Project1RootDir.string(), + Project1FilePath.string())); + ProjectStore::Oplog* Oplog = Project1->NewOplog("oplog1"sv, Project1OplogPath); + Oplog->AppendNewOplogEntry(CreateOplogPackage(Oid::NewOid(), OpAttachments)); + } + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + Project1->DeleteOplog("oplog1"sv); + } + + // Equivalent of a `prep` existance check call + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + + // If a gc comes in between our prep and op write the chunks will be removed + for (auto Attachment : OpAttachments) + { + CHECK(!CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + // Make sure the chunks are stored but not the referencing op + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + ProjectStore::Oplog* Oplog = Project1->NewOplog("oplog1"sv, Project1OplogPath); + Oplog->AppendNewOplogEntry(CreateOplogPackage(Oid::NewOid(), OpAttachments)); + Project1->DeleteOplog("oplog1"sv); + } + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + ProjectStore::Oplog* Oplog = Project1->NewOplog("oplog1"sv, Project1OplogPath); + + // Equivalent of a `prep` call with tracking of ops + CHECK(Oplog->CheckPendingChunkReferences(OpChunkHashes, std::chrono::hours(1)).empty()); + } + + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + + // Attachments should now be retained + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + + // Attachments should now be retained across multiple GCs if retain time is still valud + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + ProjectStore::Oplog* Oplog = Project1->OpenOplog("oplog1"sv, true, true); + Oplog->AppendNewOplogEntry(CreateOplogPackage(Oid::NewOid(), OpAttachments)); + Oplog->RemovePendingChunkReferences(OpChunkHashes); + CHECK(Oplog->GetPendingChunkReferencesLocked().size() == 0); + } + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + Project1->DeleteOplog("oplog1"sv); + } + + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + + for (auto Attachment : OpAttachments) + { + CHECK(!CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + Project1->DeleteOplog("oplog1"sv); + } + { + // Make sure the chunks are stored but not the referencing op + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + ProjectStore::Oplog* Oplog = Project1->NewOplog("oplog1"sv, Project1OplogPath); + Oplog->AppendNewOplogEntry(CreateOplogPackage(Oid::NewOid(), OpAttachments)); + Project1->DeleteOplog("oplog1"sv); + } + + // Caution - putting breakpoints and stepping through this part of the test likely makes it fails due to expiry time of pending chunks + { + Ref<ProjectStore::Project> Project1 = ProjectStore.OpenProject("proj1"sv); + ProjectStore::Oplog* Oplog = Project1->NewOplog("oplog1"sv, Project1OplogPath); + + CHECK(Oplog->CheckPendingChunkReferences(OpChunkHashes, std::chrono::milliseconds(100)).empty()); + } + + // This pass they should be retained and while the ops are picked up in GC we are blocked from adding our op + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + Sleep(200); + // This pass they should also be retained since our age retention has kept them alive and they will now be picked up and the retention + // cleared + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + for (auto Attachment : OpAttachments) + { + CHECK(CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); + } + + // This pass the retention time has expired and the last GC pass cleared the entries + { + GcSettings Settings = {.CacheExpireTime = GcClock::Now(), + .ProjectStoreExpireTime = GcClock::Now(), + .CollectSmallObjects = true, + .IsDeleteMode = true}; + GcResult Result = Gc.CollectGarbage(Settings); + } + + for (auto Attachment : OpAttachments) + { + CHECK(!CidStore.ContainsChunk(Attachment.second.DecodeRawHash())); } } diff --git a/src/zenserver/projectstore/projectstore.h b/src/zenserver/projectstore/projectstore.h index 49970b677..1619151dd 100644 --- a/src/zenserver/projectstore/projectstore.h +++ b/src/zenserver/projectstore/projectstore.h @@ -161,13 +161,14 @@ public: void AddChunkMappings(const std::unordered_map<Oid, IoHash, Oid::Hasher>& ChunkMappings); - void CaptureUpdatedLSNs(std::span<const uint32_t> LSNs); - void CaptureAddedAttachments(std::span<const IoHash> AttachmentHashes); + void EnableUpdateCapture(); + void DisableUpdateCapture(); + void CaptureAddedAttachments(std::span<const IoHash> AttachmentHashes); + std::vector<IoHash> GetCapturedAttachmentsLocked(); + std::vector<IoHash> CheckPendingChunkReferences(std::span<const IoHash> ChunkHashes, const GcClock::Duration& RetainTime); + void RemovePendingChunkReferences(std::span<const IoHash> ChunkHashes); + std::vector<IoHash> GetPendingChunkReferencesLocked(); - void EnableUpdateCapture(); - void DisableUpdateCapture(); - void IterateCapturedLSNs(std::function<bool(const CbObjectView& UpdateOp)>&& Callback); - std::vector<IoHash> GetCapturedAttachments(); RwLock::SharedLockScope GetGcReferencerLock() { return RwLock::SharedLockScope(m_OplogLock); } uint32_t GetUnusedSpacePercent() const; @@ -180,6 +181,39 @@ public: static std::optional<CbObject> ReadStateFile(const std::filesystem::path& BasePath, std::function<LoggerRef()>&& Log); + struct ChunkMapping + { + Oid Id; + IoHash Hash; + }; + + struct FileMapping + { + Oid Id; + IoHash Hash; // This is either zero or a cid + std::string ServerPath; // If Hash is valid then this should be empty + std::string ClientPath; + }; + + struct ValidationResult + { + uint32_t OpCount = 0; + uint32_t LSNLow = 0; + uint32_t LSNHigh = 0; + std::vector<std::pair<Oid, FileMapping>> MissingFiles; + std::vector<std::pair<Oid, ChunkMapping>> MissingChunks; + std::vector<std::pair<Oid, ChunkMapping>> MissingMetas; + std::vector<std::pair<Oid, IoHash>> MissingAttachments; + std::vector<std::pair<Oid, std::string>> OpKeys; + + bool IsEmpty() const + { + return MissingFiles.empty() && MissingChunks.empty() && MissingMetas.empty() && MissingAttachments.empty(); + } + }; + + ValidationResult Validate(std::atomic_bool& IsCancelledFlag); + private: struct FileMapEntry { @@ -207,10 +241,11 @@ public: OidMap<uint32_t> m_LatestOpMap; // op key -> latest op LSN for key std::atomic<bool> m_MetaValid = false; - mutable RwLock m_UpdateCaptureLock; - uint32_t m_UpdateCaptureRefCounter = 0; - std::unique_ptr<std::vector<uint32_t>> m_CapturedLSNs; - std::unique_ptr<std::vector<IoHash>> m_CapturedAttachments; + uint32_t m_UpdateCaptureRefCounter = 0; + std::unique_ptr<std::vector<uint32_t>> m_CapturedLSNs; + std::unique_ptr<std::vector<IoHash>> m_CapturedAttachments; + std::unordered_set<IoHash, IoHash::Hasher> m_PendingPrepOpAttachments; + GcClock::TimePoint m_PendingPrepOpAttachmentsRetainEnd; RefPtr<OplogStorage> m_Storage; uint64_t m_LogFlushPosition = 0; @@ -225,23 +260,9 @@ public: struct OplogEntryMapping { - struct Mapping - { - Oid Id; - IoHash Hash; - }; - - struct FileMapping - { - Oid Id; - IoHash Hash; // This is either zero or a cid - std::string ServerPath; // If Hash is valid then this should be empty - std::string ClientPath; - }; - - std::vector<Mapping> Chunks; - std::vector<Mapping> Meta; - std::vector<FileMapping> Files; + std::vector<ChunkMapping> Chunks; + std::vector<ChunkMapping> Meta; + std::vector<FileMapping> Files; }; OplogEntryMapping GetMapping(CbObjectView Core); @@ -262,9 +283,11 @@ public: void AddChunkMapping(const RwLock::ExclusiveLockScope& OplogLock, const Oid& ChunkId, const IoHash& Hash); void AddMetaMapping(const RwLock::ExclusiveLockScope& OplogLock, const Oid& ChunkId, const IoHash& Hash); void Compact(RwLock::ExclusiveLockScope& Lock, bool DryRun, bool RetainLSNs, std::string_view LogPrefix); + void IterateCapturedLSNsLocked(std::function<bool(const CbObjectView& UpdateOp)>&& Callback); friend class ProjectStoreOplogReferenceChecker; friend class ProjectStoreReferenceChecker; + friend class ProjectStoreOplogReferenceValidator; }; struct Project : public RefCounted @@ -305,7 +328,7 @@ public: void EnableUpdateCapture(); void DisableUpdateCapture(); - std::vector<std::string> GetCapturedOplogs(); + std::vector<std::string> GetCapturedOplogsLocked(); std::vector<RwLock::SharedLockScope> GetGcReferencerLocks(); @@ -333,7 +356,6 @@ public: std::filesystem::path m_OplogStoragePath; mutable RwLock m_LastAccessTimesLock; mutable tsl::robin_map<std::string, GcClock::Tick> m_LastAccessTimes; - mutable RwLock m_UpdateCaptureLock; uint32_t m_UpdateCaptureRefCounter = 0; std::unique_ptr<std::vector<std::string>> m_CapturedOplogs; @@ -347,11 +369,10 @@ public: friend class ProjectStoreOplogReferenceChecker; friend class ProjectStoreReferenceChecker; + friend class ProjectStoreOplogReferenceValidator; friend class ProjectStoreGcStoreCompactor; }; - // Oplog* OpenProjectOplog(std::string_view ProjectId, std::string_view OplogId); - Ref<Project> OpenProject(std::string_view ProjectId); Ref<Project> NewProject(const std::filesystem::path& BasePath, std::string_view ProjectId, @@ -377,9 +398,10 @@ public: virtual void ScrubStorage(ScrubContext& Ctx) override; virtual GcStorageSize StorageSize() const override; - virtual std::string GetGcName(GcCtx& Ctx) override; - virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; - virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; + virtual std::string GetGcName(GcCtx& Ctx) override; + virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; + virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; + virtual std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) override; virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) override; @@ -454,7 +476,7 @@ public: void EnableUpdateCapture(); void DisableUpdateCapture(); - std::vector<std::string> GetCapturedProjects(); + std::vector<std::string> GetCapturedProjectsLocked(); private: LoggerRef m_Log; @@ -465,8 +487,7 @@ private: const Configuration m_Config; mutable RwLock m_ProjectsLock; std::map<std::string, Ref<Project>> m_Projects; - const DiskWriteBlocker* m_DiskWriteBlocker = nullptr; - mutable RwLock m_UpdateCaptureLock; + const DiskWriteBlocker* m_DiskWriteBlocker = nullptr; uint32_t m_UpdateCaptureRefCounter = 0; std::unique_ptr<std::vector<std::string>> m_CapturedProjects; |