diff options
Diffstat (limited to 'src')
54 files changed, 5757 insertions, 1831 deletions
diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp index 502d1e799..15e854796 100644 --- a/src/zen/cmds/admin_cmd.cpp +++ b/src/zen/cmds/admin_cmd.cpp @@ -21,6 +21,12 @@ ScrubCommand::ScrubCommand() m_Options.add_option("", "n", "dry", "Dry run (do not delete any data)", cxxopts::value(m_DryRun), "<bool>"); m_Options.add_option("", "", "no-gc", "Do not perform GC after scrub pass", cxxopts::value(m_NoGc), "<bool>"); m_Options.add_option("", "", "no-cas", "Do not scrub CAS stores", cxxopts::value(m_NoCas), "<bool>"); + m_Options.add_option("", + "", + "maxtimeslice", + "Number of second Scrub is allowed to run before stopping in seconds (default 300s)", + cxxopts::value(m_MaxTimeSliceSeconds), + "<maxtimeslice>"); } ScrubCommand::~ScrubCommand() = default; @@ -44,7 +50,10 @@ ScrubCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) HttpClient Http(m_HostName); - HttpClient::KeyValueMap Params{{"skipdelete", ToString(m_DryRun)}, {"skipgc", ToString(m_NoGc)}, {"skipcid", ToString(m_NoCas)}}; + HttpClient::KeyValueMap Params{{"skipdelete", ToString(m_DryRun)}, + {"skipgc", ToString(m_NoGc)}, + {"skipcid", ToString(m_NoCas)}, + {"maxtimeslice", fmt::format("{}", m_MaxTimeSliceSeconds)}}; if (HttpClient::Response Response = Http.Post("/admin/scrub"sv, /* headers */ HttpClient::KeyValueMap{}, Params)) { diff --git a/src/zen/cmds/admin_cmd.h b/src/zen/cmds/admin_cmd.h index 4f97b7ad4..87ef8091b 100644 --- a/src/zen/cmds/admin_cmd.h +++ b/src/zen/cmds/admin_cmd.h @@ -22,9 +22,10 @@ public: private: cxxopts::Options m_Options{"scrub", "Scrub zen storage"}; std::string m_HostName; - bool m_DryRun = false; - bool m_NoGc = false; - bool m_NoCas = false; + bool m_DryRun = false; + bool m_NoGc = false; + bool m_NoCas = false; + uint64_t m_MaxTimeSliceSeconds = 300; }; /** Garbage collect storage diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index d7980cc24..f4edb65ab 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ 
b/src/zen/cmds/builds_cmd.cpp @@ -24,6 +24,7 @@ #include <zenhttp/httpclientauth.h> #include <zenhttp/httpcommon.h> #include <zenremotestore/builds/buildcontent.h> +#include <zenremotestore/builds/buildmanifest.h> #include <zenremotestore/builds/buildsavedstate.h> #include <zenremotestore/builds/buildstoragecache.h> #include <zenremotestore/builds/buildstorageoperations.h> @@ -33,6 +34,7 @@ #include <zenremotestore/chunking/chunkblock.h> #include <zenremotestore/chunking/chunkedcontent.h> #include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/chunking/chunkingcontroller.h> #include <zenremotestore/filesystemutils.h> #include <zenremotestore/jupiter/jupiterhost.h> @@ -289,13 +291,6 @@ namespace { return BoostWorkerMemory ? (MaxBlockSize + 16u * 1024u) : 1024u * 1024u; } - bool IncludePath(std::span<const std::string> IncludeWildcards, - std::span<const std::string> ExcludeWildcards, - const std::filesystem::path& Path) - { - return zen::IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(Path.generic_string()), /*CaseSensitive*/ true); - } - class FilteredRate { public: @@ -427,253 +422,269 @@ namespace { NiceTimeSpanMs(ValidateOp.m_ValidateStats.ElapsedWallTimeUS / 1000)); } - void UploadFolder(OperationLogOutput& Output, - TransferThreadWorkers& Workers, - StorageInstance& Storage, - const Oid& BuildId, - const Oid& BuildPartId, - const std::string_view BuildPartName, - const std::filesystem::path& Path, - const std::filesystem::path& TempDir, - const std::filesystem::path& ManifestPath, - const uint64_t FindBlockMaxCount, - const uint8_t BlockReuseMinPercentLimit, - bool AllowMultiparts, - const CbObject& MetaData, - bool CreateBuild, - bool IgnoreExistingBlocks, - bool UploadToZenCache, - const std::vector<std::string>& ExcludeFolders, - const std::vector<std::string>& ExcludeExtensions) + struct UploadFolderOptions + { + std::filesystem::path TempDir; + uint64_t FindBlockMaxCount; + 
uint8_t BlockReuseMinPercentLimit; + bool AllowMultiparts; + bool CreateBuild; + bool IgnoreExistingBlocks; + bool UploadToZenCache; + const std::vector<std::string>& ExcludeFolders = DefaultExcludeFolders; + const std::vector<std::string>& ExcludeExtensions = DefaultExcludeExtensions; + }; + + std::vector<std::pair<Oid, std::string>> UploadFolder(OperationLogOutput& Output, + TransferThreadWorkers& Workers, + StorageInstance& Storage, + const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& Path, + const std::filesystem::path& ManifestPath, + const CbObject& MetaData, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + const UploadFolderOptions& Options) { ProgressBar::SetLogOperationName(ProgressMode, "Upload Folder"); + + Stopwatch UploadTimer; + + BuildsOperationUploadFolder UploadOp( + Output, + Storage, + AbortFlag, + PauseFlag, + Workers.GetIOWorkerPool(), + Workers.GetNetworkPool(), + BuildId, + Path, + Options.CreateBuild, + std::move(MetaData), + BuildsOperationUploadFolder::Options{.IsQuiet = IsQuiet, + .IsVerbose = IsVerbose, + .DoExtraContentValidation = DoExtraContentVerify, + .FindBlockMaxCount = Options.FindBlockMaxCount, + .BlockReuseMinPercentLimit = Options.BlockReuseMinPercentLimit, + .AllowMultiparts = Options.AllowMultiparts, + .IgnoreExistingBlocks = Options.IgnoreExistingBlocks, + .TempDir = Options.TempDir, + .ExcludeFolders = Options.ExcludeFolders, + .ExcludeExtensions = Options.ExcludeExtensions, + .ZenExcludeManifestName = ZenExcludeManifestName, + .NonCompressableExtensions = DefaultSplitOnlyExtensions, + .PopulateCache = Options.UploadToZenCache}); + + std::vector<std::pair<Oid, std::string>> UploadedParts = + UploadOp.Execute(BuildPartId, BuildPartName, ManifestPath, ChunkController, ChunkCache); + if (AbortFlag) { - Stopwatch UploadTimer; + return {}; + } - BuildsOperationUploadFolder UploadOp( - Output, - Storage, - AbortFlag, - PauseFlag, - 
Workers.GetIOWorkerPool(), - Workers.GetNetworkPool(), - BuildId, + ZEN_CONSOLE_VERBOSE( + "Folder scanning stats:" + "\n FoundFileCount: {}" + "\n FoundFileByteCount: {}" + "\n AcceptedFileCount: {}" + "\n AcceptedFileByteCount: {}" + "\n ElapsedWallTimeUS: {}", + UploadOp.m_LocalFolderScanStats.FoundFileCount.load(), + NiceBytes(UploadOp.m_LocalFolderScanStats.FoundFileByteCount.load()), + UploadOp.m_LocalFolderScanStats.AcceptedFileCount.load(), + NiceBytes(UploadOp.m_LocalFolderScanStats.AcceptedFileByteCount.load()), + NiceLatencyNs(UploadOp.m_LocalFolderScanStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Chunking stats:" + "\n FilesProcessed: {}" + "\n FilesChunked: {}" + "\n BytesHashed: {}" + "\n UniqueChunksFound: {}" + "\n UniqueSequencesFound: {}" + "\n UniqueBytesFound: {}" + "\n FilesFoundInCache: {}" + "\n ChunksFoundInCache: {}" + "\n FilesStoredInCache: {}" + "\n ChunksStoredInCache: {}" + "\n ElapsedWallTimeUS: {}", + UploadOp.m_ChunkingStats.FilesProcessed.load(), + UploadOp.m_ChunkingStats.FilesChunked.load(), + NiceBytes(UploadOp.m_ChunkingStats.BytesHashed.load()), + UploadOp.m_ChunkingStats.UniqueChunksFound.load(), + UploadOp.m_ChunkingStats.UniqueSequencesFound.load(), + NiceBytes(UploadOp.m_ChunkingStats.UniqueBytesFound.load()), + UploadOp.m_ChunkingStats.FilesFoundInCache.load(), + UploadOp.m_ChunkingStats.ChunksFoundInCache.load(), + NiceBytes(UploadOp.m_ChunkingStats.BytesFoundInCache.load()), + UploadOp.m_ChunkingStats.FilesStoredInCache.load(), + UploadOp.m_ChunkingStats.ChunksStoredInCache.load(), + NiceBytes(UploadOp.m_ChunkingStats.BytesStoredInCache.load()), + NiceLatencyNs(UploadOp.m_ChunkingStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Find block stats:" + "\n FindBlockTimeMS: {}" + "\n PotentialChunkCount: {}" + "\n PotentialChunkByteCount: {}" + "\n FoundBlockCount: {}" + "\n FoundBlockChunkCount: {}" + "\n FoundBlockByteCount: {}" + "\n AcceptedBlockCount: {}" + "\n NewBlocksCount: {}" + "\n 
NewBlocksChunkCount: {}" + "\n NewBlocksChunkByteCount: {}", + NiceTimeSpanMs(UploadOp.m_FindBlocksStats.FindBlockTimeMS), + UploadOp.m_FindBlocksStats.PotentialChunkCount, + NiceBytes(UploadOp.m_FindBlocksStats.PotentialChunkByteCount), + UploadOp.m_FindBlocksStats.FoundBlockCount, + UploadOp.m_FindBlocksStats.FoundBlockChunkCount, + NiceBytes(UploadOp.m_FindBlocksStats.FoundBlockByteCount), + UploadOp.m_FindBlocksStats.AcceptedBlockCount, + UploadOp.m_FindBlocksStats.NewBlocksCount, + UploadOp.m_FindBlocksStats.NewBlocksChunkCount, + NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount)); + + ZEN_CONSOLE_VERBOSE( + "Reuse block stats:" + "\n AcceptedChunkCount: {}" + "\n AcceptedByteCount: {}" + "\n AcceptedRawByteCount: {}" + "\n RejectedBlockCount: {}" + "\n RejectedChunkCount: {}" + "\n RejectedByteCount: {}" + "\n AcceptedReduntantChunkCount: {}" + "\n AcceptedReduntantByteCount: {}", + UploadOp.m_ReuseBlocksStats.AcceptedChunkCount, + NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedByteCount), + NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedRawByteCount), + UploadOp.m_ReuseBlocksStats.RejectedBlockCount, + UploadOp.m_ReuseBlocksStats.RejectedChunkCount, + NiceBytes(UploadOp.m_ReuseBlocksStats.RejectedByteCount), + UploadOp.m_ReuseBlocksStats.AcceptedReduntantChunkCount, + NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedReduntantByteCount)); + + ZEN_CONSOLE_VERBOSE( + "Generate blocks stats:" + "\n GeneratedBlockByteCount: {}" + "\n GeneratedBlockCount: {}" + "\n GenerateBlocksElapsedWallTimeUS: {}", + NiceBytes(UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount.load()), + UploadOp.m_GenerateBlocksStats.GeneratedBlockCount.load(), + NiceLatencyNs(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Generate blocks stats:" + "\n ChunkCount: {}" + "\n ChunkByteCount: {}" + "\n CompressedChunkCount: {}" + "\n CompressChunksElapsedWallTimeUS: {}", + UploadOp.m_LooseChunksStats.ChunkCount, + 
NiceBytes(UploadOp.m_LooseChunksStats.ChunkByteCount), + UploadOp.m_LooseChunksStats.CompressedChunkCount.load(), + NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkBytes.load()), + NiceLatencyNs(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Disk stats:" + "\n OpenReadCount: {}" + "\n OpenWriteCount: {}" + "\n ReadCount: {}" + "\n ReadByteCount: {}" + "\n WriteCount: {} ({} cloned)" + "\n WriteByteCount: {} ({} cloned)" + "\n CurrentOpenFileCount: {}", + UploadOp.m_DiskStats.OpenReadCount.load(), + UploadOp.m_DiskStats.OpenWriteCount.load(), + UploadOp.m_DiskStats.ReadCount.load(), + NiceBytes(UploadOp.m_DiskStats.ReadByteCount.load()), + UploadOp.m_DiskStats.WriteCount.load(), + UploadOp.m_DiskStats.CloneCount.load(), + NiceBytes(UploadOp.m_DiskStats.WriteByteCount.load()), + NiceBytes(UploadOp.m_DiskStats.CloneByteCount.load()), + UploadOp.m_DiskStats.CurrentOpenFileCount.load()); + + ZEN_CONSOLE_VERBOSE( + "Upload stats:" + "\n BlockCount: {}" + "\n BlocksBytes: {}" + "\n ChunkCount: {}" + "\n ChunksBytes: {}" + "\n ReadFromDiskBytes: {}" + "\n MultipartAttachmentCount: {}" + "\n ElapsedWallTimeUS: {}", + UploadOp.m_UploadStats.BlockCount.load(), + NiceBytes(UploadOp.m_UploadStats.BlocksBytes.load()), + UploadOp.m_UploadStats.ChunkCount.load(), + NiceBytes(UploadOp.m_UploadStats.ChunksBytes.load()), + NiceBytes(UploadOp.m_UploadStats.ReadFromDiskBytes.load()), + UploadOp.m_UploadStats.MultipartAttachmentCount.load(), + NiceLatencyNs(UploadOp.m_UploadStats.ElapsedWallTimeUS * 1000)); + + const double DeltaByteCountPercent = + UploadOp.m_ChunkingStats.BytesHashed > 0 + ? (100.0 * (UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount + UploadOp.m_LooseChunksStats.CompressedChunkBytes)) / + (UploadOp.m_ChunkingStats.BytesHashed) + : 0.0; + + const std::string MultipartAttachmentStats = + Options.AllowMultiparts ? 
fmt::format(" ({} as multipart)", UploadOp.m_UploadStats.MultipartAttachmentCount.load()) : ""; + + if (!IsQuiet) + { + ZEN_CONSOLE( + "Uploaded part {} ('{}') to build {}, {}\n" + " Scanned files: {:>8} ({}), {}B/sec, {}\n" + " New data: {:>8} ({}) {:.1f}%\n" + " New blocks: {:>8} ({} -> {}), {}B/sec, {}\n" + " New chunks: {:>8} ({} -> {}), {}B/sec, {}\n" + " Uploaded: {:>8} ({}), {}bits/sec, {}\n" + " Blocks: {:>8} ({})\n" + " Chunks: {:>8} ({}){}", BuildPartId, BuildPartName, - Path, - ManifestPath, - CreateBuild, - std::move(MetaData), - BuildsOperationUploadFolder::Options{.IsQuiet = IsQuiet, - .IsVerbose = IsVerbose, - .DoExtraContentValidation = DoExtraContentVerify, - .FindBlockMaxCount = FindBlockMaxCount, - .BlockReuseMinPercentLimit = BlockReuseMinPercentLimit, - .AllowMultiparts = AllowMultiparts, - .IgnoreExistingBlocks = IgnoreExistingBlocks, - .TempDir = TempDir, - .ExcludeFolders = ExcludeFolders, - .ExcludeExtensions = ExcludeExtensions, - .ZenExcludeManifestName = ZenExcludeManifestName, - .NonCompressableExtensions = DefaultSplitOnlyExtensions, - .PopulateCache = UploadToZenCache}); - UploadOp.Execute(); - if (AbortFlag) - { - return; - } + BuildId, + NiceTimeSpanMs(UploadTimer.GetElapsedTimeMs()), - ZEN_CONSOLE_VERBOSE( - "Folder scanning stats:" - "\n FoundFileCount: {}" - "\n FoundFileByteCount: {}" - "\n AcceptedFileCount: {}" - "\n AcceptedFileByteCount: {}" - "\n ElapsedWallTimeUS: {}", UploadOp.m_LocalFolderScanStats.FoundFileCount.load(), NiceBytes(UploadOp.m_LocalFolderScanStats.FoundFileByteCount.load()), - UploadOp.m_LocalFolderScanStats.AcceptedFileCount.load(), - NiceBytes(UploadOp.m_LocalFolderScanStats.AcceptedFileByteCount.load()), - NiceLatencyNs(UploadOp.m_LocalFolderScanStats.ElapsedWallTimeUS * 1000)); - - ZEN_CONSOLE_VERBOSE( - "Chunking stats:" - "\n FilesProcessed: {}" - "\n FilesChunked: {}" - "\n BytesHashed: {}" - "\n UniqueChunksFound: {}" - "\n UniqueSequencesFound: {}" - "\n UniqueBytesFound: {}" - "\n 
ElapsedWallTimeUS: {}", - UploadOp.m_ChunkingStats.FilesProcessed.load(), - UploadOp.m_ChunkingStats.FilesChunked.load(), - NiceBytes(UploadOp.m_ChunkingStats.BytesHashed.load()), - UploadOp.m_ChunkingStats.UniqueChunksFound.load(), - UploadOp.m_ChunkingStats.UniqueSequencesFound.load(), - NiceBytes(UploadOp.m_ChunkingStats.UniqueBytesFound.load()), - NiceLatencyNs(UploadOp.m_ChunkingStats.ElapsedWallTimeUS * 1000)); - - ZEN_CONSOLE_VERBOSE( - "Find block stats:" - "\n FindBlockTimeMS: {}" - "\n PotentialChunkCount: {}" - "\n PotentialChunkByteCount: {}" - "\n FoundBlockCount: {}" - "\n FoundBlockChunkCount: {}" - "\n FoundBlockByteCount: {}" - "\n AcceptedBlockCount: {}" - "\n NewBlocksCount: {}" - "\n NewBlocksChunkCount: {}" - "\n NewBlocksChunkByteCount: {}", - NiceTimeSpanMs(UploadOp.m_FindBlocksStats.FindBlockTimeMS), - UploadOp.m_FindBlocksStats.PotentialChunkCount, - NiceBytes(UploadOp.m_FindBlocksStats.PotentialChunkByteCount), - UploadOp.m_FindBlocksStats.FoundBlockCount, - UploadOp.m_FindBlocksStats.FoundBlockChunkCount, - NiceBytes(UploadOp.m_FindBlocksStats.FoundBlockByteCount), - UploadOp.m_FindBlocksStats.AcceptedBlockCount, - UploadOp.m_FindBlocksStats.NewBlocksCount, - UploadOp.m_FindBlocksStats.NewBlocksChunkCount, - NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount)); - - ZEN_CONSOLE_VERBOSE( - "Reuse block stats:" - "\n AcceptedChunkCount: {}" - "\n AcceptedByteCount: {}" - "\n AcceptedRawByteCount: {}" - "\n RejectedBlockCount: {}" - "\n RejectedChunkCount: {}" - "\n RejectedByteCount: {}" - "\n AcceptedReduntantChunkCount: {}" - "\n AcceptedReduntantByteCount: {}", - UploadOp.m_ReuseBlocksStats.AcceptedChunkCount, - NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedByteCount), - NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedRawByteCount), - UploadOp.m_ReuseBlocksStats.RejectedBlockCount, - UploadOp.m_ReuseBlocksStats.RejectedChunkCount, - NiceBytes(UploadOp.m_ReuseBlocksStats.RejectedByteCount), - 
UploadOp.m_ReuseBlocksStats.AcceptedReduntantChunkCount, - NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedReduntantByteCount)); - - ZEN_CONSOLE_VERBOSE( - "Generate blocks stats:" - "\n GeneratedBlockByteCount: {}" - "\n GeneratedBlockCount: {}" - "\n GenerateBlocksElapsedWallTimeUS: {}", - NiceBytes(UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceNum(GetBytesPerSecond(UploadOp.m_ChunkingStats.ElapsedWallTimeUS, UploadOp.m_ChunkingStats.BytesHashed)), + NiceTimeSpanMs(UploadOp.m_ChunkingStats.ElapsedWallTimeUS / 1000), + + UploadOp.m_FindBlocksStats.NewBlocksChunkCount + UploadOp.m_LooseChunksStats.CompressedChunkCount, + NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount + UploadOp.m_LooseChunksStats.CompressedChunkBytes), + DeltaByteCountPercent, + UploadOp.m_GenerateBlocksStats.GeneratedBlockCount.load(), - NiceLatencyNs(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS * 1000)); - - ZEN_CONSOLE_VERBOSE( - "Generate blocks stats:" - "\n ChunkCount: {}" - "\n ChunkByteCount: {}" - "\n CompressedChunkCount: {}" - "\n CompressChunksElapsedWallTimeUS: {}", - UploadOp.m_LooseChunksStats.ChunkCount, - NiceBytes(UploadOp.m_LooseChunksStats.ChunkByteCount), + NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount), + NiceBytes(UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceNum(GetBytesPerSecond(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, + UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount)), + NiceTimeSpanMs(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS / 1000), + UploadOp.m_LooseChunksStats.CompressedChunkCount.load(), + NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkRawBytes), NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkBytes.load()), - NiceLatencyNs(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS * 1000)); - - ZEN_CONSOLE_VERBOSE( - "Disk stats:" - "\n OpenReadCount: {}" - "\n OpenWriteCount: {}" - "\n ReadCount: {}" - "\n 
ReadByteCount: {}" - "\n WriteCount: {} ({} cloned)" - "\n WriteByteCount: {} ({} cloned)" - "\n CurrentOpenFileCount: {}", - UploadOp.m_DiskStats.OpenReadCount.load(), - UploadOp.m_DiskStats.OpenWriteCount.load(), - UploadOp.m_DiskStats.ReadCount.load(), - NiceBytes(UploadOp.m_DiskStats.ReadByteCount.load()), - UploadOp.m_DiskStats.WriteCount.load(), - UploadOp.m_DiskStats.CloneCount.load(), - NiceBytes(UploadOp.m_DiskStats.WriteByteCount.load()), - NiceBytes(UploadOp.m_DiskStats.CloneByteCount.load()), - UploadOp.m_DiskStats.CurrentOpenFileCount.load()); - - ZEN_CONSOLE_VERBOSE( - "Upload stats:" - "\n BlockCount: {}" - "\n BlocksBytes: {}" - "\n ChunkCount: {}" - "\n ChunksBytes: {}" - "\n ReadFromDiskBytes: {}" - "\n MultipartAttachmentCount: {}" - "\n ElapsedWallTimeUS: {}", + NiceNum(GetBytesPerSecond(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS, + UploadOp.m_LooseChunksStats.CompressedChunkRawBytes)), + NiceTimeSpanMs(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS / 1000), + + UploadOp.m_UploadStats.BlockCount.load() + UploadOp.m_UploadStats.ChunkCount.load(), + NiceBytes(UploadOp.m_UploadStats.BlocksBytes + UploadOp.m_UploadStats.ChunksBytes), + NiceNum(GetBytesPerSecond(UploadOp.m_UploadStats.ElapsedWallTimeUS, + (UploadOp.m_UploadStats.ChunksBytes + UploadOp.m_UploadStats.BlocksBytes) * 8)), + NiceTimeSpanMs(UploadOp.m_UploadStats.ElapsedWallTimeUS / 1000), + UploadOp.m_UploadStats.BlockCount.load(), NiceBytes(UploadOp.m_UploadStats.BlocksBytes.load()), + UploadOp.m_UploadStats.ChunkCount.load(), NiceBytes(UploadOp.m_UploadStats.ChunksBytes.load()), - NiceBytes(UploadOp.m_UploadStats.ReadFromDiskBytes.load()), - UploadOp.m_UploadStats.MultipartAttachmentCount.load(), - NiceLatencyNs(UploadOp.m_UploadStats.ElapsedWallTimeUS * 1000)); - - const double DeltaByteCountPercent = - UploadOp.m_ChunkingStats.BytesHashed > 0 - ? 
(100.0 * (UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount + UploadOp.m_LooseChunksStats.CompressedChunkBytes)) / - (UploadOp.m_ChunkingStats.BytesHashed) - : 0.0; - - const std::string MultipartAttachmentStats = - AllowMultiparts ? fmt::format(" ({} as multipart)", UploadOp.m_UploadStats.MultipartAttachmentCount.load()) : ""; - - if (!IsQuiet) - { - ZEN_CONSOLE( - "Uploaded part {} ('{}') to build {}, {}\n" - " Scanned files: {:>8} ({}), {}B/sec, {}\n" - " New data: {:>8} ({}) {:.1f}%\n" - " New blocks: {:>8} ({} -> {}), {}B/sec, {}\n" - " New chunks: {:>8} ({} -> {}), {}B/sec, {}\n" - " Uploaded: {:>8} ({}), {}bits/sec, {}\n" - " Blocks: {:>8} ({})\n" - " Chunks: {:>8} ({}){}", - BuildPartId, - BuildPartName, - BuildId, - NiceTimeSpanMs(UploadTimer.GetElapsedTimeMs()), - - UploadOp.m_LocalFolderScanStats.FoundFileCount.load(), - NiceBytes(UploadOp.m_LocalFolderScanStats.FoundFileByteCount.load()), - NiceNum(GetBytesPerSecond(UploadOp.m_ChunkingStats.ElapsedWallTimeUS, UploadOp.m_ChunkingStats.BytesHashed)), - NiceTimeSpanMs(UploadOp.m_ChunkingStats.ElapsedWallTimeUS / 1000), - - UploadOp.m_FindBlocksStats.NewBlocksChunkCount + UploadOp.m_LooseChunksStats.CompressedChunkCount, - NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount + UploadOp.m_LooseChunksStats.CompressedChunkBytes), - DeltaByteCountPercent, - - UploadOp.m_GenerateBlocksStats.GeneratedBlockCount.load(), - NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount), - NiceBytes(UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount.load()), - NiceNum(GetBytesPerSecond(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, - UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount)), - NiceTimeSpanMs(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS / 1000), - - UploadOp.m_LooseChunksStats.CompressedChunkCount.load(), - NiceBytes(UploadOp.m_LooseChunksStats.ChunkByteCount), - NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkBytes.load()), - 
NiceNum(GetBytesPerSecond(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS, - UploadOp.m_LooseChunksStats.ChunkByteCount)), - NiceTimeSpanMs(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS / 1000), - - UploadOp.m_UploadStats.BlockCount.load() + UploadOp.m_UploadStats.ChunkCount.load(), - NiceBytes(UploadOp.m_UploadStats.BlocksBytes + UploadOp.m_UploadStats.ChunksBytes), - NiceNum(GetBytesPerSecond(UploadOp.m_UploadStats.ElapsedWallTimeUS, - (UploadOp.m_UploadStats.ChunksBytes + UploadOp.m_UploadStats.BlocksBytes) * 8)), - NiceTimeSpanMs(UploadOp.m_UploadStats.ElapsedWallTimeUS / 1000), - - UploadOp.m_UploadStats.BlockCount.load(), - NiceBytes(UploadOp.m_UploadStats.BlocksBytes.load()), - - UploadOp.m_UploadStats.ChunkCount.load(), - NiceBytes(UploadOp.m_UploadStats.ChunksBytes.load()), - MultipartAttachmentStats); - } + MultipartAttachmentStats); } + return UploadedParts; } struct VerifyFolderStatistics @@ -878,107 +889,6 @@ namespace { } } - std::string GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix) - { - ExtendableStringBuilder<512> SB; - std::vector<std::pair<std::string, std::string>> NameStringValuePairs; - for (CbFieldView Field : Object) - { - std::string_view Name = Field.GetName(); - switch (CbValue Accessor = Field.GetValue(); Accessor.GetType()) - { - case CbFieldType::String: - NameStringValuePairs.push_back({std::string(Name), std::string(Accessor.AsString())}); - break; - case CbFieldType::IntegerPositive: - NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerPositive())}); - break; - case CbFieldType::IntegerNegative: - NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerNegative())}); - break; - case CbFieldType::Float32: - { - const float Value = Accessor.AsFloat32(); - if (std::isfinite(Value)) - { - NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.9g}", Value)}); - } - else - { - 
NameStringValuePairs.push_back({std::string(Name), "null"}); - } - } - break; - case CbFieldType::Float64: - { - const double Value = Accessor.AsFloat64(); - if (std::isfinite(Value)) - { - NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.17g}", Value)}); - } - else - { - NameStringValuePairs.push_back({std::string(Name), "null"}); - } - } - break; - case CbFieldType::BoolFalse: - NameStringValuePairs.push_back({std::string(Name), "false"}); - break; - case CbFieldType::BoolTrue: - NameStringValuePairs.push_back({std::string(Name), "true"}); - break; - case CbFieldType::Hash: - { - NameStringValuePairs.push_back({std::string(Name), Accessor.AsHash().ToHexString()}); - } - break; - case CbFieldType::Uuid: - { - StringBuilder<Oid::StringLength + 1> Builder; - Accessor.AsUuid().ToString(Builder); - NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); - } - break; - case CbFieldType::DateTime: - { - ExtendableStringBuilder<64> Builder; - Builder << DateTime(Accessor.AsDateTimeTicks()).ToIso8601(); - NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); - } - break; - case CbFieldType::TimeSpan: - { - ExtendableStringBuilder<64> Builder; - const TimeSpan Span(Accessor.AsTimeSpanTicks()); - if (Span.GetDays() == 0) - { - Builder << Span.ToString("%h:%m:%s.%n"); - } - else - { - Builder << Span.ToString("%d.%h:%m:%s.%n"); - } - NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); - break; - } - case CbFieldType::ObjectId: - NameStringValuePairs.push_back({std::string(Name), Accessor.AsObjectId().ToString()}); - break; - } - } - std::string::size_type LongestKey = 0; - for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs) - { - LongestKey = Max(KeyValue.first.length(), LongestKey); - } - for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs) - { - SB.Append(fmt::format("{}{:<{}}: {}{}", Prefix, KeyValue.first, LongestKey, KeyValue.second, 
Suffix)); - } - return SB.ToString(); - } - CbObject GetBuild(BuildStorageBase& Storage, const Oid& BuildId) { Stopwatch GetBuildTimer; @@ -995,280 +905,6 @@ namespace { return BuildObject; } - std::vector<std::pair<Oid, std::string>> ResolveBuildPartNames(CbObjectView BuildObject, - const Oid& BuildId, - const std::vector<Oid>& BuildPartIds, - std::span<const std::string> BuildPartNames, - std::uint64_t& OutPreferredMultipartChunkSize) - { - std::vector<std::pair<Oid, std::string>> Result; - { - CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); - if (!PartsObject) - { - throw std::runtime_error("Build object does not have a 'parts' object"); - } - - OutPreferredMultipartChunkSize = BuildObject["chunkSize"sv].AsUInt64(OutPreferredMultipartChunkSize); - - std::vector<std::pair<Oid, std::string>> AvailableParts; - - for (CbFieldView PartView : PartsObject) - { - const std::string BuildPartName = std::string(PartView.GetName()); - const Oid BuildPartId = PartView.AsObjectId(); - if (BuildPartId == Oid::Zero) - { - ExtendableStringBuilder<128> SB; - for (CbFieldView ScanPartView : PartsObject) - { - SB.Append(fmt::format("\n {}: {}", ScanPartView.GetName(), ScanPartView.AsObjectId())); - } - throw std::runtime_error( - fmt::format("Build object parts does not have a '{}' object id{}", BuildPartName, SB.ToView())); - } - AvailableParts.push_back({BuildPartId, BuildPartName}); - } - - if (BuildPartIds.empty() && BuildPartNames.empty()) - { - Result = AvailableParts; - } - else - { - for (const std::string& BuildPartName : BuildPartNames) - { - if (auto It = std::find_if(AvailableParts.begin(), - AvailableParts.end(), - [&BuildPartName](const auto& Part) { return Part.second == BuildPartName; }); - It != AvailableParts.end()) - { - Result.push_back(*It); - } - else - { - throw std::runtime_error(fmt::format("Build {} object does not have a part named '{}'", BuildId, BuildPartName)); - } - } - for (const Oid& BuildPartId : BuildPartIds) - { - if (auto It = 
std::find_if(AvailableParts.begin(), - AvailableParts.end(), - [&BuildPartId](const auto& Part) { return Part.first == BuildPartId; }); - It != AvailableParts.end()) - { - Result.push_back(*It); - } - else - { - throw std::runtime_error(fmt::format("Build {} object does not have a part with id '{}'", BuildId, BuildPartId)); - } - } - } - - if (Result.empty()) - { - throw std::runtime_error(fmt::format("Build object does not have any parts", BuildId)); - } - } - return Result; - } - - ChunkedFolderContent GetRemoteContent(OperationLogOutput& Output, - StorageInstance& Storage, - const Oid& BuildId, - const std::vector<std::pair<Oid, std::string>>& BuildParts, - std::span<const std::string> IncludeWildcards, - std::span<const std::string> ExcludeWildcards, - std::unique_ptr<ChunkingController>& OutChunkController, - std::vector<ChunkedFolderContent>& OutPartContents, - std::vector<ChunkBlockDescription>& OutBlockDescriptions, - std::vector<IoHash>& OutLooseChunkHashes) - { - ZEN_TRACE_CPU("GetRemoteContent"); - - Stopwatch GetBuildPartTimer; - const Oid BuildPartId = BuildParts[0].first; - const std::string_view BuildPartName = BuildParts[0].second; - CbObject BuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, BuildPartId); - if (!IsQuiet) - { - ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. 
Payload size: {}", - BuildPartId, - BuildPartName, - NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()), - NiceBytes(BuildPartManifest.GetSize())); - ZEN_CONSOLE("{}", GetCbObjectAsNiceString(BuildPartManifest, " "sv, "\n"sv)); - } - - { - CbObjectView Chunker = BuildPartManifest["chunker"sv].AsObjectView(); - std::string_view ChunkerName = Chunker["name"sv].AsString(); - CbObjectView Parameters = Chunker["parameters"sv].AsObjectView(); - OutChunkController = CreateChunkingController(ChunkerName, Parameters); - } - - auto ParseBuildPartManifest = [&Output](StorageInstance& Storage, - const Oid& BuildId, - const Oid& BuildPartId, - CbObject BuildPartManifest, - std::span<const std::string> IncludeWildcards, - std::span<const std::string> ExcludeWildcards, - ChunkedFolderContent& OutRemoteContent, - std::vector<ChunkBlockDescription>& OutBlockDescriptions, - std::vector<IoHash>& OutLooseChunkHashes) { - std::vector<uint32_t> AbsoluteChunkOrders; - std::vector<uint64_t> LooseChunkRawSizes; - std::vector<IoHash> BlockRawHashes; - - ReadBuildContentFromCompactBinary(BuildPartManifest, - OutRemoteContent.Platform, - OutRemoteContent.Paths, - OutRemoteContent.RawHashes, - OutRemoteContent.RawSizes, - OutRemoteContent.Attributes, - OutRemoteContent.ChunkedContent.SequenceRawHashes, - OutRemoteContent.ChunkedContent.ChunkCounts, - AbsoluteChunkOrders, - OutLooseChunkHashes, - LooseChunkRawSizes, - BlockRawHashes); - - // TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them - - { - bool AttemptFallback = false; - OutBlockDescriptions = GetBlockDescriptions(Output, - *Storage.BuildStorage, - Storage.BuildCacheStorage.get(), - BuildId, - BuildPartId, - BlockRawHashes, - AttemptFallback, - IsQuiet, - IsVerbose); - } - - CalculateLocalChunkOrders(AbsoluteChunkOrders, - OutLooseChunkHashes, - LooseChunkRawSizes, - OutBlockDescriptions, - OutRemoteContent.ChunkedContent.ChunkHashes, - 
OutRemoteContent.ChunkedContent.ChunkRawSizes, - OutRemoteContent.ChunkedContent.ChunkOrders, - DoExtraContentVerify); - - if (!IncludeWildcards.empty() || !ExcludeWildcards.empty()) - { - std::vector<std::filesystem::path> DeletedPaths; - for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths) - { - if (!IncludePath(IncludeWildcards, ExcludeWildcards, RemotePath)) - { - DeletedPaths.push_back(RemotePath); - } - } - - if (!DeletedPaths.empty()) - { - OutRemoteContent = DeletePathsFromChunkedContent(OutRemoteContent, DeletedPaths); - InlineRemoveUnusedHashes(OutLooseChunkHashes, OutRemoteContent.ChunkedContent.ChunkHashes); - } - } - -#if ZEN_BUILD_DEBUG - ValidateChunkedFolderContent(OutRemoteContent, OutBlockDescriptions, OutLooseChunkHashes, IncludeWildcards, ExcludeWildcards); -#endif // ZEN_BUILD_DEBUG - }; - - OutPartContents.resize(1); - ParseBuildPartManifest(Storage, - BuildId, - BuildPartId, - BuildPartManifest, - IncludeWildcards, - ExcludeWildcards, - OutPartContents[0], - OutBlockDescriptions, - OutLooseChunkHashes); - ChunkedFolderContent RemoteContent; - if (BuildParts.size() > 1) - { - std::vector<ChunkBlockDescription> OverlayBlockDescriptions; - std::vector<IoHash> OverlayLooseChunkHashes; - for (size_t PartIndex = 1; PartIndex < BuildParts.size(); PartIndex++) - { - const Oid& OverlayBuildPartId = BuildParts[PartIndex].first; - const std::string& OverlayBuildPartName = BuildParts[PartIndex].second; - Stopwatch GetOverlayBuildPartTimer; - CbObject OverlayBuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, OverlayBuildPartId); - if (!IsQuiet) - { - ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. 
Payload size: {}", - OverlayBuildPartId, - OverlayBuildPartName, - NiceTimeSpanMs(GetOverlayBuildPartTimer.GetElapsedTimeMs()), - NiceBytes(OverlayBuildPartManifest.GetSize())); - } - - ChunkedFolderContent OverlayPartContent; - std::vector<ChunkBlockDescription> OverlayPartBlockDescriptions; - std::vector<IoHash> OverlayPartLooseChunkHashes; - - ParseBuildPartManifest(Storage, - BuildId, - OverlayBuildPartId, - OverlayBuildPartManifest, - IncludeWildcards, - ExcludeWildcards, - OverlayPartContent, - OverlayPartBlockDescriptions, - OverlayPartLooseChunkHashes); - OutPartContents.push_back(OverlayPartContent); - OverlayBlockDescriptions.insert(OverlayBlockDescriptions.end(), - OverlayPartBlockDescriptions.begin(), - OverlayPartBlockDescriptions.end()); - OverlayLooseChunkHashes.insert(OverlayLooseChunkHashes.end(), - OverlayPartLooseChunkHashes.begin(), - OverlayPartLooseChunkHashes.end()); - } - - RemoteContent = - MergeChunkedFolderContents(OutPartContents[0], std::span<const ChunkedFolderContent>(OutPartContents).subspan(1)); - { - tsl::robin_set<IoHash> AllBlockHashes; - for (const ChunkBlockDescription& Description : OutBlockDescriptions) - { - AllBlockHashes.insert(Description.BlockHash); - } - for (const ChunkBlockDescription& Description : OverlayBlockDescriptions) - { - if (!AllBlockHashes.contains(Description.BlockHash)) - { - AllBlockHashes.insert(Description.BlockHash); - OutBlockDescriptions.push_back(Description); - } - } - } - { - tsl::robin_set<IoHash> AllLooseChunkHashes(OutLooseChunkHashes.begin(), OutLooseChunkHashes.end()); - for (const IoHash& OverlayLooseChunkHash : OverlayLooseChunkHashes) - { - if (!AllLooseChunkHashes.contains(OverlayLooseChunkHash)) - { - AllLooseChunkHashes.insert(OverlayLooseChunkHash); - OutLooseChunkHashes.push_back(OverlayLooseChunkHash); - } - } - } - } - else - { - RemoteContent = OutPartContents[0]; - } - return RemoteContent; - } - std::vector<std::filesystem::path> GetNewPaths(const std::span<const 
std::filesystem::path> KnownPaths, const std::span<const std::filesystem::path> Paths) { @@ -1295,6 +931,7 @@ namespace { ChunkingStatistics& ChunkingStats, const std::filesystem::path& Path, ChunkingController& ChunkController, + ChunkingCache& ChunkCache, std::span<const std::filesystem::path> PathsToCheck) { FolderContent FolderState; @@ -1339,6 +976,7 @@ namespace { Path, FolderState, ChunkController, + ChunkCache, GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { FilteredBytesHashed.Update(LocalChunkingStats.BytesHashed.load()); @@ -1374,7 +1012,8 @@ namespace { ChunkingStatistics& ChunkingStats, const std::filesystem::path& Path, const std::filesystem::path& StateFilePath, - ChunkingController& ChunkController) + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) { Stopwatch ReadStateTimer; bool FileExists = IsFile(StateFilePath); @@ -1461,6 +1100,7 @@ namespace { Path, UpdatedContent, ChunkController, + ChunkCache, GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { FilteredBytesHashed.Update(LocalChunkingStats.BytesHashed.load()); @@ -1529,7 +1169,8 @@ namespace { const std::filesystem::path& Path, std::function<bool(const std::string_view& RelativePath)>&& IsAcceptedFolder, std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& IsAcceptedFile, - ChunkingController& ChunkController) + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) { Stopwatch Timer; @@ -1549,14 +1190,19 @@ namespace { return {}; } - BuildState LocalContent = - GetLocalContent(Workers, GetFolderContentStats, ChunkingStats, Path, ZenStateFilePath(Path / ZenFolderName), ChunkController) - .State; + BuildState LocalContent = GetLocalContent(Workers, + GetFolderContentStats, + ChunkingStats, + Path, + ZenStateFilePath(Path / ZenFolderName), + ChunkController, + ChunkCache) + .State; std::vector<std::filesystem::path> UntrackedPaths = 
GetNewPaths(LocalContent.ChunkedContent.Paths, Content.Paths); BuildState UntrackedLocalContent = - GetLocalStateFromPaths(Workers, GetFolderContentStats, ChunkingStats, Path, ChunkController, UntrackedPaths).State; + GetLocalStateFromPaths(Workers, GetFolderContentStats, ChunkingStats, Path, ChunkController, ChunkCache, UntrackedPaths).State; ChunkedFolderContent Result = MergeChunkedFolderContents(LocalContent.ChunkedContent, std::vector<ChunkedFolderContent>{UntrackedLocalContent.ChunkedContent}); @@ -1596,7 +1242,7 @@ namespace { uint64_t MaximumInMemoryPayloadSize = 512u * 1024u; bool PopulateCache = true; bool AppendNewContent = false; - std::vector<std::string> ExcludeFolders; + std::vector<std::string> ExcludeFolders = DefaultExcludeFolders; }; void DownloadFolder(OperationLogOutput& Output, @@ -1606,6 +1252,7 @@ namespace { const Oid& BuildId, const std::vector<Oid>& BuildPartIds, std::span<const std::string> BuildPartNames, + const std::filesystem::path& DownloadSpecPath, const std::filesystem::path& Path, const DownloadOptions& Options) { @@ -1643,6 +1290,14 @@ namespace { std::vector<std::pair<Oid, std::string>> AllBuildParts = ResolveBuildPartNames(BuildObject, BuildId, BuildPartIds, BuildPartNames, PreferredMultipartChunkSize); + BuildManifest Manifest; + if (!DownloadSpecPath.empty()) + { + const std::filesystem::path AbsoluteDownloadSpecPath = + DownloadSpecPath.is_relative() ? 
MakeSafeAbsolutePath(Path / DownloadSpecPath) : MakeSafeAbsolutePath(DownloadSpecPath); + Manifest = ParseBuildManifest(DownloadSpecPath); + } + std::vector<ChunkedFolderContent> PartContents; std::unique_ptr<ChunkingController> ChunkController; @@ -1656,15 +1311,17 @@ namespace { Storage, BuildId, AllBuildParts, + Manifest, Options.IncludeWildcards, Options.ExcludeWildcards, ChunkController, PartContents, BlockDescriptions, - LooseChunkHashes); -#if ZEN_BUILD_DEBUG - ValidateChunkedFolderContent(RemoteContent, BlockDescriptions, LooseChunkHashes, {}, {}); -#endif // ZEN_BUILD_DEBUG + LooseChunkHashes, + IsQuiet, + IsVerbose, + DoExtraContentVerify); + const std::uint64_t LargeAttachmentSize = Options.AllowMultiparts ? PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; GetFolderContentStatistics LocalFolderScanStats; ChunkingStatistics ChunkingStats; @@ -1680,18 +1337,25 @@ namespace { ZEN_CONSOLE_INFO("Unspecified chunking algorithm, using default"); ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); } + std::unique_ptr<ChunkingCache> ChunkCache(CreateNullChunkingCache()); LocalState = GetLocalContent(Workers, LocalFolderScanStats, ChunkingStats, Path, ZenStateFilePath(Path / ZenFolderName), - *ChunkController); + *ChunkController, + *ChunkCache); std::vector<std::filesystem::path> UntrackedPaths = GetNewPaths(LocalState.State.ChunkedContent.Paths, RemoteContent.Paths); - BuildSaveState UntrackedLocalContent = - GetLocalStateFromPaths(Workers, LocalFolderScanStats, ChunkingStats, Path, *ChunkController, UntrackedPaths); + BuildSaveState UntrackedLocalContent = GetLocalStateFromPaths(Workers, + LocalFolderScanStats, + ChunkingStats, + Path, + *ChunkController, + *ChunkCache, + UntrackedPaths); if (!UntrackedLocalContent.State.ChunkedContent.Paths.empty()) { @@ -2102,7 +1766,7 @@ namespace { for (size_t Index : Order) { const std::filesystem::path& Path = Paths[Index]; - if (IncludePath(IncludeWildcards, ExcludeWildcards, 
Path)) + if (IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(Path.generic_string()), /*CaseSensitive*/ true)) { const IoHash& RawHash = RawHashes[Index]; const uint64_t RawSize = RawSizes[Index]; @@ -2156,9 +1820,10 @@ namespace { void DiffFolders(TransferThreadWorkers& Workers, const std::filesystem::path& BasePath, const std::filesystem::path& ComparePath, - bool OnlyChunked, - const std::vector<std::string>& InExcludeFolders, - const std::vector<std::string>& InExcludeExtensions) + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + const std::vector<std::string>& ExcludeFolders, + const std::vector<std::string>& ExcludeExtensions) { ZEN_TRACE_CPU("DiffFolders"); @@ -2180,20 +1845,7 @@ namespace { ChunkedFolderContent CompareFolderContent; { - StandardChunkingControllerSettings ChunkingSettings; - std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(ChunkingSettings); - std::vector<std::string> ExcludeExtensions = InExcludeExtensions; - if (OnlyChunked) - { - ExcludeExtensions.insert(ExcludeExtensions.end(), - ChunkingSettings.SplitOnlyExtensions.begin(), - ChunkingSettings.SplitOnlyExtensions.end()); - ExcludeExtensions.insert(ExcludeExtensions.end(), - ChunkingSettings.SplitAndCompressExtensions.begin(), - ChunkingSettings.SplitAndCompressExtensions.end()); - } - - auto IsAcceptedFolder = [ExcludeFolders = InExcludeFolders](const std::string_view& RelativePath) -> bool { + auto IsAcceptedFolder = [ExcludeFolders](const std::string_view& RelativePath) -> bool { for (const std::string& ExcludeFolder : ExcludeFolders) { if (RelativePath.starts_with(ExcludeFolder)) @@ -2232,7 +1884,8 @@ namespace { BasePath, IsAcceptedFolder, IsAcceptedFile, - *ChunkController); + ChunkController, + ChunkCache); if (AbortFlag) { return; @@ -2248,7 +1901,8 @@ namespace { ComparePath, IsAcceptedFolder, IsAcceptedFile, - *ChunkController); + ChunkController, + ChunkCache); if (AbortFlag) { @@ -2465,6 +2119,15 @@ 
BuildsCommand::BuildsCommand() "<boostworkers>"); }; + auto AddChunkingCacheOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("", + "", + "chunking-cache-path", + "Path to cache for chunking information of scanned files. Default is empty resulting in no caching", + cxxopts::value(m_ChunkingCachePath), + "<chunkingcachepath>"); + }; + auto AddWildcardOptions = [this](cxxopts::Options& Ops) { Ops.add_option("", "", @@ -2621,6 +2284,7 @@ BuildsCommand::BuildsCommand() AddZenFolderOptions(m_UploadOptions); AddExcludeFolderOption(m_UploadOptions); AddExcludeExtensionsOption(m_UploadOptions); + AddChunkingCacheOptions(m_UploadOptions); m_UploadOptions.add_options()("h,help", "Print help"); m_UploadOptions.add_option("", "l", "local-path", "Root file system folder for build", cxxopts::value(m_Path), "<local-path>"); m_UploadOptions.add_option("", @@ -2674,9 +2338,11 @@ BuildsCommand::BuildsCommand() m_UploadOptions.add_option("", "", "manifest-path", - "Path to a text file with one line of <local path>[TAB]<modification date> per file to include.", + "Path to a text file with one line of <local path>[TAB]<modification date> per file to include or a " + "structured .json file describing the parts", cxxopts::value(m_ManifestPath), "<manifestpath>"); + m_UploadOptions .add_option("", "", "verify", "Enable post upload verify of all uploaded data", cxxopts::value(m_PostUploadVerify), "<verify>"); m_UploadOptions.add_option("", @@ -2747,6 +2413,14 @@ BuildsCommand::BuildsCommand() AddPartialBlockRequestOptions(m_DownloadOptions); + m_DownloadOptions.add_option( + "", + "", + "download-spec-path", + "Path to a text file with one line of <local path> per file to include or a structured .json file describing what to download.", + cxxopts::value(m_DownloadSpecPath), + "<downloadspecpath>"); + m_DownloadOptions .add_option("", "", "verify", "Enable post download verify of all tracked files", cxxopts::value(m_PostDownloadVerify), "<verify>"); m_DownloadOptions.add_option("", 
@@ -2814,6 +2488,7 @@ BuildsCommand::BuildsCommand() AddWorkerOptions(m_DiffOptions); AddExcludeFolderOption(m_DiffOptions); AddExcludeExtensionsOption(m_DiffOptions); + AddChunkingCacheOptions(m_DiffOptions); m_DiffOptions.add_options()("h,help", "Print help"); m_DiffOptions.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), "<local-path>"); m_DiffOptions.add_option("", "c", "compare-path", "Root file system folder used as diff", cxxopts::value(m_DiffPath), "<diff-path>"); @@ -2839,6 +2514,7 @@ BuildsCommand::BuildsCommand() AddPartialBlockRequestOptions(m_TestOptions); AddWildcardOptions(m_TestOptions); AddAppendNewContentOptions(m_TestOptions); + AddChunkingCacheOptions(m_TestOptions); m_TestOptions.add_option("", "", @@ -3275,9 +2951,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) }; auto ParseFileFilters = [&](std::vector<std::string>& OutIncludeWildcards, std::vector<std::string>& OutExcludeWildcards) { - auto SplitWildcard = [](const std::string_view Wildcard) -> std::vector<std::string> { - std::vector<std::string> Wildcards; - ForEachStrTok(Wildcard, ';', [&Wildcards](std::string_view Wildcard) { + auto SplitAndAppendWildcard = [](const std::string_view Wildcard, std::vector<std::string>& Output) { + ForEachStrTok(Wildcard, ';', [&Output](std::string_view Wildcard) { if (!Wildcard.empty()) { std::string CleanWildcard(ToLower(Wildcard)); @@ -3294,21 +2969,19 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) CleanWildcard = CleanWildcard.substr(2); } - Wildcards.emplace_back(std::move(CleanWildcard)); + Output.emplace_back(std::move(CleanWildcard)); } return true; }); - return Wildcards; }; - OutIncludeWildcards = SplitWildcard(m_IncludeWildcard); - OutExcludeWildcards = SplitWildcard(m_ExcludeWildcard); + SplitAndAppendWildcard(m_IncludeWildcard, OutIncludeWildcards); + SplitAndAppendWildcard(m_ExcludeWildcard, OutExcludeWildcards); }; auto 
ParseExcludeFolderAndExtension = [&](std::vector<std::string>& OutExcludeFolders, std::vector<std::string>& OutExcludeExtensions) { - auto SplitExclusion = [](const std::string_view Input) -> std::vector<std::string> { - std::vector<std::string> Exclusions; - ForEachStrTok(Input, ";,", [&Exclusions](std::string_view Exclusion) { + auto SplitAndAppendExclusion = [](const std::string_view Input, std::vector<std::string>& Output) { + ForEachStrTok(Input, ";,", [&Output](std::string_view Exclusion) { if (!Exclusion.empty()) { std::string CleanExclusion(ToLower(Exclusion)); @@ -3316,15 +2989,14 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { CleanExclusion = CleanExclusion.substr(1, CleanExclusion.length() - 2); } - Exclusions.emplace_back(std::move(CleanExclusion)); + Output.emplace_back(std::move(CleanExclusion)); } return true; }); - return Exclusions; }; - OutExcludeFolders = SplitExclusion(m_ExcludeFolders); - OutExcludeExtensions = SplitExclusion(m_ExcludeExtensions); + SplitAndAppendExclusion(m_ExcludeFolders, OutExcludeFolders); + SplitAndAppendExclusion(m_ExcludeExtensions, OutExcludeExtensions); }; auto ParseDiffPath = [&]() { @@ -3540,31 +3212,39 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) LogExecutableVersionAndPid(); } } - CbObject QueryObject; + std::string JsonQuery; if (m_ListQueryPath.empty()) { CbObjectWriter QueryWriter; QueryWriter.BeginObject("query"); QueryWriter.EndObject(); // query - QueryObject = QueryWriter.Save(); + CbObject QueryObject = QueryWriter.Save(); + ExtendableStringBuilder<64> SB; + CompactBinaryToJson(QueryObject, SB); + JsonQuery = SB.ToString(); } else { if (ToLower(m_ListQueryPath.extension().string()) == ".cbo") { - QueryObject = LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(m_ListQueryPath)); + CbObject QueryObject = LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(m_ListQueryPath)); + ExtendableStringBuilder<64> SB; + 
CompactBinaryToJson(QueryObject, SB); + JsonQuery = SB.ToString(); } else { IoBuffer MetaDataJson = ReadFile(m_ListQueryPath).Flatten(); std::string_view Json(reinterpret_cast<const char*>(MetaDataJson.GetData()), MetaDataJson.GetSize()); std::string JsonError; - QueryObject = LoadCompactBinaryFromJson(Json, JsonError).AsObject(); + CbObject QueryObject = LoadCompactBinaryFromJson(Json, JsonError) + .AsObject(); // We try to convert it so it is at least reasonably verified in format if (!JsonError.empty()) { throw std::runtime_error( fmt::format("build metadata file '{}' is malformed. Reason: '{}'", m_ListQueryPath, JsonError)); } + JsonQuery = std::string(Json); } } @@ -3587,7 +3267,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) /*RequireBucket*/ false, /*BoostCacheBackgroundWorkerPool */ false); - CbObject Response = Storage.BuildStorage->ListBuilds(QueryObject); + CbObject Response = Storage.BuildStorage->ListBuilds(JsonQuery); ZEN_ASSERT(ValidateCompactBinary(Response.GetView(), CbValidateMode::Default) == CbValidateError::None); if (m_ListResultPath.empty()) { @@ -3708,6 +3388,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_ZenFolderPath = std::filesystem::current_path() / ZenFolderName; } MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + MakeSafeAbsolutePathÍnPlace(m_ChunkingCachePath); CreateDirectories(m_ZenFolderPath); auto _ = MakeGuard([this, &Workers]() { CleanAndRemoveDirectory(Workers.GetIOWorkerPool(), m_ZenFolderPath); }); @@ -3719,7 +3400,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) /*RequireBucket*/ true, /*BoostCacheBackgroundWorkerPool */ false); - if (m_BuildPartName.empty()) + if (m_BuildPartName.empty() && m_ManifestPath.empty()) { m_BuildPartName = m_Path.filename().string(); } @@ -3729,10 +3410,11 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { m_BuildId = BuildId.ToString(); } - const Oid
BuildPartId = m_BuildPartId.empty() ? Oid::NewOid() : ParseBuildPartId(); - if (m_BuildPartId.empty()) + + Oid BuildPartId; + if (!m_BuildPartId.empty()) { - m_BuildPartId = BuildPartId.ToString(); + BuildPartId = ParseBuildPartId(); } CbObject MetaData = ParseBuildMetadata(); @@ -3743,30 +3425,42 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) std::vector<std::string> ExcludeExtensions = DefaultExcludeExtensions; ParseExcludeFolderAndExtension(ExcludeFolders, ExcludeExtensions); - UploadFolder(*Output, - Workers, - Storage, - BuildId, - BuildPartId, - m_BuildPartName, - m_Path, - TempDir, - m_ManifestPath, - m_FindBlockMaxCount, - m_BlockReuseMinPercentLimit, - m_AllowMultiparts, - MetaData, - m_CreateBuild, - m_Clean, - m_UploadToZenCache, - ExcludeFolders, - ExcludeExtensions); + std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); + std::unique_ptr<ChunkingCache> ChunkCache = m_ChunkingCachePath.empty() + ? 
CreateNullChunkingCache() + : CreateDiskChunkingCache(m_ChunkingCachePath, *ChunkController, 256u * 1024u); + + std::vector<std::pair<Oid, std::string>> UploadedParts = + UploadFolder(*Output, + Workers, + Storage, + BuildId, + BuildPartId, + m_BuildPartName, + m_Path, + m_ManifestPath, + MetaData, + *ChunkController, + *ChunkCache, + UploadFolderOptions{.TempDir = TempDir, + .FindBlockMaxCount = m_FindBlockMaxCount, + .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit, + .AllowMultiparts = m_AllowMultiparts, + .CreateBuild = m_CreateBuild, + .IgnoreExistingBlocks = m_Clean, + .UploadToZenCache = m_UploadToZenCache, + .ExcludeFolders = ExcludeFolders, + .ExcludeExtensions = ExcludeExtensions}); if (!AbortFlag) { if (m_PostUploadVerify) { - ValidateBuildPart(*Output, Workers, *Storage.BuildStorage, BuildId, BuildPartId, m_BuildPartName); + // TODO: Validate all parts + for (const auto& Part : UploadedParts) + { + ValidateBuildPart(*Output, Workers, *Storage.BuildStorage, BuildId, Part.first, Part.second); + } } } @@ -3893,6 +3587,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildId, BuildPartIds, BuildPartNames, + m_DownloadSpecPath, m_Path, DownloadOptions{.SystemRootDir = m_SystemRootDir, .ZenFolderPath = m_ZenFolderPath, @@ -4001,11 +3696,29 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) ParsePath(); ParseDiffPath(); + MakeSafeAbsolutePathÍnPlace(m_ChunkingCachePath); + std::vector<std::string> ExcludeFolders = DefaultExcludeFolders; std::vector<std::string> ExcludeExtensions = DefaultExcludeExtensions; ParseExcludeFolderAndExtension(ExcludeFolders, ExcludeExtensions); - DiffFolders(Workers, m_Path, m_DiffPath, m_OnlyChunked, ExcludeFolders, ExcludeExtensions); + StandardChunkingControllerSettings ChunkingSettings; + std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(ChunkingSettings); + std::unique_ptr<ChunkingCache> ChunkCache = 
m_ChunkingCachePath.empty() + ? CreateNullChunkingCache() + : CreateDiskChunkingCache(m_ChunkingCachePath, *ChunkController, 256u * 1024u); + + if (m_OnlyChunked) + { + ExcludeExtensions.insert(ExcludeExtensions.end(), + ChunkingSettings.SplitOnlyExtensions.begin(), + ChunkingSettings.SplitOnlyExtensions.end()); + ExcludeExtensions.insert(ExcludeExtensions.end(), + ChunkingSettings.SplitAndCompressExtensions.begin(), + ChunkingSettings.SplitAndCompressExtensions.end()); + } + + DiffFolders(Workers, m_Path, m_DiffPath, *ChunkController, *ChunkCache, ExcludeFolders, ExcludeExtensions); if (AbortFlag) { throw std::runtime_error("Diff folders aborted"); @@ -4247,8 +3960,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) Storage, StorageCacheStats, BuildId, - {}, - {}, + /*BuildPartIds,*/ {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, m_Path, DownloadOptions{.SystemRootDir = m_SystemRootDir, .ZenFolderPath = m_ZenFolderPath, @@ -4370,6 +4084,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_ZenFolderPath = m_Path / ZenFolderName; } MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + MakeSafeAbsolutePathÍnPlace(m_ChunkingCachePath); StorageInstance Storage = CreateBuildStorage(StorageStats, StorageCacheStats, @@ -4408,7 +4123,11 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } const std::filesystem::path UploadTempDir = UploadTempDirectory(m_Path); - // std::filesystem::path UploadTempDir = m_ZenFolderPath / "upload_tmp"; + + std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); + std::unique_ptr<ChunkingCache> ChunkCache = m_ChunkingCachePath.empty() + ? 
CreateNullChunkingCache() + : CreateDiskChunkingCache(m_ChunkingCachePath, *ChunkController, 256u * 1024u); UploadFolder(*Output, Workers, @@ -4417,22 +4136,51 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildPartId, m_BuildPartName, m_Path, - UploadTempDir, {}, - m_FindBlockMaxCount, - m_BlockReuseMinPercentLimit, - m_AllowMultiparts, MetaData, - true, - false, - m_UploadToZenCache, - DefaultExcludeFolders, - DefaultExcludeExtensions); + *ChunkController, + *ChunkCache, + UploadFolderOptions{.TempDir = UploadTempDir, + .FindBlockMaxCount = m_FindBlockMaxCount, + .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit, + .AllowMultiparts = m_AllowMultiparts, + .CreateBuild = true, + .IgnoreExistingBlocks = false, + .UploadToZenCache = m_UploadToZenCache}); + if (AbortFlag) { throw std::runtime_error("Test aborted. (Upload build)"); } + { + ZEN_CONSOLE("Upload Build {}, Part {} ({}) from '{}' with chunking cache", m_BuildId, BuildPartId, m_BuildPartName, m_Path); + + UploadFolder(*Output, + Workers, + Storage, + Oid::NewOid(), + Oid::NewOid(), + m_BuildPartName, + m_Path, + {}, + MetaData, + *ChunkController, + *ChunkCache, + UploadFolderOptions{.TempDir = UploadTempDir, + .FindBlockMaxCount = m_FindBlockMaxCount, + .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit, + .AllowMultiparts = m_AllowMultiparts, + .CreateBuild = true, + .IgnoreExistingBlocks = false, + .UploadToZenCache = m_UploadToZenCache}); + + if (AbortFlag) + { + throw std::runtime_error("Test aborted. 
(Upload again, chunking is cached)"); + } + } + ValidateBuildPart(*Output, Workers, *Storage.BuildStorage, BuildId, BuildPartId, m_BuildPartName); if (!m_IncludeWildcard.empty() || !m_ExcludeWildcard.empty()) @@ -4451,7 +4199,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId, {BuildPartId}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath, DownloadOptions{.SystemRootDir = m_SystemRootDir, .ZenFolderPath = DownloadPath / ZenFolderName, @@ -4482,7 +4231,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId, {BuildPartId}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath, DownloadOptions{.SystemRootDir = m_SystemRootDir, .ZenFolderPath = DownloadPath / ZenFolderName, @@ -4509,7 +4259,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId, {BuildPartId}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath, DownloadOptions{.SystemRootDir = m_SystemRootDir, .ZenFolderPath = DownloadPath / ZenFolderName, @@ -4537,7 +4288,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId, {BuildPartId}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath, DownloadOptions{.SystemRootDir = m_SystemRootDir, @@ -4562,7 +4314,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId, {BuildPartId}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath, DownloadOptions{.SystemRootDir = m_SystemRootDir, @@ -4679,7 +4432,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId, {BuildPartId}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath, DownloadOptions{.SystemRootDir = m_SystemRootDir, @@ -4716,17 +4470,18 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int 
argc, char** argv) BuildPartId2, m_BuildPartName, DownloadPath, - UploadTempDir, {}, - m_FindBlockMaxCount, - m_BlockReuseMinPercentLimit, - m_AllowMultiparts, MetaData2, - true, - false, - m_UploadToZenCache, - DefaultExcludeFolders, - DefaultExcludeExtensions); + *ChunkController, + *ChunkCache, + UploadFolderOptions{.TempDir = UploadTempDir, + .FindBlockMaxCount = m_FindBlockMaxCount, + .BlockReuseMinPercentLimit = m_BlockReuseMinPercentLimit, + .AllowMultiparts = m_AllowMultiparts, + .CreateBuild = true, + .IgnoreExistingBlocks = false, + .UploadToZenCache = m_UploadToZenCache}); + if (AbortFlag) { throw std::runtime_error("Test aborted. (Upload scrambled)"); @@ -4741,7 +4496,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId, {BuildPartId}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath, DownloadOptions{.SystemRootDir = m_SystemRootDir, @@ -4766,7 +4522,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId2, {BuildPartId2}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath, DownloadOptions{.SystemRootDir = m_SystemRootDir, .ZenFolderPath = DownloadPath / ZenFolderName, @@ -4790,7 +4547,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId2, {BuildPartId2}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath, DownloadOptions{.SystemRootDir = m_SystemRootDir, .ZenFolderPath = DownloadPath / ZenFolderName, @@ -4814,7 +4572,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId, {BuildPartId}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath2, DownloadOptions{.SystemRootDir = m_SystemRootDir, .ZenFolderPath = DownloadPath2 / ZenFolderName, @@ -4838,7 +4597,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) StorageCacheStats, BuildId, 
{BuildPartId}, - {}, + /*BuildPartNames*/ {}, + /*ManifestPath*/ {}, DownloadPath3, DownloadOptions{.SystemRootDir = m_SystemRootDir, .ZenFolderPath = DownloadPath3 / ZenFolderName, diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h index 80c64c48d..f5c44ab55 100644 --- a/src/zen/cmds/builds_cmd.h +++ b/src/zen/cmds/builds_cmd.h @@ -72,7 +72,6 @@ private: uint8_t m_BlockReuseMinPercentLimit = 85; bool m_AllowMultiparts = true; std::string m_AllowPartialBlockRequests = "mixed"; - std::string m_ManifestPath; // Not a std::filesystem::path since it can be relative to m_Path AuthCommandLineOptions m_AuthOptions; @@ -96,15 +95,18 @@ private: std::string m_ExcludeFolders; std::string m_ExcludeExtensions; - cxxopts::Options m_UploadOptions{"upload", "Upload a folder"}; - uint64_t m_FindBlockMaxCount = 10000; - bool m_PostUploadVerify = false; + cxxopts::Options m_UploadOptions{"upload", "Upload a folder"}; + uint64_t m_FindBlockMaxCount = 10000; + bool m_PostUploadVerify = false; + std::filesystem::path m_ChunkingCachePath; + std::filesystem::path m_ManifestPath; cxxopts::Options m_DownloadOptions{"download", "Download a folder"}; std::vector<std::string> m_BuildPartNames; std::vector<std::string> m_BuildPartIds; bool m_PostDownloadVerify = false; bool m_EnableScavenging = true; + std::filesystem::path m_DownloadSpecPath; cxxopts::Options m_LsOptions{"ls", "List the content of uploaded build"}; std::filesystem::path m_LsResultPath; diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index c03ae476f..09a2e4f91 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -302,26 +302,7 @@ main(int argc, char** argv) zen::SetCurrentThreadName("main"); - std::vector<std::string> Args; -#if ZEN_PLATFORM_WINDOWS - LPWSTR RawCommandLine = GetCommandLineW(); - std::string CommandLine = zen::WideToUtf8(RawCommandLine); - Args = zen::ParseCommandLine(CommandLine); -#else - Args.reserve(argc); - for (int I = 0; I < argc; I++) - { - std::string Arg(argv[I]); - if ((!Arg.empty()) 
&& (Arg != " ")) - { - Args.emplace_back(std::move(Arg)); - } - } -#endif - std::vector<char*> RawArgs = zen::StripCommandlineQuotes(Args); - - argc = gsl::narrow<int>(RawArgs.size()); - argv = RawArgs.data(); + zen::CommandLineConverter ArgConverter(argc, argv); using namespace zen; using namespace std::literals; diff --git a/src/zencore/basicfile.cpp b/src/zencore/basicfile.cpp index 2fa02937d..bd4d119fb 100644 --- a/src/zencore/basicfile.cpp +++ b/src/zencore/basicfile.cpp @@ -181,7 +181,7 @@ BasicFile::ReadRange(uint64_t FileOffset, uint64_t ByteCount) void BasicFile::Read(void* Data, uint64_t BytesToRead, uint64_t FileOffset) { - const uint64_t MaxChunkSize = 2u * 1024 * 1024 * 1024; + const uint64_t MaxChunkSize = 512u * 1024u; std::error_code Ec; ReadFile(m_FileHandle, Data, BytesToRead, FileOffset, MaxChunkSize, Ec); if (Ec) diff --git a/src/zencore/include/zencore/parallelwork.h b/src/zencore/include/zencore/parallelwork.h index 138d0bc7c..536b0a056 100644 --- a/src/zencore/include/zencore/parallelwork.h +++ b/src/zencore/include/zencore/parallelwork.h @@ -21,7 +21,19 @@ public: typedef std::function<void(std::exception_ptr Ex, std::atomic<bool>& AbortFlag)> ExceptionCallback; typedef std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)> UpdateCallback; - void ScheduleWork(WorkerThreadPool& WorkerPool, WorkCallback&& Work, ExceptionCallback&& OnError = {}) + inline void ScheduleWork(WorkerThreadPool& WorkerPool, WorkCallback&& Work) { ScheduleWork(WorkerPool, std::move(Work), {}, m_Mode); } + + inline void ScheduleWork(WorkerThreadPool& WorkerPool, WorkCallback&& Work, ExceptionCallback&& OnError) + { + ScheduleWork(WorkerPool, std::move(Work), std::move(OnError), m_Mode); + } + + inline void ScheduleWork(WorkerThreadPool& WorkerPool, WorkCallback&& Work, WorkerThreadPool::EMode Mode) + { + ScheduleWork(WorkerPool, std::move(Work), {}, Mode); + } + + void ScheduleWork(WorkerThreadPool& WorkerPool, WorkCallback&& Work, 
ExceptionCallback&& OnError, WorkerThreadPool::EMode Mode) { m_PendingWork.AddCount(1); try @@ -42,7 +54,7 @@ public: OnError(std::current_exception(), m_AbortFlag); } }, - m_Mode); + Mode); } catch (const std::exception& Ex) { diff --git a/src/zenhttp/clients/httpclientcommon.cpp b/src/zenhttp/clients/httpclientcommon.cpp index 8e5136dff..47425e014 100644 --- a/src/zenhttp/clients/httpclientcommon.cpp +++ b/src/zenhttp/clients/httpclientcommon.cpp @@ -309,7 +309,7 @@ namespace detail { void BufferedReadFileStream::Read(void* Data, uint64_t BytesToRead, uint64_t FileOffset) { - const uint64_t MaxChunkSize = 1u * 1024 * 1024; + const uint64_t MaxChunkSize = 512u * 1024u; std::error_code Ec; ReadFile(m_FileHandle, Data, BytesToRead, FileOffset, MaxChunkSize, Ec); diff --git a/src/zenhttp/servers/httpasio.cpp b/src/zenhttp/servers/httpasio.cpp index 6c25636a2..734c452ae 100644 --- a/src/zenhttp/servers/httpasio.cpp +++ b/src/zenhttp/servers/httpasio.cpp @@ -164,7 +164,7 @@ Log() #if defined(ASIO_HAS_WINDOWS_OVERLAPPED_PTR) # define ZEN_USE_TRANSMITFILE 1 -# define ZEN_USE_ASYNC_SENDFILE ASIO_HAS_FILE +# define ZEN_USE_ASYNC_SENDFILE 0 #else # define ZEN_USE_TRANSMITFILE 0 # define ZEN_USE_ASYNC_SENDFILE 0 @@ -558,6 +558,14 @@ public: std::unique_ptr<HttpResponse> m_Response; }; +/** + * HTTP Response representation used internally by the ASIO server + * + * This is used to build up the response headers and payload prior to sending + * it over the network. It's also responsible for managing the send operation itself, + * including ownership of the source buffers until the operation completes. + * + */ struct HttpResponse { public: @@ -568,8 +576,18 @@ public: ~HttpResponse() = default; + /** + * Initialize the response for sending a payload made up of multiple blobs + * + * This builds the necessary headers and IO vectors for sending the response + * and also makes sure all buffers are owned for the duration of the + * operation. 
+ * + */ void InitializeForPayload(uint16_t ResponseCode, std::span<IoBuffer> BlobList) { + ZEN_ASSERT(m_State == State::kUninitialized); + ZEN_MEMSCOPE(GetHttpasioTag()); ZEN_TRACE_CPU("asio::InitializeForPayload"); @@ -578,13 +596,13 @@ public: const uint32_t ChunkCount = gsl::narrow<uint32_t>(BlobList.size()); m_DataBuffers.reserve(ChunkCount); - m_IoVecCount = ChunkCount + 1 /* one extra buffer for headers */; - m_IoVecs.resize(m_IoVecCount); + m_IoVecs.reserve(ChunkCount + 1); + + m_IoVecs.emplace_back(); // header IoVec m_IoVecCursor = 0; uint64_t LocalDataSize = 0; - int Index = 1; for (IoBuffer& Buffer : BlobList) { @@ -597,37 +615,71 @@ public: IoBuffer OwnedBuffer = std::move(Buffer); OwnedBuffer.MakeOwned(); - IoVec& Io = m_IoVecs[Index++]; - bool ChunkHandled = false; #if ZEN_USE_TRANSMITFILE || ZEN_USE_ASYNC_SENDFILE if (IoBufferFileReference FileRef; OwnedBuffer.GetFileReference(/* out */ FileRef)) { - Io.IsFileRef = true; - Io.Ref.FileRef = FileRef; - ChunkHandled = true; + // Since there's a limit to how much data TransmitFile can send in one go, + // we may need to split this into multiple IoVec entries. In this case we'll + // end up reallocating the IoVec array, but this should be rare. 
+ + uint64_t RemainingChunkBytes = FileRef.FileChunkSize; + uint64_t ChunkOffset = FileRef.FileChunkOffset; + + const uint32_t MaxTransmitSize = 1 * 1024 * 1024 * 1024; // 1 GB + + while (RemainingChunkBytes) + { + IoVec Io{.IsFileRef = true}; + + Io.Ref.FileRef.FileHandle = FileRef.FileHandle; + Io.Ref.FileRef.FileChunkOffset = ChunkOffset; + + if (RemainingChunkBytes > MaxTransmitSize) + { + Io.Ref.FileRef.FileChunkSize = MaxTransmitSize; + RemainingChunkBytes -= MaxTransmitSize; + } + else + { + Io.Ref.FileRef.FileChunkSize = gsl::narrow<uint32_t>(RemainingChunkBytes); + RemainingChunkBytes = 0; + } + + ChunkOffset += Io.Ref.FileRef.FileChunkSize; + + m_IoVecs.push_back(Io); + } + + ChunkHandled = true; } #endif if (!ChunkHandled) { - Io.IsFileRef = false; - uint32_t Size = gsl::narrow<uint32_t>(OwnedBuffer.Size()); - Io.Ref.MemoryRef = {OwnedBuffer.Data(), Size}; + IoVec Io{.IsFileRef = false}; + + Io.Ref.MemoryRef = {.Data = OwnedBuffer.Data(), .Size = OwnedBuffer.Size()}; + + m_IoVecs.push_back(Io); } - m_DataBuffers.emplace_back(OwnedBuffer); + m_DataBuffers.push_back(std::move(OwnedBuffer)); } + // Now that we know the full data size, we can build the headers + m_ContentLength = LocalDataSize; std::string_view Headers = GetHeaders(); - IoVec& Io = m_IoVecs[0]; + IoVec& HeaderIo = m_IoVecs[0]; - Io.IsFileRef = false; - Io.Ref.MemoryRef = {.Data = Headers.data(), .Size = gsl::narrow_cast<uint32_t>(Headers.size())}; + HeaderIo.IsFileRef = false; + HeaderIo.Ref.MemoryRef = {.Data = Headers.data(), .Size = Headers.size()}; + + m_State = State::kInitialized; } uint16_t ResponseCode() const { return m_ResponseCode; } @@ -653,17 +705,32 @@ public: void SendResponse(asio::ip::tcp::socket& TcpSocket, std::function<void(const asio::error_code& Ec, std::size_t ByteCount)>&& Token) { + ZEN_ASSERT(m_State == State::kInitialized); + + ZEN_MEMSCOPE(GetHttpasioTag()); + ZEN_TRACE_CPU("asio::SendResponse"); + m_SendCb = std::move(Token); + m_State = State::kSending; 
SendNextChunk(TcpSocket); } void SendNextChunk(asio::ip::tcp::socket& TcpSocket) { - if (m_IoVecCursor == m_IoVecCount) + ZEN_ASSERT(m_State == State::kSending); + + ZEN_MEMSCOPE(GetHttpasioTag()); + ZEN_TRACE_CPU("asio::SendNextChunk"); + + if (m_IoVecCursor == m_IoVecs.size()) { + // All data sent, complete the operation + ZEN_ASSERT(m_SendCb); + m_State = State::kSent; + auto CompletionToken = [Self = this, Token = std::move(m_SendCb), TotalBytes = m_TotalBytesSent] { Token({}, TotalBytes); }; asio::defer(TcpSocket.get_executor(), std::move(CompletionToken)); @@ -671,49 +738,47 @@ public: return; } + auto OnCompletion = [this, &TcpSocket](const asio::error_code& Ec, std::size_t ByteCount) { + ZEN_ASSERT(m_State == State::kSending); + + m_TotalBytesSent += ByteCount; + if (Ec) + { + m_State = State::kFailed; + m_SendCb(Ec, m_TotalBytesSent); + } + else + { + SendNextChunk(TcpSocket); + } + }; + const IoVec& Io = m_IoVecs[m_IoVecCursor++]; if (Io.IsFileRef) { - ZEN_TRACE_VERBOSE("SendNextChunk from FILE, thread: {}, bytes: {}", zen::GetCurrentThreadId(), Io.Ref.FileRef.FileChunkSize); + ZEN_TRACE_VERBOSE("SendNextChunk from FILE, thread: {}, offset: {}, bytes: {}", + zen::GetCurrentThreadId(), + Io.Ref.FileRef.FileChunkOffset, + Io.Ref.FileRef.FileChunkSize); #if ZEN_USE_TRANSMITFILE TransmitFileAsync(TcpSocket, Io.Ref.FileRef.FileHandle, Io.Ref.FileRef.FileChunkOffset, gsl::narrow_cast<uint32_t>(Io.Ref.FileRef.FileChunkSize), - [this, &TcpSocket](const asio::error_code& Ec, std::size_t ByteCount) { - m_TotalBytesSent += ByteCount; - if (Ec) - { - m_SendCb(Ec, m_TotalBytesSent); - } - else - { - SendNextChunk(TcpSocket); - } - }); + OnCompletion); #elif ZEN_USE_ASYNC_SENDFILE SendFileAsync(TcpSocket, Io.Ref.FileRef.FileHandle, Io.Ref.FileRef.FileChunkOffset, Io.Ref.FileRef.FileChunkSize, 64 * 1024, - [this, &TcpSocket](const asio::error_code& Ec, std::size_t ByteCount) { - m_TotalBytesSent += ByteCount; - if (Ec) - { - m_SendCb(Ec, m_TotalBytesSent); - } - else - 
{ - SendNextChunk(TcpSocket); - } - }); + OnCompletion); #else // This should never occur unless we compile with one // of the options above - ZEN_ASSERT("invalid file reference in response"); + ZEN_WARN("invalid file reference in response"); #endif return; @@ -724,7 +789,7 @@ public: std::vector<asio::const_buffer> AsioBuffers; AsioBuffers.push_back(asio::const_buffer{Io.Ref.MemoryRef.Data, Io.Ref.MemoryRef.Size}); - while (m_IoVecCursor != m_IoVecCount) + while (m_IoVecCursor != m_IoVecs.size()) { const IoVec& Io2 = m_IoVecs[m_IoVecCursor]; @@ -737,26 +802,23 @@ public: ++m_IoVecCursor; } - asio::async_write(TcpSocket, - std::move(AsioBuffers), - asio::transfer_all(), - [this, &TcpSocket](const asio::error_code& Ec, std::size_t ByteCount) { - m_TotalBytesSent += ByteCount; - if (Ec) - { - m_SendCb(Ec, m_TotalBytesSent); - } - else - { - SendNextChunk(TcpSocket); - } - }); + asio::async_write(TcpSocket, std::move(AsioBuffers), asio::transfer_all(), OnCompletion); } private: + enum class State : uint8_t + { + kUninitialized, + kInitialized, + kSending, + kSent, + kFailed + }; + uint32_t m_RequestNumber = 0; uint16_t m_ResponseCode = 0; bool m_IsKeepAlive = true; + State m_State = State::kUninitialized; HttpContentType m_ContentType = HttpContentType::kBinary; uint64_t m_ContentLength = 0; eastl::fixed_vector<IoBuffer, 8> m_DataBuffers; // This is here to keep the IoBuffer buffers/handles alive @@ -770,15 +832,14 @@ private: struct MemoryBuffer { const void* Data; - uint32_t Size; + uint64_t Size; } MemoryRef; IoBufferFileReference FileRef; } Ref; }; eastl::fixed_vector<IoVec, 8> m_IoVecs; - int m_IoVecCursor = 0; - int m_IoVecCount = 0; + unsigned int m_IoVecCursor = 0; std::function<void(const asio::error_code& Ec, std::size_t ByteCount)> m_SendCb; uint64_t m_TotalBytesSent = 0; diff --git a/src/zenremotestore/builds/buildmanifest.cpp b/src/zenremotestore/builds/buildmanifest.cpp new file mode 100644 index 000000000..051436e96 --- /dev/null +++ 
b/src/zenremotestore/builds/buildmanifest.cpp @@ -0,0 +1,173 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenremotestore/builds/buildmanifest.h> + +#include <zencore/compactbinary.h> +#include <zencore/fmtutils.h> + +#if ZEN_WITH_TESTS +# include <zencore/basicfile.h> +# include <zencore/testing.h> +# include <zencore/testutils.h> +#endif // ZEN_WITH_TESTS + +namespace zen { + +using namespace std::literals; + +BuildManifest +ParseBuildManifest(const std::filesystem::path& ManifestPath) +{ + BuildManifest Result; + { + IoBuffer ManifestContent = ReadFile(ManifestPath).Flatten(); + + if (ToLower(ManifestPath.extension().string()) == ".json") + { + IoBuffer MetaDataJson = ReadFile(ManifestPath).Flatten(); + std::string_view Json(reinterpret_cast<const char*>(MetaDataJson.GetData()), MetaDataJson.GetSize()); + std::string JsonError; + CbObject Manifest = LoadCompactBinaryFromJson(Json, JsonError).AsObject(); + if (!JsonError.empty()) + { + throw std::runtime_error(fmt::format("Invalid manifest file at {}. '{}'", ManifestPath, JsonError)); + } + CbObjectView PartsObject = Manifest["parts"sv].AsObjectView(); + for (CbFieldView PartsField : PartsObject) + { + std::string_view PartName = PartsField.GetName(); + if (PartName.empty()) + { + throw std::runtime_error(fmt::format("Part {} in manifest file at {} does not have a name. 
'{}'", + Result.Parts.size() + 1, + ManifestPath, + JsonError)); + } + CbObjectView Part = PartsField.AsObjectView(); + Oid PartId = Part["partId"sv].AsObjectId(); + CbArrayView FilesArray = Part["files"sv].AsArrayView(); + std::vector<std::filesystem::path> Files; + Files.reserve(FilesArray.Num()); + for (CbFieldView FileField : FilesArray) + { + std::filesystem::path File(FileField.AsU8String()); + Files.push_back(File); + } + + Result.Parts.push_back(BuildManifest::Part{.PartId = PartId, .PartName = std::string(PartName), .Files = std::move(Files)}); + } + return Result; + } + else + { + Result.Parts.resize(1); + BuildManifest::Part& SinglePart = Result.Parts.front(); + + std::string_view ManifestString((const char*)ManifestContent.GetView().GetData(), ManifestContent.GetSize()); + std::string_view::size_type Offset = 0; + while (Offset < ManifestContent.GetSize()) + { + size_t PathBreakOffset = ManifestString.find_first_of("\t\r\n", Offset); + if (PathBreakOffset == std::string_view::npos) + { + PathBreakOffset = ManifestContent.GetSize(); + } + std::string_view AssetPath = ManifestString.substr(Offset, PathBreakOffset - Offset); + if (!AssetPath.empty()) + { + SinglePart.Files.push_back(std::filesystem::path(AssetPath)); + } + Offset = PathBreakOffset; + size_t EolOffset = ManifestString.find_first_of("\r\n", Offset); + if (EolOffset == std::string_view::npos) + { + break; + } + Offset = EolOffset; + size_t LineBreakOffset = ManifestString.find_first_not_of("\t\r\n", Offset); + if (LineBreakOffset == std::string_view::npos) + { + break; + } + Offset = LineBreakOffset; + } + } + } + return Result; +} +#if ZEN_WITH_TESTS + +TEST_CASE("buildmanifest.unstructured") +{ + ScopedTemporaryDirectory Root; + std::vector<std::filesystem::path> Files = {"fileA", "dirA/FileB", "dirB/FileC", "dirB/FileD"}; + + { + ExtendableStringBuilder<512> SB; + for (const std::filesystem::path& File : Files) + { + SB << File.generic_string() << "\n"; + } + WriteFile(Root.Path() / 
"manifest.txt", IoBuffer(IoBuffer::Wrap, SB.ToView().data(), SB.ToView().length())); + } + + BuildManifest Manifest = ParseBuildManifest(Root.Path() / "manifest.txt"); + CHECK_EQ(Manifest.Parts.size(), 1u); + CHECK_EQ(Manifest.Parts[0].PartId, Oid::Zero); + CHECK_EQ(Manifest.Parts[0].PartName, ""); + CHECK_EQ(Manifest.Parts[0].Files, Files); +} + +TEST_CASE("buildmanifest.structured") +{ + ScopedTemporaryDirectory Root; + + std::string Id = Oid::NewOid().ToString(); + + std::string ManifestString = + "{\n" + " \"parts\": {\n" + " \"default\": {\n" + " \"partId\": \"098a2742d46c22a67ab57457\",\n" + " \"files\": [\n" + " \"foo/bar\",\n" + " \"baz.exe\"\n" + " ]\n" + " },\n" + " \"symbols\": {\n" + " \"files\": [\n" + " \"baz.pdb\"\n" + " ]\n" + " }\n" + " }\n" + "}\n"; + + WriteFile(Root.Path() / "manifest.json", IoBuffer(IoBuffer::Wrap, ManifestString.data(), ManifestString.length())); + + const Oid DefaultPartExpectedId = Oid::FromHexString("098a2742d46c22a67ab57457"); + const std::string DefaultPartExpectedName = "default"; + const Oid SymbolPartExpectedId = Oid::Zero; + const std::string SymbolsPartExpectedName = "symbols"; + + BuildManifest Manifest = ParseBuildManifest(Root.Path() / "manifest.json"); + CHECK_EQ(Manifest.Parts.size(), 2u); + CHECK_EQ(Manifest.Parts[0].PartId, DefaultPartExpectedId); + CHECK_EQ(Manifest.Parts[0].PartName, DefaultPartExpectedName); + CHECK_EQ(Manifest.Parts[0].Files.size(), 2u); + CHECK_EQ(Manifest.Parts[0].Files[0].generic_string(), "foo/bar"); + CHECK_EQ(Manifest.Parts[0].Files[1].generic_string(), "baz.exe"); + + CHECK_EQ(Manifest.Parts[1].PartId, SymbolPartExpectedId); + CHECK_EQ(Manifest.Parts[1].PartName, SymbolsPartExpectedName); + CHECK_EQ(Manifest.Parts[1].Files.size(), 1u); + CHECK_EQ(Manifest.Parts[1].Files[0].generic_string(), "baz.pdb"); +} + +void +buildmanifest_forcelink() +{ +} + +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp 
b/src/zenremotestore/builds/buildstorageoperations.cpp index 3ca2f72c1..26968efc1 100644 --- a/src/zenremotestore/builds/buildstorageoperations.cpp +++ b/src/zenremotestore/builds/buildstorageoperations.cpp @@ -3,11 +3,13 @@ #include <zenremotestore/builds/buildstorageoperations.h> #include <zenremotestore/builds/buildcontent.h> +#include <zenremotestore/builds/buildmanifest.h> #include <zenremotestore/builds/buildsavedstate.h> #include <zenremotestore/builds/buildstorage.h> #include <zenremotestore/builds/buildstoragecache.h> #include <zenremotestore/builds/buildstorageutil.h> #include <zenremotestore/chunking/chunkblock.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/chunking/chunkingcontroller.h> #include <zenremotestore/filesystemutils.h> #include <zenremotestore/operationlogoutput.h> @@ -16,6 +18,7 @@ #include <zencore/compactbinary.h> #include <zencore/compactbinaryfile.h> #include <zencore/compactbinaryutil.h> +#include <zencore/compactbinaryvalue.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/parallelwork.h> @@ -23,6 +26,7 @@ #include <zencore/string.h> #include <zencore/timer.h> #include <zencore/trace.h> +#include <zenutil/wildcard.h> #include <numeric> @@ -31,6 +35,12 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_set.h> ZEN_THIRD_PARTY_INCLUDES_END +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +# include <zencore/testutils.h> +# include <zenremotestore/builds/filebuildstorage.h> +#endif // ZEN_WITH_TESTS + namespace zen { using namespace std::literals; @@ -1293,37 +1303,39 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) } } - Work.ScheduleWork(m_IOWorkerPool, - [this, - &SequenceIndexChunksLeftToWriteCounters, - &Work, - &ExistsResult, - &WritePartsComplete, - &LooseChunkHashWorks, - LooseChunkHashWorkIndex, - TotalRequestCount, - TotalPartWriteCount, - &WriteCache, - &FilteredDownloadedBytesPerSecond, - 
&FilteredWrittenBytesPerSecond](std::atomic<bool>&) mutable { - ZEN_TRACE_CPU("Async_ReadPreDownloadedChunk"); - if (!m_AbortFlag) - { - LooseChunkHashWorkData& LooseChunkHashWork = LooseChunkHashWorks[LooseChunkHashWorkIndex]; - const uint32_t RemoteChunkIndex = LooseChunkHashWorks[LooseChunkHashWorkIndex].RemoteChunkIndex; - WriteLooseChunk(RemoteChunkIndex, - ExistsResult, - SequenceIndexChunksLeftToWriteCounters, - WritePartsComplete, - std::move(LooseChunkHashWork.ChunkTargetPtrs), - WriteCache, - Work, - TotalRequestCount, - TotalPartWriteCount, - FilteredDownloadedBytesPerSecond, - FilteredWrittenBytesPerSecond); - } - }); + Work.ScheduleWork( + m_IOWorkerPool, + [this, + &SequenceIndexChunksLeftToWriteCounters, + &Work, + &ExistsResult, + &WritePartsComplete, + &LooseChunkHashWorks, + LooseChunkHashWorkIndex, + TotalRequestCount, + TotalPartWriteCount, + &WriteCache, + &FilteredDownloadedBytesPerSecond, + &FilteredWrittenBytesPerSecond](std::atomic<bool>&) mutable { + ZEN_TRACE_CPU("Async_ReadPreDownloadedChunk"); + if (!m_AbortFlag) + { + LooseChunkHashWorkData& LooseChunkHashWork = LooseChunkHashWorks[LooseChunkHashWorkIndex]; + const uint32_t RemoteChunkIndex = LooseChunkHashWorks[LooseChunkHashWorkIndex].RemoteChunkIndex; + WriteLooseChunk(RemoteChunkIndex, + ExistsResult, + SequenceIndexChunksLeftToWriteCounters, + WritePartsComplete, + std::move(LooseChunkHashWork.ChunkTargetPtrs), + WriteCache, + Work, + TotalRequestCount, + TotalPartWriteCount, + FilteredDownloadedBytesPerSecond, + FilteredWrittenBytesPerSecond); + } + }, + WorkerThreadPool::EMode::EnableBacklog); } std::unique_ptr<CloneQueryInterface> CloneQuery; @@ -1583,7 +1595,9 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) FilteredWrittenBytesPerSecond.Stop(); } } - }); + }, + OnDiskPath.empty() ? 
WorkerThreadPool::EMode::DisableBacklog + : WorkerThreadPool::EMode::EnableBacklog); } }); } @@ -1771,7 +1785,9 @@ BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) FilteredWrittenBytesPerSecond.Stop(); } } - }); + }, + BlockChunkPath.empty() ? WorkerThreadPool::EMode::DisableBacklog + : WorkerThreadPool::EMode::EnableBacklog); } } } @@ -3175,6 +3191,8 @@ BuildsOperationUpdateFolder::WriteLooseChunk(const uint32_t RemoteChunkInd { FilteredDownloadedBytesPerSecond.Stop(); } + IoBufferFileReference FileRef; + bool EnableBacklog = Payload.GetFileReference(FileRef); AsyncWriteDownloadedChunk(m_Options.ZenFolderPath, RemoteChunkIndex, std::move(ChunkTargetPtrs), @@ -3184,7 +3202,8 @@ BuildsOperationUpdateFolder::WriteLooseChunk(const uint32_t RemoteChunkInd SequenceIndexChunksLeftToWriteCounters, WritePartsComplete, TotalPartWriteCount, - FilteredWrittenBytesPerSecond); + FilteredWrittenBytesPerSecond, + EnableBacklog); }); } }); @@ -3741,7 +3760,7 @@ BuildsOperationUpdateFolder::WriteLocalChunkToCache(CloneQueryInterface* C break; } const uint64_t NextChunkLength = m_RemoteContent.ChunkedContent.ChunkRawSizes[NextOp.ChunkIndex]; - if (ReadLength + NextChunkLength > m_Options.MaximumInMemoryPayloadSize) + if (ReadLength + NextChunkLength > BufferedOpenFile::BlockSize) { break; } @@ -4295,7 +4314,8 @@ BuildsOperationUpdateFolder::AsyncWriteDownloadedChunk(const std::filesystem::pa std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, std::atomic<uint64_t>& WritePartsComplete, const uint64_t TotalPartWriteCount, - FilteredRate& FilteredWrittenBytesPerSecond) + FilteredRate& FilteredWrittenBytesPerSecond, + bool EnableBacklog) { ZEN_TRACE_CPU("AsyncWriteDownloadedChunk"); @@ -4412,7 +4432,8 @@ BuildsOperationUpdateFolder::AsyncWriteDownloadedChunk(const std::filesystem::pa } } } - }); + }, + EnableBacklog ? 
WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog); } void @@ -4554,10 +4575,7 @@ BuildsOperationUploadFolder::BuildsOperationUploadFolder(OperationLogOutput& WorkerThreadPool& IOWorkerPool, WorkerThreadPool& NetworkPool, const Oid& BuildId, - const Oid& BuildPartId, - const std::string_view BuildPartName, const std::filesystem::path& Path, - const std::filesystem::path& ManifestPath, bool CreateBuild, const CbObject& MetaData, const Options& Options) @@ -4568,10 +4586,7 @@ BuildsOperationUploadFolder::BuildsOperationUploadFolder(OperationLogOutput& , m_IOWorkerPool(IOWorkerPool) , m_NetworkPool(NetworkPool) , m_BuildId(BuildId) -, m_BuildPartId(BuildPartId) -, m_BuildPartName(BuildPartName) , m_Path(Path) -, m_ManifestPath(ManifestPath) , m_CreateBuild(CreateBuild) , m_MetaData(MetaData) , m_Options(Options) @@ -4583,739 +4598,259 @@ BuildsOperationUploadFolder::BuildsOperationUploadFolder(OperationLogOutput& } } -void -BuildsOperationUploadFolder::Execute() +BuildsOperationUploadFolder::PrepareBuildResult +BuildsOperationUploadFolder::PrepareBuild() { - ZEN_TRACE_CPU("BuildsOperationUploadFolder::Execute"); - try - { - enum class TaskSteps : uint32_t - { - PrepareBuild, - CalculateDelta, - GenerateBlocks, - BuildPartManifest, - UploadBuildPart, - UploadAttachments, - FinalizeBuild, - PutBuildPartStats, - Cleanup, - StepCount - }; - - auto EndProgress = - MakeGuard([&]() { m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); }); + ZEN_TRACE_CPU("PrepareBuild"); - Stopwatch ProcessTimer; - - CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir); - CreateDirectories(m_Options.TempDir); - auto _ = MakeGuard([&]() { CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir); }); - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::PrepareBuild, (uint32_t)TaskSteps::StepCount); - - std::uint64_t TotalRawSize = 0; - - CbObject 
ChunkerParameters; + PrepareBuildResult Result; + Result.PreferredMultipartChunkSize = m_Options.PreferredMultipartChunkSize; + Stopwatch Timer; + if (m_CreateBuild) + { + ZEN_TRACE_CPU("CreateBuild"); - struct PrepareBuildResult + Stopwatch PutBuildTimer; + CbObject PutBuildResult = m_Storage.BuildStorage->PutBuild(m_BuildId, m_MetaData); + Result.PrepareBuildTimeMs = PutBuildTimer.GetElapsedTimeMs(); + if (auto ChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(); ChunkSize != 0) { - std::vector<ChunkBlockDescription> KnownBlocks; - uint64_t PreferredMultipartChunkSize = 0; - uint64_t PayloadSize = 0; - uint64_t PrepareBuildTimeMs = 0; - uint64_t FindBlocksTimeMs = 0; - uint64_t ElapsedTimeMs = 0; - }; - - std::future<PrepareBuildResult> PrepBuildResultFuture = m_NetworkPool.EnqueueTask( - std::packaged_task<PrepareBuildResult()>{[this] { - ZEN_TRACE_CPU("PrepareBuild"); - - PrepareBuildResult Result; - Result.PreferredMultipartChunkSize = m_Options.PreferredMultipartChunkSize; - Stopwatch Timer; - if (m_CreateBuild) - { - ZEN_TRACE_CPU("CreateBuild"); - - Stopwatch PutBuildTimer; - CbObject PutBuildResult = m_Storage.BuildStorage->PutBuild(m_BuildId, m_MetaData); - Result.PrepareBuildTimeMs = PutBuildTimer.GetElapsedTimeMs(); - Result.PreferredMultipartChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(Result.PreferredMultipartChunkSize); - Result.PayloadSize = m_MetaData.GetSize(); - } - else - { - ZEN_TRACE_CPU("PutBuild"); - Stopwatch GetBuildTimer; - CbObject Build = m_Storage.BuildStorage->GetBuild(m_BuildId); - Result.PrepareBuildTimeMs = GetBuildTimer.GetElapsedTimeMs(); - Result.PayloadSize = Build.GetSize(); - if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) - { - Result.PreferredMultipartChunkSize = ChunkSize; - } - else if (m_Options.AllowMultiparts) - { - ZEN_OPERATION_LOG_WARN(m_LogOutput, - "PreferredMultipartChunkSize is unknown. 
Defaulting to '{}'", - NiceBytes(Result.PreferredMultipartChunkSize)); - } - } - - if (!m_Options.IgnoreExistingBlocks) - { - ZEN_TRACE_CPU("FindBlocks"); - Stopwatch KnownBlocksTimer; - CbObject BlockDescriptionList = m_Storage.BuildStorage->FindBlocks(m_BuildId, m_Options.FindBlockMaxCount); - if (BlockDescriptionList) - { - Result.KnownBlocks = ParseChunkBlockDescriptionList(BlockDescriptionList); - } - m_FindBlocksStats.FindBlockTimeMS = KnownBlocksTimer.GetElapsedTimeMs(); - m_FindBlocksStats.FoundBlockCount = Result.KnownBlocks.size(); - Result.FindBlocksTimeMs = KnownBlocksTimer.GetElapsedTimeMs(); - } - Result.ElapsedTimeMs = Timer.GetElapsedTimeMs(); - return Result; - }}, - WorkerThreadPool::EMode::EnableBacklog); + Result.PreferredMultipartChunkSize = ChunkSize; + } + Result.PayloadSize = m_MetaData.GetSize(); + } + else + { + ZEN_TRACE_CPU("PutBuild"); + Stopwatch GetBuildTimer; + CbObject Build = m_Storage.BuildStorage->GetBuild(m_BuildId); + Result.PrepareBuildTimeMs = GetBuildTimer.GetElapsedTimeMs(); + Result.PayloadSize = Build.GetSize(); + if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + Result.PreferredMultipartChunkSize = ChunkSize; + } + else if (m_Options.AllowMultiparts) + { + ZEN_OPERATION_LOG_WARN(m_LogOutput, + "PreferredMultipartChunkSize is unknown. 
Defaulting to '{}'", + NiceBytes(Result.PreferredMultipartChunkSize)); + } + } - ChunkedFolderContent LocalContent; + if (!m_Options.IgnoreExistingBlocks) + { + ZEN_TRACE_CPU("FindBlocks"); + Stopwatch KnownBlocksTimer; + CbObject BlockDescriptionList = m_Storage.BuildStorage->FindBlocks(m_BuildId, m_Options.FindBlockMaxCount); + if (BlockDescriptionList) + { + Result.KnownBlocks = ParseChunkBlockDescriptionList(BlockDescriptionList); + } + Result.FindBlocksTimeMs = KnownBlocksTimer.GetElapsedTimeMs(); + } + Result.ElapsedTimeMs = Timer.GetElapsedTimeMs(); + return Result; +} +std::vector<BuildsOperationUploadFolder::UploadPart> +BuildsOperationUploadFolder::ReadFolder() +{ + std::vector<UploadPart> UploadParts; + std::filesystem::path ExcludeManifestPath = m_Path / m_Options.ZenExcludeManifestName; + tsl::robin_set<std::string> ExcludeAssetPaths; + if (IsFile(ExcludeManifestPath)) + { + std::filesystem::path AbsoluteExcludeManifestPath = + MakeSafeAbsolutePath(ExcludeManifestPath.is_absolute() ? 
ExcludeManifestPath : m_Path / ExcludeManifestPath); + BuildManifest Manifest = ParseBuildManifest(AbsoluteExcludeManifestPath); + const std::vector<std::filesystem::path>& AssetPaths = Manifest.Parts.front().Files; + ExcludeAssetPaths.reserve(AssetPaths.size()); + for (const std::filesystem::path& AssetPath : AssetPaths) { - Stopwatch ScanTimer; - FolderContent Content; - if (m_ManifestPath.empty()) - { - std::filesystem::path ExcludeManifestPath = m_Path / m_Options.ZenExcludeManifestName; - tsl::robin_set<std::string> ExcludeAssetPaths; - if (IsFile(ExcludeManifestPath)) - { - std::vector<std::filesystem::path> AssetPaths = ParseManifest(m_Path, ExcludeManifestPath); - ExcludeAssetPaths.reserve(AssetPaths.size()); - for (const std::filesystem::path& AssetPath : AssetPaths) - { - ExcludeAssetPaths.insert(AssetPath.generic_string()); - } - } - Content = GetFolderContent( - m_LocalFolderScanStats, - m_Path, - [this](const std::string_view& RelativePath) { return IsAcceptedFolder(RelativePath); }, - [this, &ExcludeAssetPaths](const std::string_view& RelativePath, uint64_t Size, uint32_t Attributes) -> bool { - ZEN_UNUSED(Size, Attributes); - if (!IsAcceptedFile(RelativePath)) - { - return false; - } - if (ExcludeAssetPaths.contains(std::filesystem::path(RelativePath).generic_string())) - { - return false; - } - return true; - }, - m_IOWorkerPool, - m_LogOutput.GetProgressUpdateDelayMS(), - [&](bool, std::ptrdiff_t) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Found {} files in '{}'...", - m_LocalFolderScanStats.AcceptedFileCount.load(), - m_Path); - }, - m_AbortFlag); - } - else - { - Stopwatch ManifestParseTimer; - std::vector<std::filesystem::path> AssetPaths = ParseManifest(m_Path, m_ManifestPath); - for (const std::filesystem::path& AssetPath : AssetPaths) - { - Content.Paths.push_back(AssetPath); - const std::filesystem::path AssetFilePath = (m_Path / AssetPath).make_preferred(); - Content.RawSizes.push_back(FileSizeFromPath(AssetFilePath)); -#if 
ZEN_PLATFORM_WINDOWS - Content.Attributes.push_back(GetFileAttributesFromPath(AssetFilePath)); -#endif // ZEN_PLATFORM_WINDOWS -#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - Content.Attributes.push_back(GetFileMode(AssetFilePath)); -#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - m_LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); - m_LocalFolderScanStats.AcceptedFileCount++; - } - if (m_ManifestPath.is_relative()) - { - Content.Paths.push_back(m_ManifestPath); - const std::filesystem::path ManifestFilePath = (m_Path / m_ManifestPath).make_preferred(); - Content.RawSizes.push_back(FileSizeFromPath(ManifestFilePath)); -#if ZEN_PLATFORM_WINDOWS - Content.Attributes.push_back(GetFileAttributesFromPath(ManifestFilePath)); -#endif // ZEN_PLATFORM_WINDOWS -#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - Content.Attributes.push_back(GetFileMode(ManifestFilePath)); -#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX - - m_LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); - m_LocalFolderScanStats.AcceptedFileCount++; - } - m_LocalFolderScanStats.FoundFileByteCount.store(m_LocalFolderScanStats.AcceptedFileByteCount); - m_LocalFolderScanStats.FoundFileCount.store(m_LocalFolderScanStats.AcceptedFileCount); - m_LocalFolderScanStats.ElapsedWallTimeUS = ManifestParseTimer.GetElapsedTimeUs(); - } + ExcludeAssetPaths.insert(AssetPath.generic_string()); + } + } - std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); - { - CbObjectWriter ChunkParametersWriter; - ChunkParametersWriter.AddString("name"sv, ChunkController->GetName()); - ChunkParametersWriter.AddObject("parameters"sv, ChunkController->GetParameters()); - ChunkerParameters = ChunkParametersWriter.Save(); - } + UploadParts.resize(1); - TotalRawSize = std::accumulate(Content.RawSizes.begin(), Content.RawSizes.end(), std::uint64_t(0)); + UploadPart& Part = UploadParts.front(); + GetFolderContentStatistics& 
LocalFolderScanStats = Part.LocalFolderScanStats; + Part.Content = GetFolderContent( + Part.LocalFolderScanStats, + m_Path, + [this](const std::string_view& RelativePath) { return IsAcceptedFolder(RelativePath); }, + [this, &ExcludeAssetPaths](const std::string_view& RelativePath, uint64_t Size, uint32_t Attributes) -> bool { + ZEN_UNUSED(Size, Attributes); + if (!IsAcceptedFile(RelativePath)) { - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Scan Folder")); - OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr); - - FilteredRate FilteredBytesHashed; - FilteredBytesHashed.Start(); - LocalContent = ChunkFolderContent( - m_ChunkingStats, - m_IOWorkerPool, - m_Path, - Content, - *ChunkController, - m_LogOutput.GetProgressUpdateDelayMS(), - [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { - FilteredBytesHashed.Update(m_ChunkingStats.BytesHashed.load()); - std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", - m_ChunkingStats.FilesProcessed.load(), - Content.Paths.size(), - NiceBytes(m_ChunkingStats.BytesHashed.load()), - NiceBytes(TotalRawSize), - NiceNum(FilteredBytesHashed.GetCurrent()), - m_ChunkingStats.UniqueChunksFound.load(), - NiceBytes(m_ChunkingStats.UniqueBytesFound.load())); - Progress.UpdateState({.Task = "Scanning files ", - .Details = Details, - .TotalCount = TotalRawSize, - .RemainingCount = TotalRawSize - m_ChunkingStats.BytesHashed.load(), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }, - m_AbortFlag, - m_PauseFlag); - FilteredBytesHashed.Stop(); - Progress.Finish(); - if (m_AbortFlag) - { - return; - } + return false; } - - if (!m_Options.IsQuiet) + if (ExcludeAssetPaths.contains(std::filesystem::path(RelativePath).generic_string())) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. 
Average hash rate {}B/sec", - LocalContent.Paths.size(), - NiceBytes(TotalRawSize), - m_ChunkingStats.UniqueChunksFound.load(), - NiceBytes(m_ChunkingStats.UniqueBytesFound.load()), - m_Path, - NiceTimeSpanMs(ScanTimer.GetElapsedTimeMs()), - NiceNum(GetBytesPerSecond(m_ChunkingStats.ElapsedWallTimeUS, m_ChunkingStats.BytesHashed))); + return false; } - } - - const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); - - std::vector<size_t> ReuseBlockIndexes; - std::vector<uint32_t> NewBlockChunkIndexes; + return true; + }, + m_IOWorkerPool, + m_LogOutput.GetProgressUpdateDelayMS(), + [&](bool, std::ptrdiff_t) { + ZEN_OPERATION_LOG_INFO(m_LogOutput, "Found {} files in '{}'...", LocalFolderScanStats.AcceptedFileCount.load(), m_Path); + }, + m_AbortFlag); + Part.TotalRawSize = std::accumulate(Part.Content.RawSizes.begin(), Part.Content.RawSizes.end(), std::uint64_t(0)); + + return UploadParts; +} - PrepareBuildResult PrepBuildResult = PrepBuildResultFuture.get(); +std::vector<BuildsOperationUploadFolder::UploadPart> +BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& ManifestPath) +{ + std::vector<UploadPart> UploadParts; + Stopwatch ManifestParseTimer; + std::filesystem::path AbsoluteManifestPath = MakeSafeAbsolutePath(ManifestPath.is_absolute() ? ManifestPath : m_Path / ManifestPath); + BuildManifest Manifest = ParseBuildManifest(AbsoluteManifestPath); + if (Manifest.Parts.empty()) + { + throw std::runtime_error(fmt::format("Manifest file at '{}' is invalid", ManifestPath)); + } - if (!m_Options.IsQuiet) + UploadParts.resize(Manifest.Parts.size()); + for (size_t PartIndex = 0; PartIndex < Manifest.Parts.size(); PartIndex++) + { + BuildManifest::Part& PartManifest = Manifest.Parts[PartIndex]; + if (ManifestPath.is_relative()) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Build prepare took {}. {} took {}, payload size {}{}", - NiceTimeSpanMs(PrepBuildResult.ElapsedTimeMs), - m_CreateBuild ? 
"PutBuild" : "GetBuild", - NiceTimeSpanMs(PrepBuildResult.PrepareBuildTimeMs), - NiceBytes(PrepBuildResult.PayloadSize), - m_Options.IgnoreExistingBlocks ? "" - : fmt::format(". Found {} blocks in {}", - PrepBuildResult.KnownBlocks.size(), - NiceTimeSpanMs(PrepBuildResult.FindBlocksTimeMs))); + PartManifest.Files.push_back(ManifestPath); } - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::CalculateDelta, (uint32_t)TaskSteps::StepCount); + UploadPart& Part = UploadParts[PartIndex]; + FolderContent& Content = Part.Content; - const std::uint64_t LargeAttachmentSize = - m_Options.AllowMultiparts ? PrepBuildResult.PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats; - Stopwatch BlockArrangeTimer; + const std::vector<std::filesystem::path>& AssetPaths = PartManifest.Files; + Content = GetValidFolderContent( + m_IOWorkerPool, + LocalFolderScanStats, + m_Path, + AssetPaths, + [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); }, + 1000, + m_AbortFlag, + m_PauseFlag); - std::vector<std::uint32_t> LooseChunkIndexes; + if (Content.Paths.size() != AssetPaths.size()) { - bool EnableBlocks = true; - std::vector<std::uint32_t> BlockChunkIndexes; - for (uint32_t ChunkIndex = 0; ChunkIndex < LocalContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) - { - const uint64_t ChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; - if (!EnableBlocks || ChunkRawSize == 0 || ChunkRawSize > m_Options.BlockParameters.MaxChunkEmbedSize) - { - LooseChunkIndexes.push_back(ChunkIndex); - m_LooseChunksStats.ChunkByteCount += ChunkRawSize; - } - else - { - BlockChunkIndexes.push_back(ChunkIndex); - m_FindBlocksStats.PotentialChunkByteCount += ChunkRawSize; - } - } - m_FindBlocksStats.PotentialChunkCount = BlockChunkIndexes.size(); - m_LooseChunksStats.ChunkCount = LooseChunkIndexes.size(); - - if (m_Options.IgnoreExistingBlocks) + const 
tsl::robin_set<std::filesystem::path> FoundPaths(Content.Paths.begin(), Content.Paths.end()); + ExtendableStringBuilder<1024> SB; + for (const std::filesystem::path& AssetPath : AssetPaths) { - if (!m_Options.IsQuiet) + if (!FoundPaths.contains(AssetPath)) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Ignoring any existing blocks in store"); - } - NewBlockChunkIndexes = std::move(BlockChunkIndexes); - } - else - { - ReuseBlockIndexes = FindReuseBlocks(m_LogOutput, - m_Options.BlockReuseMinPercentLimit, - m_Options.IsVerbose, - m_ReuseBlocksStats, - PrepBuildResult.KnownBlocks, - LocalContent.ChunkedContent.ChunkHashes, - BlockChunkIndexes, - NewBlockChunkIndexes); - m_FindBlocksStats.AcceptedBlockCount = ReuseBlockIndexes.size(); - - for (const ChunkBlockDescription& Description : PrepBuildResult.KnownBlocks) - { - for (uint32_t ChunkRawLength : Description.ChunkRawLengths) - { - m_FindBlocksStats.FoundBlockByteCount += ChunkRawLength; - } - m_FindBlocksStats.FoundBlockChunkCount += Description.ChunkRawHashes.size(); + SB << "\n " << AssetPath.generic_string(); } } + throw std::runtime_error( + fmt::format("Manifest file at '{}' references files that does not exist{}", ManifestPath, SB.ToView())); } - std::vector<std::vector<uint32_t>> NewBlockChunks; - ArrangeChunksIntoBlocks(LocalContent, LocalLookup, NewBlockChunkIndexes, NewBlockChunks); + Part.PartId = PartManifest.PartId; + Part.PartName = PartManifest.PartName; + Part.TotalRawSize = std::accumulate(Part.Content.RawSizes.begin(), Part.Content.RawSizes.end(), std::uint64_t(0)); + } - m_FindBlocksStats.NewBlocksCount = NewBlockChunks.size(); - for (uint32_t ChunkIndex : NewBlockChunkIndexes) - { - m_FindBlocksStats.NewBlocksChunkByteCount += LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; - } - m_FindBlocksStats.NewBlocksChunkCount = NewBlockChunkIndexes.size(); + return UploadParts; +} - const double AcceptedByteCountPercent = - m_FindBlocksStats.PotentialChunkByteCount > 0 - ? 
(100.0 * m_ReuseBlocksStats.AcceptedRawByteCount / m_FindBlocksStats.PotentialChunkByteCount) - : 0.0; +std::vector<std::pair<Oid, std::string>> +BuildsOperationUploadFolder::Execute(const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& ManifestPath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) +{ + ZEN_TRACE_CPU("BuildsOperationUploadFolder::Execute"); + try + { + Stopwatch ReadPartsTimer; + std::vector<UploadPart> UploadParts = ManifestPath.empty() ? ReadFolder() : ReadManifestParts(ManifestPath); - const double AcceptedReduntantByteCountPercent = - m_ReuseBlocksStats.AcceptedByteCount > 0 - ? (100.0 * m_ReuseBlocksStats.AcceptedReduntantByteCount) / - (m_ReuseBlocksStats.AcceptedByteCount + m_ReuseBlocksStats.AcceptedReduntantByteCount) - : 0.0; - if (!m_Options.IsQuiet) + for (UploadPart& Part : UploadParts) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Found {} chunks in {} ({}) blocks eligible for reuse in {}\n" - " Reusing {} ({}) matching chunks in {} blocks ({:.1f}%)\n" - " Accepting {} ({}) redundant chunks ({:.1f}%)\n" - " Rejected {} ({}) chunks in {} blocks\n" - " Arranged {} ({}) chunks in {} new blocks\n" - " Keeping {} ({}) chunks as loose chunks\n" - " Discovery completed in {}", - m_FindBlocksStats.FoundBlockChunkCount, - m_FindBlocksStats.FoundBlockCount, - NiceBytes(m_FindBlocksStats.FoundBlockByteCount), - NiceTimeSpanMs(m_FindBlocksStats.FindBlockTimeMS), - - m_ReuseBlocksStats.AcceptedChunkCount, - NiceBytes(m_ReuseBlocksStats.AcceptedRawByteCount), - m_FindBlocksStats.AcceptedBlockCount, - AcceptedByteCountPercent, - - m_ReuseBlocksStats.AcceptedReduntantChunkCount, - NiceBytes(m_ReuseBlocksStats.AcceptedReduntantByteCount), - AcceptedReduntantByteCountPercent, - - m_ReuseBlocksStats.RejectedChunkCount, - NiceBytes(m_ReuseBlocksStats.RejectedByteCount), - m_ReuseBlocksStats.RejectedBlockCount, - - m_FindBlocksStats.NewBlocksChunkCount, - 
NiceBytes(m_FindBlocksStats.NewBlocksChunkByteCount), - m_FindBlocksStats.NewBlocksCount, - - m_LooseChunksStats.ChunkCount, - NiceBytes(m_LooseChunksStats.ChunkByteCount), - - NiceTimeSpanMs(BlockArrangeTimer.GetElapsedTimeMs())); - } - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::GenerateBlocks, (uint32_t)TaskSteps::StepCount); - GeneratedBlocks NewBlocks; - - if (!NewBlockChunks.empty()) - { - Stopwatch GenerateBuildBlocksTimer; - auto __ = MakeGuard([&]() { - uint64_t BlockGenerateTimeUs = GenerateBuildBlocksTimer.GetElapsedTimeUs(); - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Generated {} ({}) and uploaded {} ({}) blocks in {}. Generate speed: {}B/sec. Transfer speed {}bits/sec.", - m_GenerateBlocksStats.GeneratedBlockCount.load(), - NiceBytes(m_GenerateBlocksStats.GeneratedBlockByteCount), - m_UploadStats.BlockCount.load(), - NiceBytes(m_UploadStats.BlocksBytes.load()), - NiceTimeSpanMs(BlockGenerateTimeUs / 1000), - NiceNum(GetBytesPerSecond(m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, - m_GenerateBlocksStats.GeneratedBlockByteCount)), - NiceNum(GetBytesPerSecond(m_UploadStats.ElapsedWallTimeUS, m_UploadStats.BlocksBytes * 8))); - } - }); - GenerateBuildBlocks(LocalContent, LocalLookup, NewBlockChunks, NewBlocks); - } - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::BuildPartManifest, (uint32_t)TaskSteps::StepCount); - - CbObject PartManifest; - { - CbObjectWriter PartManifestWriter; - Stopwatch ManifestGenerationTimer; - auto __ = MakeGuard([&]() { - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Generated build part manifest in {} ({})", - NiceTimeSpanMs(ManifestGenerationTimer.GetElapsedTimeMs()), - NiceBytes(PartManifestWriter.GetSaveSize())); - } - }); - PartManifestWriter.AddObject("chunker"sv, ChunkerParameters); - - std::vector<IoHash> AllChunkBlockHashes; - std::vector<ChunkBlockDescription> AllChunkBlockDescriptions; - 
AllChunkBlockHashes.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); - AllChunkBlockDescriptions.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); - for (size_t ReuseBlockIndex : ReuseBlockIndexes) - { - AllChunkBlockDescriptions.push_back(PrepBuildResult.KnownBlocks[ReuseBlockIndex]); - AllChunkBlockHashes.push_back(PrepBuildResult.KnownBlocks[ReuseBlockIndex].BlockHash); - } - AllChunkBlockDescriptions.insert(AllChunkBlockDescriptions.end(), - NewBlocks.BlockDescriptions.begin(), - NewBlocks.BlockDescriptions.end()); - for (const ChunkBlockDescription& BlockDescription : NewBlocks.BlockDescriptions) - { - AllChunkBlockHashes.push_back(BlockDescription.BlockHash); - } - std::vector<IoHash> AbsoluteChunkHashes; - if (m_Options.DoExtraContentValidation) + if (Part.PartId == Oid::Zero) { - tsl::robin_map<IoHash, size_t, IoHash::Hasher> ChunkHashToAbsoluteChunkIndex; - AbsoluteChunkHashes.reserve(LocalContent.ChunkedContent.ChunkHashes.size()); - for (uint32_t ChunkIndex : LooseChunkIndexes) - { - ChunkHashToAbsoluteChunkIndex.insert({LocalContent.ChunkedContent.ChunkHashes[ChunkIndex], AbsoluteChunkHashes.size()}); - AbsoluteChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); - } - for (const ChunkBlockDescription& Block : AllChunkBlockDescriptions) + if (UploadParts.size() != 1) { - for (const IoHash& ChunkHash : Block.ChunkRawHashes) - { - ChunkHashToAbsoluteChunkIndex.insert({ChunkHash, AbsoluteChunkHashes.size()}); - AbsoluteChunkHashes.push_back(ChunkHash); - } + throw std::runtime_error(fmt::format("Multi part upload manifest '{}' must contains build part id", ManifestPath)); } - for (const IoHash& ChunkHash : LocalContent.ChunkedContent.ChunkHashes) - { - ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(ChunkHash)] == ChunkHash); - ZEN_ASSERT(LocalContent.ChunkedContent.ChunkHashes[LocalLookup.ChunkHashToChunkIndex.at(ChunkHash)] == ChunkHash); - } - for (const uint32_t 
ChunkIndex : LocalContent.ChunkedContent.ChunkOrders) + + if (BuildPartId == Oid::Zero) { - ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex])] == - LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); - ZEN_ASSERT(LocalLookup.ChunkHashToChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]) == ChunkIndex); + Part.PartId = Oid::NewOid(); } - } - std::vector<uint32_t> AbsoluteChunkOrders = CalculateAbsoluteChunkOrders(LocalContent.ChunkedContent.ChunkHashes, - LocalContent.ChunkedContent.ChunkOrders, - LocalLookup.ChunkHashToChunkIndex, - LooseChunkIndexes, - AllChunkBlockDescriptions); - - if (m_Options.DoExtraContentValidation) - { - for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); ChunkOrderIndex++) + else { - uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndex]; - uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[ChunkOrderIndex]; - const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; - const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex]; - ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + Part.PartId = BuildPartId; } } - - WriteBuildContentToCompactBinary(PartManifestWriter, - LocalContent.Platform, - LocalContent.Paths, - LocalContent.RawHashes, - LocalContent.RawSizes, - LocalContent.Attributes, - LocalContent.ChunkedContent.SequenceRawHashes, - LocalContent.ChunkedContent.ChunkCounts, - LocalContent.ChunkedContent.ChunkHashes, - LocalContent.ChunkedContent.ChunkRawSizes, - AbsoluteChunkOrders, - LooseChunkIndexes, - AllChunkBlockHashes); - - if (m_Options.DoExtraContentValidation) + if (Part.PartName.empty()) { - ChunkedFolderContent VerifyFolderContent; - - std::vector<uint32_t> OutAbsoluteChunkOrders; - std::vector<IoHash> OutLooseChunkHashes; - std::vector<uint64_t> OutLooseChunkRawSizes; - std::vector<IoHash> OutBlockRawHashes; - 
ReadBuildContentFromCompactBinary(PartManifestWriter.Save(), - VerifyFolderContent.Platform, - VerifyFolderContent.Paths, - VerifyFolderContent.RawHashes, - VerifyFolderContent.RawSizes, - VerifyFolderContent.Attributes, - VerifyFolderContent.ChunkedContent.SequenceRawHashes, - VerifyFolderContent.ChunkedContent.ChunkCounts, - OutAbsoluteChunkOrders, - OutLooseChunkHashes, - OutLooseChunkRawSizes, - OutBlockRawHashes); - ZEN_ASSERT(OutBlockRawHashes == AllChunkBlockHashes); - - for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++) + if (UploadParts.size() != 1) { - uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; - const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; - - uint32_t VerifyChunkIndex = OutAbsoluteChunkOrders[OrderIndex]; - const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex]; - - ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + throw std::runtime_error(fmt::format("Multi part upload manifest '{}' must contains build part name", ManifestPath)); } - - CalculateLocalChunkOrders(OutAbsoluteChunkOrders, - OutLooseChunkHashes, - OutLooseChunkRawSizes, - AllChunkBlockDescriptions, - VerifyFolderContent.ChunkedContent.ChunkHashes, - VerifyFolderContent.ChunkedContent.ChunkRawSizes, - VerifyFolderContent.ChunkedContent.ChunkOrders, - m_Options.DoExtraContentValidation); - - ZEN_ASSERT(LocalContent.Paths == VerifyFolderContent.Paths); - ZEN_ASSERT(LocalContent.RawHashes == VerifyFolderContent.RawHashes); - ZEN_ASSERT(LocalContent.RawSizes == VerifyFolderContent.RawSizes); - ZEN_ASSERT(LocalContent.Attributes == VerifyFolderContent.Attributes); - ZEN_ASSERT(LocalContent.ChunkedContent.SequenceRawHashes == VerifyFolderContent.ChunkedContent.SequenceRawHashes); - ZEN_ASSERT(LocalContent.ChunkedContent.ChunkCounts == VerifyFolderContent.ChunkedContent.ChunkCounts); - - for (uint32_t OrderIndex = 0; OrderIndex < 
LocalContent.ChunkedContent.ChunkOrders.size(); OrderIndex++) + if (BuildPartName.empty()) { - uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; - const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; - uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; - - uint32_t VerifyChunkIndex = VerifyFolderContent.ChunkedContent.ChunkOrders[OrderIndex]; - const IoHash VerifyChunkHash = VerifyFolderContent.ChunkedContent.ChunkHashes[VerifyChunkIndex]; - uint64_t VerifyChunkRawSize = VerifyFolderContent.ChunkedContent.ChunkRawSizes[VerifyChunkIndex]; - - ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); - ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize); + throw std::runtime_error("Build part name must be set"); } + Part.PartName = std::string(BuildPartName); } - PartManifest = PartManifestWriter.Save(); } - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::UploadBuildPart, (uint32_t)TaskSteps::StepCount); - - Stopwatch PutBuildPartResultTimer; - std::pair<IoHash, std::vector<IoHash>> PutBuildPartResult = - m_Storage.BuildStorage->PutBuildPart(m_BuildId, m_BuildPartId, m_BuildPartName, PartManifest); if (!m_Options.IsQuiet) { ZEN_OPERATION_LOG_INFO(m_LogOutput, - "PutBuildPart took {}, payload size {}. 
{} attachments are needed.", - NiceTimeSpanMs(PutBuildPartResultTimer.GetElapsedTimeMs()), - NiceBytes(PartManifest.GetSize()), - PutBuildPartResult.second.size()); + "Reading {} parts took {}", + UploadParts.size(), + NiceTimeSpanMs(ReadPartsTimer.GetElapsedTimeMs())); } - IoHash PartHash = PutBuildPartResult.first; - - auto UploadAttachments = [this, &LocalContent, &LocalLookup, &NewBlockChunks, &NewBlocks, &LooseChunkIndexes, &LargeAttachmentSize]( - std::span<IoHash> RawHashes, - std::vector<IoHash>& OutUnknownChunks) { - if (!m_AbortFlag) - { - UploadStatistics TempUploadStats; - LooseChunksStatistics TempLooseChunksStats; - - Stopwatch TempUploadTimer; - auto __ = MakeGuard([&]() { - if (!m_Options.IsQuiet) - { - uint64_t TempChunkUploadTimeUs = TempUploadTimer.GetElapsedTimeUs(); - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Uploaded {} ({}) blocks. " - "Compressed {} ({} {}B/s) and uploaded {} ({}) chunks. " - "Transferred {} ({}bits/s) in {}", - TempUploadStats.BlockCount.load(), - NiceBytes(TempUploadStats.BlocksBytes), - - TempLooseChunksStats.CompressedChunkCount.load(), - NiceBytes(TempLooseChunksStats.CompressedChunkBytes.load()), - NiceNum(GetBytesPerSecond(TempLooseChunksStats.CompressChunksElapsedWallTimeUS, - TempLooseChunksStats.ChunkByteCount)), - TempUploadStats.ChunkCount.load(), - NiceBytes(TempUploadStats.ChunksBytes), - - NiceBytes(TempUploadStats.BlocksBytes + TempUploadStats.ChunksBytes), - NiceNum(GetBytesPerSecond(TempUploadStats.ElapsedWallTimeUS, TempUploadStats.ChunksBytes * 8)), - NiceTimeSpanMs(TempChunkUploadTimeUs / 1000)); - } - }); - UploadPartBlobs(LocalContent, - LocalLookup, - RawHashes, - NewBlockChunks, - NewBlocks, - LooseChunkIndexes, - LargeAttachmentSize, - TempUploadStats, - TempLooseChunksStats, - OutUnknownChunks); - m_UploadStats += TempUploadStats; - m_LooseChunksStats += TempLooseChunksStats; - } - }; - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::UploadAttachments, (uint32_t)TaskSteps::StepCount); + 
const uint32_t PartsUploadStepCount = gsl::narrow<uint32_t>(uint32_t(PartTaskSteps::StepCount) * UploadParts.size()); - std::vector<IoHash> UnknownChunks; - if (m_Options.IgnoreExistingBlocks) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "PutBuildPart uploading all attachments, needs are: {}", - FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); - } + const uint32_t PrepareBuildStep = 0; + const uint32_t UploadPartsStep = 1; + const uint32_t FinalizeBuildStep = UploadPartsStep + PartsUploadStepCount; + const uint32_t CleanupStep = FinalizeBuildStep + 1; + const uint32_t StepCount = CleanupStep + 1; - std::vector<IoHash> ForceUploadChunkHashes; - ForceUploadChunkHashes.reserve(LooseChunkIndexes.size()); + auto EndProgress = MakeGuard([&]() { m_LogOutput.SetLogOperationProgress(StepCount, StepCount); }); - for (uint32_t ChunkIndex : LooseChunkIndexes) - { - ForceUploadChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); - } + Stopwatch ProcessTimer; - for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockHeaders.size(); BlockIndex++) - { - if (NewBlocks.BlockHeaders[BlockIndex]) - { - // Block was not uploaded during generation - ForceUploadChunkHashes.push_back(NewBlocks.BlockDescriptions[BlockIndex].BlockHash); - } - } - UploadAttachments(ForceUploadChunkHashes, UnknownChunks); - } - else if (!PutBuildPartResult.second.empty()) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "PutBuildPart needs attachments: {}", - FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); - } - UploadAttachments(PutBuildPartResult.second, UnknownChunks); - } + CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir); + CreateDirectories(m_Options.TempDir); + auto _ = MakeGuard([&]() { CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir); }); - auto BuildUnkownChunksResponse = [](const std::vector<IoHash>& UnknownChunks, bool WillRetry) 
{ - return fmt::format( - "The following build blobs was reported as needed for upload but was reported as existing at the start of the " - "operation.{}{}", - WillRetry ? " Treating this as a transient inconsistency issue and will attempt to retry finalization."sv : ""sv, - FormatArray<IoHash>(UnknownChunks, "\n "sv)); - }; + m_LogOutput.SetLogOperationProgress(PrepareBuildStep, StepCount); - if (!UnknownChunks.empty()) - { - ZEN_OPERATION_LOG_WARN(m_LogOutput, "{}", BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ true)); - } + m_PrepBuildResultFuture = m_NetworkPool.EnqueueTask(std::packaged_task<PrepareBuildResult()>{[this] { return PrepareBuild(); }}, + WorkerThreadPool::EMode::EnableBacklog); - uint32_t FinalizeBuildPartRetryCount = 5; - while (!m_AbortFlag && (FinalizeBuildPartRetryCount--) > 0) + for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++) { - Stopwatch FinalizeBuildPartTimer; - std::vector<IoHash> Needs = m_Storage.BuildStorage->FinalizeBuildPart(m_BuildId, m_BuildPartId, PartHash); - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "FinalizeBuildPart took {}. 
{} attachments are missing.", - NiceTimeSpanMs(FinalizeBuildPartTimer.GetElapsedTimeMs()), - Needs.size()); - } - if (Needs.empty()) - { - break; - } - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "FinalizeBuildPart needs attachments: {}", FormatArray<IoHash>(Needs, "\n "sv)); - } + const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount)); - std::vector<IoHash> RetryUnknownChunks; - UploadAttachments(Needs, RetryUnknownChunks); - if (RetryUnknownChunks == UnknownChunks) - { - if (FinalizeBuildPartRetryCount > 0) - { - // Back off a bit - Sleep(1000); - } - } - else + const UploadPart& Part = UploadParts[PartIndex]; + UploadBuildPart(ChunkController, ChunkCache, PartIndex, Part, PartStepOffset, StepCount); + if (m_AbortFlag) { - UnknownChunks = RetryUnknownChunks; - ZEN_OPERATION_LOG_WARN(m_LogOutput, - "{}", - BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ FinalizeBuildPartRetryCount != 0)); + return {}; } } - if (!UnknownChunks.empty()) - { - throw std::runtime_error(BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ false)); - } - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::FinalizeBuild, (uint32_t)TaskSteps::StepCount); + m_LogOutput.SetLogOperationProgress(FinalizeBuildStep, StepCount); if (m_CreateBuild && !m_AbortFlag) { @@ -5327,79 +4862,15 @@ BuildsOperationUploadFolder::Execute() } } - if (!NewBlocks.BlockDescriptions.empty() && !m_AbortFlag) - { - uint64_t UploadBlockMetadataCount = 0; - Stopwatch UploadBlockMetadataTimer; + m_LogOutput.SetLogOperationProgress(CleanupStep, StepCount); - uint32_t FailedMetadataUploadCount = 1; - int32_t MetadataUploadRetryCount = 3; - while ((MetadataUploadRetryCount-- > 0) && (FailedMetadataUploadCount > 0)) - { - FailedMetadataUploadCount = 0; - for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockDescriptions.size(); BlockIndex++) - { - if (m_AbortFlag) - { - break; - } - const IoHash& BlockHash = 
NewBlocks.BlockDescriptions[BlockIndex].BlockHash; - if (!NewBlocks.MetaDataHasBeenUploaded[BlockIndex]) - { - const CbObject BlockMetaData = - BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) - { - m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId, - std::vector<IoHash>({BlockHash}), - std::vector<CbObject>({BlockMetaData})); - } - bool MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData); - if (MetadataSucceeded) - { - m_UploadStats.BlocksBytes += BlockMetaData.GetSize(); - NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; - UploadBlockMetadataCount++; - } - else - { - FailedMetadataUploadCount++; - } - } - } - } - if (UploadBlockMetadataCount > 0) - { - uint64_t ElapsedUS = UploadBlockMetadataTimer.GetElapsedTimeUs(); - m_UploadStats.ElapsedWallTimeUS += ElapsedUS; - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Uploaded metadata for {} blocks in {}", - UploadBlockMetadataCount, - NiceTimeSpanMs(ElapsedUS / 1000)); - } - } + std::vector<std::pair<Oid, std::string>> Result; + Result.reserve(UploadParts.size()); + for (UploadPart& Part : UploadParts) + { + Result.push_back(std::make_pair(Part.PartId, Part.PartName)); } - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::PutBuildPartStats, (uint32_t)TaskSteps::StepCount); - - m_Storage.BuildStorage->PutBuildPartStats( - m_BuildId, - m_BuildPartId, - {{"totalSize", double(m_LocalFolderScanStats.FoundFileByteCount.load())}, - {"reusedRatio", AcceptedByteCountPercent / 100.0}, - {"reusedBlockCount", double(m_FindBlocksStats.AcceptedBlockCount)}, - {"reusedBlockByteCount", double(m_ReuseBlocksStats.AcceptedRawByteCount)}, - {"newBlockCount", double(m_FindBlocksStats.NewBlocksCount)}, - {"newBlockByteCount", double(m_FindBlocksStats.NewBlocksChunkByteCount)}, - {"uploadedCount", double(m_UploadStats.BlockCount.load() 
+ m_UploadStats.ChunkCount.load())}, - {"uploadedByteCount", double(m_UploadStats.BlocksBytes.load() + m_UploadStats.ChunksBytes.load())}, - {"uploadedBytesPerSec", - double(GetBytesPerSecond(m_UploadStats.ElapsedWallTimeUS, m_UploadStats.ChunksBytes + m_UploadStats.BlocksBytes))}, - {"elapsedTimeSec", double(ProcessTimer.GetElapsedTimeMs() / 1000.0)}}); - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount); + return Result; } catch (const std::exception&) { @@ -5408,43 +4879,6 @@ BuildsOperationUploadFolder::Execute() } } -std::vector<std::filesystem::path> -BuildsOperationUploadFolder::ParseManifest(const std::filesystem::path& Path, const std::filesystem::path& ManifestPath) -{ - std::vector<std::filesystem::path> AssetPaths; - std::filesystem::path AbsoluteManifestPath = MakeSafeAbsolutePath(ManifestPath.is_absolute() ? ManifestPath : Path / ManifestPath); - IoBuffer ManifestContent = ReadFile(AbsoluteManifestPath).Flatten(); - std::string_view ManifestString((const char*)ManifestContent.GetView().GetData(), ManifestContent.GetSize()); - std::string_view::size_type Offset = 0; - while (Offset < ManifestContent.GetSize()) - { - size_t PathBreakOffset = ManifestString.find_first_of("\t\r\n", Offset); - if (PathBreakOffset == std::string_view::npos) - { - PathBreakOffset = ManifestContent.GetSize(); - } - std::string_view AssetPath = ManifestString.substr(Offset, PathBreakOffset - Offset); - if (!AssetPath.empty()) - { - AssetPaths.emplace_back(std::filesystem::path(AssetPath)); - } - Offset = PathBreakOffset; - size_t EolOffset = ManifestString.find_first_of("\r\n", Offset); - if (EolOffset == std::string_view::npos) - { - break; - } - Offset = EolOffset; - size_t LineBreakOffset = ManifestString.find_first_not_of("\t\r\n", Offset); - if (LineBreakOffset == std::string_view::npos) - { - break; - } - Offset = LineBreakOffset; - } - return AssetPaths; -} - bool BuildsOperationUploadFolder::IsAcceptedFolder(const 
std::string_view& RelativePath) const { @@ -5580,7 +5014,9 @@ void BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& Content, const ChunkedContentLookup& Lookup, const std::vector<std::vector<uint32_t>>& NewBlockChunks, - GeneratedBlocks& OutBlocks) + GeneratedBlocks& OutBlocks, + GenerateBlocksStatistics& GenerateBlocksStats, + UploadStatistics& UploadStats) { ZEN_TRACE_CPU("GenerateBuildBlocks"); const std::size_t NewBlockCount = NewBlockChunks.size(); @@ -5626,6 +5062,8 @@ BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& ChunksInBlock, &Lock, &OutBlocks, + &GenerateBlocksStats, + &UploadStats, &FilteredGeneratedBytesPerSecond, &QueuedPendingBlocksForUpload, &FilteredUploadedBytesPerSecond, @@ -5655,8 +5093,8 @@ BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& Writer.AddString("createdBy", "zen"); OutBlocks.BlockMetaDatas[BlockIndex] = Writer.Save(); } - m_GenerateBlocksStats.GeneratedBlockByteCount += OutBlocks.BlockSizes[BlockIndex]; - m_GenerateBlocksStats.GeneratedBlockCount++; + GenerateBlocksStats.GeneratedBlockByteCount += OutBlocks.BlockSizes[BlockIndex]; + GenerateBlocksStats.GeneratedBlockCount++; Lock.WithExclusiveLock([&]() { OutBlocks.BlockHashToBlockIndex.insert_or_assign(OutBlocks.BlockDescriptions[BlockIndex].BlockHash, BlockIndex); @@ -5668,7 +5106,7 @@ BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); } - if (m_GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) + if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) { FilteredGeneratedBytesPerSecond.Stop(); } @@ -5689,6 +5127,8 @@ BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& UploadBlocksPool, [this, NewBlockCount, + &GenerateBlocksStats, + &UploadStats, &FilteredUploadedBytesPerSecond, &QueuedPendingBlocksForUpload, &OutBlocks, @@ -5697,7 +5137,7 @@ 
BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& auto _ = MakeGuard([&QueuedPendingBlocksForUpload] { QueuedPendingBlocksForUpload--; }); if (!m_AbortFlag) { - if (m_GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) + if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) { ZEN_TRACE_CPU("GenerateBuildBlocks_Save"); @@ -5731,7 +5171,7 @@ BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& BlockHash, ZenContentType::kCompressedBinary, std::move(Payload).GetCompressed()); - m_UploadStats.BlocksBytes += CompressedBlockSize; + UploadStats.BlocksBytes += CompressedBlockSize; if (m_Options.IsVerbose) { @@ -5762,11 +5202,11 @@ BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& } OutBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; - m_UploadStats.BlocksBytes += BlockMetaData.GetSize(); + UploadStats.BlocksBytes += BlockMetaData.GetSize(); } - m_UploadStats.BlockCount++; - if (m_UploadStats.BlockCount == NewBlockCount) + UploadStats.BlockCount++; + if (UploadStats.BlockCount == NewBlockCount) { FilteredUploadedBytesPerSecond.Stop(); } @@ -5782,23 +5222,23 @@ BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { ZEN_UNUSED(PendingWork); - FilteredGeneratedBytesPerSecond.Update(m_GenerateBlocksStats.GeneratedBlockByteCount.load()); - FilteredUploadedBytesPerSecond.Update(m_UploadStats.BlocksBytes.load()); + FilteredGeneratedBytesPerSecond.Update(GenerateBlocksStats.GeneratedBlockByteCount.load()); + FilteredUploadedBytesPerSecond.Update(UploadStats.BlocksBytes.load()); std::string Details = fmt::format("Generated {}/{} ({}, {}B/s). 
Uploaded {}/{} ({}, {}bits/s)", - m_GenerateBlocksStats.GeneratedBlockCount.load(), + GenerateBlocksStats.GeneratedBlockCount.load(), NewBlockCount, - NiceBytes(m_GenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), NiceNum(FilteredGeneratedBytesPerSecond.GetCurrent()), - m_UploadStats.BlockCount.load(), + UploadStats.BlockCount.load(), NewBlockCount, - NiceBytes(m_UploadStats.BlocksBytes.load()), + NiceBytes(UploadStats.BlocksBytes.load()), NiceNum(FilteredUploadedBytesPerSecond.GetCurrent() * 8)); Progress.UpdateState({.Task = "Generating blocks", .Details = Details, .TotalCount = gsl::narrow<uint64_t>(NewBlockCount), - .RemainingCount = gsl::narrow<uint64_t>(NewBlockCount - m_GenerateBlocksStats.GeneratedBlockCount.load()), + .RemainingCount = gsl::narrow<uint64_t>(NewBlockCount - GenerateBlocksStats.GeneratedBlockCount.load()), .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, false); }); @@ -5807,8 +5247,8 @@ BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& Progress.Finish(); - m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTimeUS(); - m_UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); + GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTimeUS(); + UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); } } @@ -5985,6 +5425,671 @@ BuildsOperationUploadFolder::RebuildBlock(const ChunkedFolderContent& Content, }; void +BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + uint32_t PartIndex, + const UploadPart& Part, + uint32_t PartStepOffset, + uint32_t StepCount) +{ + Stopwatch UploadTimer; + + ChunkingStatistics ChunkingStats; + FindBlocksStatistics FindBlocksStats; + ReuseBlocksStatistics ReuseBlocksStats; + 
UploadStatistics UploadStats; + GenerateBlocksStatistics GenerateBlocksStats; + + LooseChunksStatistics LooseChunksStats; + ChunkedFolderContent LocalContent; + + m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::ChunkPartContent, StepCount); + + Stopwatch ScanTimer; + { + std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Scan Folder")); + OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr); + + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + LocalContent = ChunkFolderContent( + ChunkingStats, + m_IOWorkerPool, + m_Path, + Part.Content, + ChunkController, + ChunkCache, + m_LogOutput.GetProgressUpdateDelayMS(), + [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + Part.Content.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(Part.TotalRawSize), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + Progress.UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = Part.TotalRawSize, + .RemainingCount = Part.TotalRawSize - ChunkingStats.BytesHashed.load(), + .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }, + m_AbortFlag, + m_PauseFlag); + FilteredBytesHashed.Stop(); + Progress.Finish(); + if (m_AbortFlag) + { + return; + } + } + + if (!m_Options.IsQuiet) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. 
Average hash rate {}B/sec", + Part.Content.Paths.size(), + NiceBytes(Part.TotalRawSize), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load()), + m_Path, + NiceTimeSpanMs(ScanTimer.GetElapsedTimeMs()), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); + } + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); + + std::vector<size_t> ReuseBlockIndexes; + std::vector<uint32_t> NewBlockChunkIndexes; + + if (PartIndex == 0) + { + const PrepareBuildResult PrepBuildResult = m_PrepBuildResultFuture.get(); + + m_FindBlocksStats.FindBlockTimeMS = PrepBuildResult.ElapsedTimeMs; + m_FindBlocksStats.FoundBlockCount = PrepBuildResult.KnownBlocks.size(); + + if (!m_Options.IsQuiet) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Build prepare took {}. {} took {}, payload size {}{}", + NiceTimeSpanMs(PrepBuildResult.ElapsedTimeMs), + m_CreateBuild ? "PutBuild" : "GetBuild", + NiceTimeSpanMs(PrepBuildResult.PrepareBuildTimeMs), + NiceBytes(PrepBuildResult.PayloadSize), + m_Options.IgnoreExistingBlocks ? "" + : fmt::format(". Found {} blocks in {}", + PrepBuildResult.KnownBlocks.size(), + NiceTimeSpanMs(PrepBuildResult.FindBlocksTimeMs))); + } + + m_PreferredMultipartChunkSize = PrepBuildResult.PreferredMultipartChunkSize; + + m_LargeAttachmentSize = m_Options.AllowMultiparts ? 
m_PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + + m_KnownBlocks = std::move(PrepBuildResult.KnownBlocks); + } + + ZEN_ASSERT(m_PreferredMultipartChunkSize != 0); + ZEN_ASSERT(m_LargeAttachmentSize != 0); + + m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::CalculateDelta, StepCount); + + Stopwatch BlockArrangeTimer; + + std::vector<std::uint32_t> LooseChunkIndexes; + { + bool EnableBlocks = true; + std::vector<std::uint32_t> BlockChunkIndexes; + for (uint32_t ChunkIndex = 0; ChunkIndex < LocalContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) + { + const uint64_t ChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + if (!EnableBlocks || ChunkRawSize == 0 || ChunkRawSize > m_Options.BlockParameters.MaxChunkEmbedSize) + { + LooseChunkIndexes.push_back(ChunkIndex); + LooseChunksStats.ChunkByteCount += ChunkRawSize; + } + else + { + BlockChunkIndexes.push_back(ChunkIndex); + FindBlocksStats.PotentialChunkByteCount += ChunkRawSize; + } + } + FindBlocksStats.PotentialChunkCount += BlockChunkIndexes.size(); + LooseChunksStats.ChunkCount = LooseChunkIndexes.size(); + + if (m_Options.IgnoreExistingBlocks) + { + if (!m_Options.IsQuiet) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, "Ignoring any existing blocks in store"); + } + NewBlockChunkIndexes = std::move(BlockChunkIndexes); + } + else + { + ReuseBlockIndexes = FindReuseBlocks(m_LogOutput, + m_Options.BlockReuseMinPercentLimit, + m_Options.IsVerbose, + ReuseBlocksStats, + m_KnownBlocks, + LocalContent.ChunkedContent.ChunkHashes, + BlockChunkIndexes, + NewBlockChunkIndexes); + FindBlocksStats.AcceptedBlockCount += ReuseBlockIndexes.size(); + + for (const ChunkBlockDescription& Description : m_KnownBlocks) + { + for (uint32_t ChunkRawLength : Description.ChunkRawLengths) + { + FindBlocksStats.FoundBlockByteCount += ChunkRawLength; + } + FindBlocksStats.FoundBlockChunkCount += Description.ChunkRawHashes.size(); + } + } + } + + std::vector<std::vector<uint32_t>> 
NewBlockChunks; + ArrangeChunksIntoBlocks(LocalContent, LocalLookup, NewBlockChunkIndexes, NewBlockChunks); + + FindBlocksStats.NewBlocksCount += NewBlockChunks.size(); + for (uint32_t ChunkIndex : NewBlockChunkIndexes) + { + FindBlocksStats.NewBlocksChunkByteCount += LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + FindBlocksStats.NewBlocksChunkCount += NewBlockChunkIndexes.size(); + + const double AcceptedByteCountPercent = FindBlocksStats.PotentialChunkByteCount > 0 + ? (100.0 * ReuseBlocksStats.AcceptedRawByteCount / FindBlocksStats.PotentialChunkByteCount) + : 0.0; + + const double AcceptedReduntantByteCountPercent = + ReuseBlocksStats.AcceptedByteCount > 0 ? (100.0 * ReuseBlocksStats.AcceptedReduntantByteCount) / + (ReuseBlocksStats.AcceptedByteCount + ReuseBlocksStats.AcceptedReduntantByteCount) + : 0.0; + if (!m_Options.IsQuiet) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Found {} chunks in {} ({}) blocks eligible for reuse in {}\n" + " Reusing {} ({}) matching chunks in {} blocks ({:.1f}%)\n" + " Accepting {} ({}) redundant chunks ({:.1f}%)\n" + " Rejected {} ({}) chunks in {} blocks\n" + " Arranged {} ({}) chunks in {} new blocks\n" + " Keeping {} ({}) chunks as loose chunks\n" + " Discovery completed in {}", + FindBlocksStats.FoundBlockChunkCount, + FindBlocksStats.FoundBlockCount, + NiceBytes(FindBlocksStats.FoundBlockByteCount), + NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS), + + ReuseBlocksStats.AcceptedChunkCount, + NiceBytes(ReuseBlocksStats.AcceptedRawByteCount), + FindBlocksStats.AcceptedBlockCount, + AcceptedByteCountPercent, + + ReuseBlocksStats.AcceptedReduntantChunkCount, + NiceBytes(ReuseBlocksStats.AcceptedReduntantByteCount), + AcceptedReduntantByteCountPercent, + + ReuseBlocksStats.RejectedChunkCount, + NiceBytes(ReuseBlocksStats.RejectedByteCount), + ReuseBlocksStats.RejectedBlockCount, + + FindBlocksStats.NewBlocksChunkCount, + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount), + FindBlocksStats.NewBlocksCount, + + 
LooseChunksStats.ChunkCount, + NiceBytes(LooseChunksStats.ChunkByteCount), + + NiceTimeSpanMs(BlockArrangeTimer.GetElapsedTimeMs())); + } + + m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::GenerateBlocks, StepCount); + GeneratedBlocks NewBlocks; + + if (!NewBlockChunks.empty()) + { + Stopwatch GenerateBuildBlocksTimer; + auto __ = MakeGuard([&]() { + uint64_t BlockGenerateTimeUs = GenerateBuildBlocksTimer.GetElapsedTimeUs(); + if (!m_Options.IsQuiet) + { + ZEN_OPERATION_LOG_INFO( + m_LogOutput, + "Generated {} ({}) and uploaded {} ({}) blocks in {}. Generate speed: {}B/sec. Transfer speed {}bits/sec.", + GenerateBlocksStats.GeneratedBlockCount.load(), + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount), + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes.load()), + NiceTimeSpanMs(BlockGenerateTimeUs / 1000), + NiceNum(GetBytesPerSecond(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, + GenerateBlocksStats.GeneratedBlockByteCount)), + NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.BlocksBytes * 8))); + } + }); + GenerateBuildBlocks(LocalContent, LocalLookup, NewBlockChunks, NewBlocks, GenerateBlocksStats, UploadStats); + } + + m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::BuildPartManifest, StepCount); + + CbObject PartManifest; + { + CbObjectWriter PartManifestWriter; + Stopwatch ManifestGenerationTimer; + auto __ = MakeGuard([&]() { + if (!m_Options.IsQuiet) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Generated build part manifest in {} ({})", + NiceTimeSpanMs(ManifestGenerationTimer.GetElapsedTimeMs()), + NiceBytes(PartManifestWriter.GetSaveSize())); + } + }); + + PartManifestWriter.BeginObject("chunker"sv); + { + PartManifestWriter.AddString("name"sv, ChunkController.GetName()); + PartManifestWriter.AddObject("parameters"sv, ChunkController.GetParameters()); + } + PartManifestWriter.EndObject(); // chunker + + std::vector<IoHash> AllChunkBlockHashes; + 
std::vector<ChunkBlockDescription> AllChunkBlockDescriptions; + AllChunkBlockHashes.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); + AllChunkBlockDescriptions.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); + for (size_t ReuseBlockIndex : ReuseBlockIndexes) + { + AllChunkBlockDescriptions.push_back(m_KnownBlocks[ReuseBlockIndex]); + AllChunkBlockHashes.push_back(m_KnownBlocks[ReuseBlockIndex].BlockHash); + } + AllChunkBlockDescriptions.insert(AllChunkBlockDescriptions.end(), + NewBlocks.BlockDescriptions.begin(), + NewBlocks.BlockDescriptions.end()); + for (const ChunkBlockDescription& BlockDescription : NewBlocks.BlockDescriptions) + { + AllChunkBlockHashes.push_back(BlockDescription.BlockHash); + } + + std::vector<IoHash> AbsoluteChunkHashes; + if (m_Options.DoExtraContentValidation) + { + tsl::robin_map<IoHash, size_t, IoHash::Hasher> ChunkHashToAbsoluteChunkIndex; + AbsoluteChunkHashes.reserve(LocalContent.ChunkedContent.ChunkHashes.size()); + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + ChunkHashToAbsoluteChunkIndex.insert({LocalContent.ChunkedContent.ChunkHashes[ChunkIndex], AbsoluteChunkHashes.size()}); + AbsoluteChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + } + for (const ChunkBlockDescription& Block : AllChunkBlockDescriptions) + { + for (const IoHash& ChunkHash : Block.ChunkRawHashes) + { + ChunkHashToAbsoluteChunkIndex.insert({ChunkHash, AbsoluteChunkHashes.size()}); + AbsoluteChunkHashes.push_back(ChunkHash); + } + } + for (const IoHash& ChunkHash : LocalContent.ChunkedContent.ChunkHashes) + { + ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(ChunkHash)] == ChunkHash); + ZEN_ASSERT(LocalContent.ChunkedContent.ChunkHashes[LocalLookup.ChunkHashToChunkIndex.at(ChunkHash)] == ChunkHash); + } + for (const uint32_t ChunkIndex : LocalContent.ChunkedContent.ChunkOrders) + { + 
ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex])] == + LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + ZEN_ASSERT(LocalLookup.ChunkHashToChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]) == ChunkIndex); + } + } + std::vector<uint32_t> AbsoluteChunkOrders = CalculateAbsoluteChunkOrders(LocalContent.ChunkedContent.ChunkHashes, + LocalContent.ChunkedContent.ChunkOrders, + LocalLookup.ChunkHashToChunkIndex, + LooseChunkIndexes, + AllChunkBlockDescriptions); + + if (m_Options.DoExtraContentValidation) + { + for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); ChunkOrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndex]; + uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[ChunkOrderIndex]; + const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + } + } + + WriteBuildContentToCompactBinary(PartManifestWriter, + LocalContent.Platform, + LocalContent.Paths, + LocalContent.RawHashes, + LocalContent.RawSizes, + LocalContent.Attributes, + LocalContent.ChunkedContent.SequenceRawHashes, + LocalContent.ChunkedContent.ChunkCounts, + LocalContent.ChunkedContent.ChunkHashes, + LocalContent.ChunkedContent.ChunkRawSizes, + AbsoluteChunkOrders, + LooseChunkIndexes, + AllChunkBlockHashes); + + if (m_Options.DoExtraContentValidation) + { + ChunkedFolderContent VerifyFolderContent; + + std::vector<uint32_t> OutAbsoluteChunkOrders; + std::vector<IoHash> OutLooseChunkHashes; + std::vector<uint64_t> OutLooseChunkRawSizes; + std::vector<IoHash> OutBlockRawHashes; + ReadBuildContentFromCompactBinary(PartManifestWriter.Save(), + VerifyFolderContent.Platform, + VerifyFolderContent.Paths, + VerifyFolderContent.RawHashes, + 
VerifyFolderContent.RawSizes, + VerifyFolderContent.Attributes, + VerifyFolderContent.ChunkedContent.SequenceRawHashes, + VerifyFolderContent.ChunkedContent.ChunkCounts, + OutAbsoluteChunkOrders, + OutLooseChunkHashes, + OutLooseChunkRawSizes, + OutBlockRawHashes); + ZEN_ASSERT(OutBlockRawHashes == AllChunkBlockHashes); + + for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = OutAbsoluteChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + } + + CalculateLocalChunkOrders(OutAbsoluteChunkOrders, + OutLooseChunkHashes, + OutLooseChunkRawSizes, + AllChunkBlockDescriptions, + VerifyFolderContent.ChunkedContent.ChunkHashes, + VerifyFolderContent.ChunkedContent.ChunkRawSizes, + VerifyFolderContent.ChunkedContent.ChunkOrders, + m_Options.DoExtraContentValidation); + + ZEN_ASSERT(LocalContent.Paths == VerifyFolderContent.Paths); + ZEN_ASSERT(LocalContent.RawHashes == VerifyFolderContent.RawHashes); + ZEN_ASSERT(LocalContent.RawSizes == VerifyFolderContent.RawSizes); + ZEN_ASSERT(LocalContent.Attributes == VerifyFolderContent.Attributes); + ZEN_ASSERT(LocalContent.ChunkedContent.SequenceRawHashes == VerifyFolderContent.ChunkedContent.SequenceRawHashes); + ZEN_ASSERT(LocalContent.ChunkedContent.ChunkCounts == VerifyFolderContent.ChunkedContent.ChunkCounts); + + for (uint32_t OrderIndex = 0; OrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; + + uint32_t 
VerifyChunkIndex = VerifyFolderContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = VerifyFolderContent.ChunkedContent.ChunkHashes[VerifyChunkIndex]; + uint64_t VerifyChunkRawSize = VerifyFolderContent.ChunkedContent.ChunkRawSizes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize); + } + } + PartManifest = PartManifestWriter.Save(); + } + + m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::UploadBuildPart, StepCount); + + Stopwatch PutBuildPartResultTimer; + std::pair<IoHash, std::vector<IoHash>> PutBuildPartResult = + m_Storage.BuildStorage->PutBuildPart(m_BuildId, Part.PartId, Part.PartName, PartManifest); + if (!m_Options.IsQuiet) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "PutBuildPart took {}, payload size {}. {} attachments are needed.", + NiceTimeSpanMs(PutBuildPartResultTimer.GetElapsedTimeMs()), + NiceBytes(PartManifest.GetSize()), + PutBuildPartResult.second.size()); + } + IoHash PartHash = PutBuildPartResult.first; + + auto UploadAttachments = + [this, &LooseChunksStats, &UploadStats, &LocalContent, &LocalLookup, &NewBlockChunks, &NewBlocks, &LooseChunkIndexes]( + std::span<IoHash> RawHashes, + std::vector<IoHash>& OutUnknownChunks) { + if (!m_AbortFlag) + { + UploadStatistics TempUploadStats; + LooseChunksStatistics TempLooseChunksStats; + + Stopwatch TempUploadTimer; + auto __ = MakeGuard([&]() { + if (!m_Options.IsQuiet) + { + uint64_t TempChunkUploadTimeUs = TempUploadTimer.GetElapsedTimeUs(); + ZEN_OPERATION_LOG_INFO( + m_LogOutput, + "Uploaded {} ({}) blocks. " + "Compressed {} ({} {}B/s) and uploaded {} ({}) chunks. 
" + "Transferred {} ({}bits/s) in {}", + TempUploadStats.BlockCount.load(), + NiceBytes(TempUploadStats.BlocksBytes), + + TempLooseChunksStats.CompressedChunkCount.load(), + NiceBytes(TempLooseChunksStats.CompressedChunkBytes.load()), + NiceNum(GetBytesPerSecond(TempLooseChunksStats.CompressChunksElapsedWallTimeUS, + TempLooseChunksStats.ChunkByteCount)), + TempUploadStats.ChunkCount.load(), + NiceBytes(TempUploadStats.ChunksBytes), + + NiceBytes(TempUploadStats.BlocksBytes + TempUploadStats.ChunksBytes), + NiceNum(GetBytesPerSecond(TempUploadStats.ElapsedWallTimeUS, TempUploadStats.ChunksBytes * 8)), + NiceTimeSpanMs(TempChunkUploadTimeUs / 1000)); + } + }); + UploadPartBlobs(LocalContent, + LocalLookup, + RawHashes, + NewBlockChunks, + NewBlocks, + LooseChunkIndexes, + m_LargeAttachmentSize, + TempUploadStats, + TempLooseChunksStats, + OutUnknownChunks); + UploadStats += TempUploadStats; + LooseChunksStats += TempLooseChunksStats; + } + }; + + m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::UploadAttachments, StepCount); + + std::vector<IoHash> UnknownChunks; + if (m_Options.IgnoreExistingBlocks) + { + if (m_Options.IsVerbose) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "PutBuildPart uploading all attachments, needs are: {}", + FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); + } + + std::vector<IoHash> ForceUploadChunkHashes; + ForceUploadChunkHashes.reserve(LooseChunkIndexes.size()); + + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + ForceUploadChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + } + + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockHeaders.size(); BlockIndex++) + { + if (NewBlocks.BlockHeaders[BlockIndex]) + { + // Block was not uploaded during generation + ForceUploadChunkHashes.push_back(NewBlocks.BlockDescriptions[BlockIndex].BlockHash); + } + } + UploadAttachments(ForceUploadChunkHashes, UnknownChunks); + } + else if (!PutBuildPartResult.second.empty()) + { + if 
(m_Options.IsVerbose) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "PutBuildPart needs attachments: {}", + FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); + } + UploadAttachments(PutBuildPartResult.second, UnknownChunks); + } + + auto BuildUnkownChunksResponse = [](const std::vector<IoHash>& UnknownChunks, bool WillRetry) { + return fmt::format( + "The following build blobs was reported as needed for upload but was reported as existing at the start of the " + "operation.{}{}", + WillRetry ? " Treating this as a transient inconsistency issue and will attempt to retry finalization."sv : ""sv, + FormatArray<IoHash>(UnknownChunks, "\n "sv)); + }; + + if (!UnknownChunks.empty()) + { + ZEN_OPERATION_LOG_WARN(m_LogOutput, "{}", BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ true)); + } + + uint32_t FinalizeBuildPartRetryCount = 5; + while (!m_AbortFlag && (FinalizeBuildPartRetryCount--) > 0) + { + Stopwatch FinalizeBuildPartTimer; + std::vector<IoHash> Needs = m_Storage.BuildStorage->FinalizeBuildPart(m_BuildId, Part.PartId, PartHash); + if (!m_Options.IsQuiet) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "FinalizeBuildPart took {}. 
{} attachments are missing.", + NiceTimeSpanMs(FinalizeBuildPartTimer.GetElapsedTimeMs()), + Needs.size()); + } + if (Needs.empty()) + { + break; + } + if (m_Options.IsVerbose) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, "FinalizeBuildPart needs attachments: {}", FormatArray<IoHash>(Needs, "\n "sv)); + } + + std::vector<IoHash> RetryUnknownChunks; + UploadAttachments(Needs, RetryUnknownChunks); + if (RetryUnknownChunks == UnknownChunks) + { + if (FinalizeBuildPartRetryCount > 0) + { + // Back off a bit + Sleep(1000); + } + } + else + { + UnknownChunks = RetryUnknownChunks; + ZEN_OPERATION_LOG_WARN(m_LogOutput, + "{}", + BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ FinalizeBuildPartRetryCount != 0)); + } + } + + if (!UnknownChunks.empty()) + { + throw std::runtime_error(BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ false)); + } + + if (!NewBlocks.BlockDescriptions.empty() && !m_AbortFlag) + { + uint64_t UploadBlockMetadataCount = 0; + Stopwatch UploadBlockMetadataTimer; + + uint32_t FailedMetadataUploadCount = 1; + int32_t MetadataUploadRetryCount = 3; + while ((MetadataUploadRetryCount-- > 0) && (FailedMetadataUploadCount > 0)) + { + FailedMetadataUploadCount = 0; + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockDescriptions.size(); BlockIndex++) + { + if (m_AbortFlag) + { + break; + } + const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; + if (!NewBlocks.MetaDataHasBeenUploaded[BlockIndex]) + { + const CbObject BlockMetaData = + BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); + if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) + { + m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId, + std::vector<IoHash>({BlockHash}), + std::vector<CbObject>({BlockMetaData})); + } + bool MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData); + if (MetadataSucceeded) + { + UploadStats.BlocksBytes += 
BlockMetaData.GetSize(); + NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + UploadBlockMetadataCount++; + } + else + { + FailedMetadataUploadCount++; + } + } + } + } + if (UploadBlockMetadataCount > 0) + { + uint64_t ElapsedUS = UploadBlockMetadataTimer.GetElapsedTimeUs(); + UploadStats.ElapsedWallTimeUS += ElapsedUS; + if (!m_Options.IsQuiet) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Uploaded metadata for {} blocks in {}", + UploadBlockMetadataCount, + NiceTimeSpanMs(ElapsedUS / 1000)); + } + } + + // The newly generated blocks are now known blocks so the next part upload can use those blocks as well + m_KnownBlocks.insert(m_KnownBlocks.end(), NewBlocks.BlockDescriptions.begin(), NewBlocks.BlockDescriptions.end()); + } + + m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::PutBuildPartStats, StepCount); + + m_Storage.BuildStorage->PutBuildPartStats( + m_BuildId, + Part.PartId, + {{"totalSize", double(Part.LocalFolderScanStats.FoundFileByteCount.load())}, + {"reusedRatio", AcceptedByteCountPercent / 100.0}, + {"reusedBlockCount", double(FindBlocksStats.AcceptedBlockCount)}, + {"reusedBlockByteCount", double(ReuseBlocksStats.AcceptedRawByteCount)}, + {"newBlockCount", double(FindBlocksStats.NewBlocksCount)}, + {"newBlockByteCount", double(FindBlocksStats.NewBlocksChunkByteCount)}, + {"uploadedCount", double(UploadStats.BlockCount.load() + UploadStats.ChunkCount.load())}, + {"uploadedByteCount", double(UploadStats.BlocksBytes.load() + UploadStats.ChunksBytes.load())}, + {"uploadedBytesPerSec", + double(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.ChunksBytes + UploadStats.BlocksBytes))}, + {"elapsedTimeSec", double(UploadTimer.GetElapsedTimeMs() / 1000.0)}}); + + m_LocalFolderScanStats += Part.LocalFolderScanStats; + m_ChunkingStats += ChunkingStats; + m_FindBlocksStats += FindBlocksStats; + m_ReuseBlocksStats += ReuseBlocksStats; + m_UploadStats += UploadStats; + m_GenerateBlocksStats += GenerateBlocksStats; + 
m_LooseChunksStats += LooseChunksStats; +} + +void BuildsOperationUploadFolder::UploadPartBlobs(const ChunkedFolderContent& Content, const ChunkedContentLookup& Lookup, std::span<IoHash> RawHashes, @@ -7179,4 +7284,1007 @@ BuildsOperationValidateBuildPart::ValidateChunkBlock(IoBuffer&& Payload, return GetChunkBlockDescription(BlockBuffer.Flatten(), BlobHash); } +std::vector<std::pair<Oid, std::string>> +ResolveBuildPartNames(CbObjectView BuildObject, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + std::uint64_t& OutPreferredMultipartChunkSize) +{ + std::vector<std::pair<Oid, std::string>> Result; + { + CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); + if (!PartsObject) + { + throw std::runtime_error("Build object does not have a 'parts' object"); + } + + OutPreferredMultipartChunkSize = BuildObject["chunkSize"sv].AsUInt64(OutPreferredMultipartChunkSize); + + std::vector<std::pair<Oid, std::string>> AvailableParts; + + for (CbFieldView PartView : PartsObject) + { + const std::string BuildPartName = std::string(PartView.GetName()); + const Oid BuildPartId = PartView.AsObjectId(); + if (BuildPartId == Oid::Zero) + { + ExtendableStringBuilder<128> SB; + for (CbFieldView ScanPartView : PartsObject) + { + SB.Append(fmt::format("\n {}: {}", ScanPartView.GetName(), ScanPartView.AsObjectId())); + } + throw std::runtime_error(fmt::format("Build object parts does not have a '{}' object id{}", BuildPartName, SB.ToView())); + } + AvailableParts.push_back({BuildPartId, BuildPartName}); + } + + if (BuildPartIds.empty() && BuildPartNames.empty()) + { + Result = AvailableParts; + } + else + { + for (const std::string& BuildPartName : BuildPartNames) + { + if (auto It = std::find_if(AvailableParts.begin(), + AvailableParts.end(), + [&BuildPartName](const auto& Part) { return Part.second == BuildPartName; }); + It != AvailableParts.end()) + { + Result.push_back(*It); + } + else + { + throw 
std::runtime_error(fmt::format("Build {} object does not have a part named '{}'", BuildId, BuildPartName)); + } + } + for (const Oid& BuildPartId : BuildPartIds) + { + if (auto It = std::find_if(AvailableParts.begin(), + AvailableParts.end(), + [&BuildPartId](const auto& Part) { return Part.first == BuildPartId; }); + It != AvailableParts.end()) + { + Result.push_back(*It); + } + else + { + throw std::runtime_error(fmt::format("Build {} object does not have a part with id '{}'", BuildId, BuildPartId)); + } + } + } + + if (Result.empty()) + { + throw std::runtime_error(fmt::format("Build object does not have any parts", BuildId)); + } + } + return Result; +} + +ChunkedFolderContent +GetRemoteContent(OperationLogOutput& Output, + StorageInstance& Storage, + const Oid& BuildId, + const std::vector<std::pair<Oid, std::string>>& BuildParts, + const BuildManifest& Manifest, + std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards, + std::unique_ptr<ChunkingController>& OutChunkController, + std::vector<ChunkedFolderContent>& OutPartContents, + std::vector<ChunkBlockDescription>& OutBlockDescriptions, + std::vector<IoHash>& OutLooseChunkHashes, + bool IsQuiet, + bool IsVerbose, + bool DoExtraContentVerify) +{ + ZEN_TRACE_CPU("GetRemoteContent"); + + Stopwatch GetBuildPartTimer; + const Oid BuildPartId = BuildParts[0].first; + const std::string_view BuildPartName = BuildParts[0].second; + CbObject BuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, BuildPartId); + if (!IsQuiet) + { + ZEN_OPERATION_LOG_INFO(Output, + "GetBuildPart {} ('{}') took {}. 
Payload size: {}", + BuildPartId, + BuildPartName, + NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()), + NiceBytes(BuildPartManifest.GetSize())); + ZEN_OPERATION_LOG_INFO(Output, "{}", GetCbObjectAsNiceString(BuildPartManifest, " "sv, "\n"sv)); + } + + { + CbObjectView Chunker = BuildPartManifest["chunker"sv].AsObjectView(); + std::string_view ChunkerName = Chunker["name"sv].AsString(); + CbObjectView Parameters = Chunker["parameters"sv].AsObjectView(); + OutChunkController = CreateChunkingController(ChunkerName, Parameters); + } + + auto ParseBuildPartManifest = [&Output, IsQuiet, IsVerbose, DoExtraContentVerify]( + StorageInstance& Storage, + const Oid& BuildId, + const Oid& BuildPartId, + CbObject BuildPartManifest, + std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards, + const BuildManifest::Part* OptionalManifest, + ChunkedFolderContent& OutRemoteContent, + std::vector<ChunkBlockDescription>& OutBlockDescriptions, + std::vector<IoHash>& OutLooseChunkHashes) { + std::vector<uint32_t> AbsoluteChunkOrders; + std::vector<uint64_t> LooseChunkRawSizes; + std::vector<IoHash> BlockRawHashes; + + ReadBuildContentFromCompactBinary(BuildPartManifest, + OutRemoteContent.Platform, + OutRemoteContent.Paths, + OutRemoteContent.RawHashes, + OutRemoteContent.RawSizes, + OutRemoteContent.Attributes, + OutRemoteContent.ChunkedContent.SequenceRawHashes, + OutRemoteContent.ChunkedContent.ChunkCounts, + AbsoluteChunkOrders, + OutLooseChunkHashes, + LooseChunkRawSizes, + BlockRawHashes); + + // TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them + + { + bool AttemptFallback = false; + OutBlockDescriptions = GetBlockDescriptions(Output, + *Storage.BuildStorage, + Storage.BuildCacheStorage.get(), + BuildId, + BuildPartId, + BlockRawHashes, + AttemptFallback, + IsQuiet, + IsVerbose); + } + + CalculateLocalChunkOrders(AbsoluteChunkOrders, + OutLooseChunkHashes, + 
LooseChunkRawSizes, + OutBlockDescriptions, + OutRemoteContent.ChunkedContent.ChunkHashes, + OutRemoteContent.ChunkedContent.ChunkRawSizes, + OutRemoteContent.ChunkedContent.ChunkOrders, + DoExtraContentVerify); + + std::vector<std::filesystem::path> DeletedPaths; + + if (OptionalManifest) + { + tsl::robin_set<std::string> PathsInManifest; + PathsInManifest.reserve(OptionalManifest->Files.size()); + for (const std::filesystem::path& ManifestPath : OptionalManifest->Files) + { + PathsInManifest.insert(ToLower(ManifestPath.generic_string())); + } + for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths) + { + if (!PathsInManifest.contains(ToLower(RemotePath.generic_string()))) + { + DeletedPaths.push_back(RemotePath); + } + } + } + + if (!IncludeWildcards.empty() || !ExcludeWildcards.empty()) + { + for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths) + { + if (!IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(RemotePath.generic_string()), /*CaseSensitive*/ true)) + { + DeletedPaths.push_back(RemotePath); + } + } + } + + if (!DeletedPaths.empty()) + { + OutRemoteContent = DeletePathsFromChunkedContent(OutRemoteContent, DeletedPaths); + InlineRemoveUnusedHashes(OutLooseChunkHashes, OutRemoteContent.ChunkedContent.ChunkHashes); + } + +#if ZEN_BUILD_DEBUG + ValidateChunkedFolderContent(OutRemoteContent, OutBlockDescriptions, OutLooseChunkHashes, IncludeWildcards, ExcludeWildcards); +#endif // ZEN_BUILD_DEBUG + }; + + auto FindManifest = [&Manifest](const Oid& BuildPartId, std::string_view BuildPartName) -> const BuildManifest::Part* { + if (Manifest.Parts.empty()) + { + return nullptr; + } + if (Manifest.Parts.size() == 1) + { + if (Manifest.Parts[0].PartId == Oid::Zero && Manifest.Parts[0].PartName.empty()) + { + return &Manifest.Parts[0]; + } + } + + auto It = std::find_if(Manifest.Parts.begin(), Manifest.Parts.end(), [BuildPartId, BuildPartName](const BuildManifest::Part& Part) { + if (Part.PartId != Oid::Zero) + { + return 
Part.PartId == BuildPartId; + } + if (!Part.PartName.empty()) + { + return Part.PartName == BuildPartName; + } + return false; + }); + if (It != Manifest.Parts.end()) + { + return &(*It); + } + return nullptr; + }; + + OutPartContents.resize(1); + ParseBuildPartManifest(Storage, + BuildId, + BuildPartId, + BuildPartManifest, + IncludeWildcards, + ExcludeWildcards, + FindManifest(BuildPartId, BuildPartName), + OutPartContents[0], + OutBlockDescriptions, + OutLooseChunkHashes); + ChunkedFolderContent RemoteContent; + if (BuildParts.size() > 1) + { + std::vector<ChunkBlockDescription> OverlayBlockDescriptions; + std::vector<IoHash> OverlayLooseChunkHashes; + for (size_t PartIndex = 1; PartIndex < BuildParts.size(); PartIndex++) + { + const Oid& OverlayBuildPartId = BuildParts[PartIndex].first; + const std::string& OverlayBuildPartName = BuildParts[PartIndex].second; + Stopwatch GetOverlayBuildPartTimer; + CbObject OverlayBuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, OverlayBuildPartId); + if (!IsQuiet) + { + ZEN_OPERATION_LOG_INFO(Output, + "GetBuildPart {} ('{}') took {}. 
Payload size: {}", + OverlayBuildPartId, + OverlayBuildPartName, + NiceTimeSpanMs(GetOverlayBuildPartTimer.GetElapsedTimeMs()), + NiceBytes(OverlayBuildPartManifest.GetSize())); + } + + ChunkedFolderContent OverlayPartContent; + std::vector<ChunkBlockDescription> OverlayPartBlockDescriptions; + std::vector<IoHash> OverlayPartLooseChunkHashes; + + ParseBuildPartManifest(Storage, + BuildId, + OverlayBuildPartId, + OverlayBuildPartManifest, + IncludeWildcards, + ExcludeWildcards, + FindManifest(OverlayBuildPartId, OverlayBuildPartName), + OverlayPartContent, + OverlayPartBlockDescriptions, + OverlayPartLooseChunkHashes); + OutPartContents.push_back(OverlayPartContent); + OverlayBlockDescriptions.insert(OverlayBlockDescriptions.end(), + OverlayPartBlockDescriptions.begin(), + OverlayPartBlockDescriptions.end()); + OverlayLooseChunkHashes.insert(OverlayLooseChunkHashes.end(), + OverlayPartLooseChunkHashes.begin(), + OverlayPartLooseChunkHashes.end()); + } + + RemoteContent = MergeChunkedFolderContents(OutPartContents[0], std::span<const ChunkedFolderContent>(OutPartContents).subspan(1)); + { + tsl::robin_set<IoHash> AllBlockHashes; + for (const ChunkBlockDescription& Description : OutBlockDescriptions) + { + AllBlockHashes.insert(Description.BlockHash); + } + for (const ChunkBlockDescription& Description : OverlayBlockDescriptions) + { + if (!AllBlockHashes.contains(Description.BlockHash)) + { + AllBlockHashes.insert(Description.BlockHash); + OutBlockDescriptions.push_back(Description); + } + } + } + { + tsl::robin_set<IoHash> AllLooseChunkHashes(OutLooseChunkHashes.begin(), OutLooseChunkHashes.end()); + for (const IoHash& OverlayLooseChunkHash : OverlayLooseChunkHashes) + { + if (!AllLooseChunkHashes.contains(OverlayLooseChunkHash)) + { + AllLooseChunkHashes.insert(OverlayLooseChunkHash); + OutLooseChunkHashes.push_back(OverlayLooseChunkHash); + } + } + } + } + else + { + RemoteContent = OutPartContents[0]; + } + return RemoteContent; +} + +std::string 
+GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix) +{ + ExtendableStringBuilder<512> SB; + std::vector<std::pair<std::string, std::string>> NameStringValuePairs; + for (CbFieldView Field : Object) + { + std::string_view Name = Field.GetName(); + switch (CbValue Accessor = Field.GetValue(); Accessor.GetType()) + { + case CbFieldType::String: + NameStringValuePairs.push_back({std::string(Name), std::string(Accessor.AsString())}); + break; + case CbFieldType::IntegerPositive: + NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerPositive())}); + break; + case CbFieldType::IntegerNegative: + NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerNegative())}); + break; + case CbFieldType::Float32: + { + const float Value = Accessor.AsFloat32(); + if (std::isfinite(Value)) + { + NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.9g}", Value)}); + } + else + { + NameStringValuePairs.push_back({std::string(Name), "null"}); + } + } + break; + case CbFieldType::Float64: + { + const double Value = Accessor.AsFloat64(); + if (std::isfinite(Value)) + { + NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.17g}", Value)}); + } + else + { + NameStringValuePairs.push_back({std::string(Name), "null"}); + } + } + break; + case CbFieldType::BoolFalse: + NameStringValuePairs.push_back({std::string(Name), "false"}); + break; + case CbFieldType::BoolTrue: + NameStringValuePairs.push_back({std::string(Name), "true"}); + break; + case CbFieldType::Hash: + { + NameStringValuePairs.push_back({std::string(Name), Accessor.AsHash().ToHexString()}); + } + break; + case CbFieldType::Uuid: + { + StringBuilder<Oid::StringLength + 1> Builder; + Accessor.AsUuid().ToString(Builder); + NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); + } + break; + case CbFieldType::DateTime: + { + ExtendableStringBuilder<64> Builder; + Builder << 
DateTime(Accessor.AsDateTimeTicks()).ToIso8601(); + NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); + } + break; + case CbFieldType::TimeSpan: + { + ExtendableStringBuilder<64> Builder; + const TimeSpan Span(Accessor.AsTimeSpanTicks()); + if (Span.GetDays() == 0) + { + Builder << Span.ToString("%h:%m:%s.%n"); + } + else + { + Builder << Span.ToString("%d.%h:%m:%s.%n"); + } + NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); + break; + } + case CbFieldType::ObjectId: + NameStringValuePairs.push_back({std::string(Name), Accessor.AsObjectId().ToString()}); + break; + } + } + std::string::size_type LongestKey = 0; + for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs) + { + LongestKey = Max(KeyValue.first.length(), LongestKey); + } + for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs) + { + SB.Append(fmt::format("{}{:<{}}: {}{}", Prefix, KeyValue.first, LongestKey, KeyValue.second, Suffix)); + } + return SB.ToString(); +} + +#if ZEN_WITH_TESTS + +namespace buildstorageoperations_testutils { + struct TestState + { + TestState(const std::filesystem::path& InRootPath) + : RootPath(InRootPath) + , LogOutput(CreateStandardLogOutput(Log)) + , ChunkController(CreateStandardChunkingController(StandardChunkingControllerSettings{})) + , ChunkCache(CreateMemoryChunkingCache()) + , WorkerPool(2) + , NetworkPool(2) + { + } + + void Initialize() + { + StoragePath = RootPath / "storage"; + TempPath = RootPath / "temp"; + SystemRootDir = RootPath / "sysroot"; + ZenFolderPath = RootPath / ".zen"; + + CreateDirectories(TempPath); + CreateDirectories(StoragePath); + + Storage.BuildStorage = CreateFileBuildStorage(StoragePath, StorageStats, false); + } + + void CreateSourceData(const std::filesystem::path& Source, std::span<const std::string> Paths, std::span<const uint64_t> Sizes) + { + const std::filesystem::path SourcePath = RootPath / Source; + CreateDirectories(SourcePath); + for 
(size_t FileIndex = 0; FileIndex < Paths.size(); FileIndex++) + { + const std::string& FilePath = Paths[FileIndex]; + const uint64_t FileSize = Sizes[FileIndex]; + IoBuffer FileData = FileSize > 0 ? CreateSemiRandomBlob(FileSize) : IoBuffer{}; + WriteFile(SourcePath / FilePath, FileData); + } + } + + std::vector<std::pair<Oid, std::string>> Upload(const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& Source, + const std::filesystem::path& ManifestPath) + { + const std::filesystem::path SourcePath = RootPath / Source; + CbObject MetaData; + BuildsOperationUploadFolder Upload(*LogOutput, + Storage, + AbortFlag, + PauseFlag, + WorkerPool, + NetworkPool, + BuildId, + SourcePath, + true, + MetaData, + BuildsOperationUploadFolder::Options{.TempDir = TempPath}); + return Upload.Execute(BuildPartId, BuildPartName, ManifestPath, *ChunkController, *ChunkCache); + } + + void ValidateUpload(const Oid& BuildId, const std::vector<std::pair<Oid, std::string>>& Parts) + { + for (auto Part : Parts) + { + BuildsOperationValidateBuildPart Validate(*LogOutput, + *Storage.BuildStorage, + AbortFlag, + PauseFlag, + WorkerPool, + NetworkPool, + BuildId, + Part.first, + Part.second, + BuildsOperationValidateBuildPart::Options{}); + Validate.Execute(); + } + } + + FolderContent Download(const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& Target, + bool Append) + { + const std::filesystem::path TargetPath = RootPath / Target; + + CreateDirectories(TargetPath); + + uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + CbObject BuildObject = Storage.BuildStorage->GetBuild(BuildId); + std::vector<Oid> PartIds; + if (BuildPartId != Oid::Zero) + { + PartIds.push_back(BuildPartId); + } + std::vector<std::string> PartNames; + if (!BuildPartName.empty()) + { + PartNames.push_back(std::string(BuildPartName)); + } + std::vector<std::pair<Oid, std::string>> 
AllBuildParts = + ResolveBuildPartNames(BuildObject, BuildId, PartIds, PartNames, PreferredMultipartChunkSize); + + std::vector<ChunkedFolderContent> PartContents; + + std::vector<ChunkBlockDescription> BlockDescriptions; + std::vector<IoHash> LooseChunkHashes; + + ChunkedFolderContent RemoteContent = GetRemoteContent(*LogOutput, + Storage, + BuildId, + AllBuildParts, + {}, + {}, + {}, + ChunkController, + PartContents, + BlockDescriptions, + LooseChunkHashes, + /*IsQuiet*/ false, + /*IsVerbose*/ false, + /*DoExtraContentVerify*/ true); + + GetFolderContentStatistics LocalFolderScanStats; + + struct ContentVisitor : public GetDirectoryContentVisitor + { + virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) + { + RwLock::ExclusiveLockScope _(ExistingPathsLock); + for (const std::filesystem::path& FileName : Content.FileNames) + { + if (RelativeRoot.empty()) + { + ExistingPaths.push_back(FileName); + } + else + { + ExistingPaths.push_back(RelativeRoot / FileName); + } + } + } + + RwLock ExistingPathsLock; + std::vector<std::filesystem::path> ExistingPaths; + } Visitor; + + Latch PendingWorkCount(1); + + GetDirectoryContent(TargetPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive, + Visitor, + WorkerPool, + PendingWorkCount); + + PendingWorkCount.CountDown(); + PendingWorkCount.Wait(); + + FolderContent CurrentLocalFolderState = GetValidFolderContent( + WorkerPool, + LocalFolderScanStats, + TargetPath, + Visitor.ExistingPaths, + [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); }, + 1000, + AbortFlag, + PauseFlag); + + ChunkingStatistics LocalChunkingStats; + ChunkedFolderContent LocalContent = ChunkFolderContent( + LocalChunkingStats, + WorkerPool, + TargetPath, + CurrentLocalFolderState, + *ChunkController, + *ChunkCache, + 1000, + [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { ZEN_UNUSED(IsAborted, IsPaused); }, + AbortFlag, + 
PauseFlag); + + if (Append) + { + RemoteContent = ApplyChunkedContentOverlay(LocalContent, RemoteContent, {}, {}); + } + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); + const ChunkedContentLookup RemoteLookup = BuildChunkedContentLookup(RemoteContent); + + BuildsOperationUpdateFolder Download(*LogOutput, + Storage, + AbortFlag, + PauseFlag, + WorkerPool, + NetworkPool, + BuildId, + TargetPath, + LocalContent, + LocalLookup, + RemoteContent, + RemoteLookup, + BlockDescriptions, + LooseChunkHashes, + BuildsOperationUpdateFolder::Options{.SystemRootDir = SystemRootDir, + .ZenFolderPath = ZenFolderPath, + .ValidateCompletedSequences = true}); + FolderContent ResultingState; + Download.Execute(ResultingState); + + return ResultingState; + } + + void ValidateDownload(std::span<const std::string> Paths, + std::span<const uint64_t> Sizes, + const std::filesystem::path& Source, + const std::filesystem::path& Target, + const FolderContent& DownloadContent) + { + const std::filesystem::path SourcePath = RootPath / Source; + const std::filesystem::path TargetPath = RootPath / Target; + + CHECK_EQ(Paths.size(), DownloadContent.Paths.size()); + tsl::robin_map<std::string, uint64_t> ExpectedSizes; + tsl::robin_map<std::string, IoHash> ExpectedHashes; + for (size_t Index = 0; Index < Paths.size(); Index++) + { + const std::string LookupString = std::filesystem::path(Paths[Index]).generic_string(); + ExpectedSizes.insert_or_assign(LookupString, Sizes[Index]); + std::filesystem::path FilePath = SourcePath / Paths[Index]; + const IoHash SourceHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(FilePath.make_preferred())); + ExpectedHashes.insert_or_assign(LookupString, SourceHash); + } + for (size_t Index = 0; Index < DownloadContent.Paths.size(); Index++) + { + const std::string LookupString = std::filesystem::path(DownloadContent.Paths[Index]).generic_string(); + auto SizeIt = ExpectedSizes.find(LookupString); + CHECK_NE(SizeIt, 
ExpectedSizes.end()); + CHECK_EQ(SizeIt->second, DownloadContent.RawSizes[Index]); + std::filesystem::path FilePath = TargetPath / DownloadContent.Paths[Index]; + const IoHash DownloadedHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(FilePath.make_preferred())); + auto HashIt = ExpectedHashes.find(LookupString); + CHECK_NE(HashIt, ExpectedHashes.end()); + CHECK_EQ(HashIt->second, DownloadedHash); + } + } + + const std::filesystem::path RootPath; + std::filesystem::path StoragePath; + std::filesystem::path TempPath; + std::filesystem::path SystemRootDir; + std::filesystem::path ZenFolderPath; + + LoggerRef Log = ConsoleLog(); + std::unique_ptr<OperationLogOutput> LogOutput; + + std::unique_ptr<ChunkingController> ChunkController; + std::unique_ptr<ChunkingCache> ChunkCache; + + StorageInstance Storage; + BuildStorageBase::Statistics StorageStats; + + WorkerThreadPool WorkerPool; + WorkerThreadPool NetworkPool; + + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + }; + +} // namespace buildstorageoperations_testutils + +TEST_CASE("buildstorageoperations.upload.folder") +{ + using namespace buildstorageoperations_testutils; + + FastRandom BaseRandom; + + const size_t FileCount = 11; + + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); + + const Oid BuildId = Oid::NewOid(); + const Oid BuildPartId = 
Oid::NewOid(); + const std::string BuildPartName = "default"; + + auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {}); + + CHECK_EQ(Result.size(), 1u); + CHECK_EQ(Result[0].first, BuildPartId); + CHECK_EQ(Result[0].second, BuildPartName); + State.ValidateUpload(BuildId, Result); + + FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false); + CHECK_EQ(DownloadContent.Paths.size(), FileCount); + State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); +} + +TEST_CASE("buildstorageoperations.upload.manifest") +{ + using namespace buildstorageoperations_testutils; + + FastRandom BaseRandom; + + const size_t FileCount = 11; + + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); + + std::span<const std::string> ManifestFiles(Paths); + ManifestFiles = ManifestFiles.subspan(0, FileCount / 2); + + std::span<const uint64_t> ManifestSizes(Sizes); + ManifestSizes = ManifestSizes.subspan(0, FileCount / 2); + + ExtendableStringBuilder<1024> Manifest; + for (const std::string& FilePath : ManifestFiles) + { + Manifest << FilePath << "\n"; + } + + WriteFile(State.RootPath / "manifest.txt", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size())); + + const Oid BuildId = Oid::NewOid(); + const Oid BuildPartId = Oid::NewOid(); + const std::string 
BuildPartName = "default"; + + auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", State.RootPath / "manifest.txt"); + + CHECK_EQ(Result.size(), 1u); + CHECK_EQ(Result[0].first, BuildPartId); + CHECK_EQ(Result[0].second, BuildPartName); + State.ValidateUpload(BuildId, Result); + + FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false); + State.ValidateDownload(ManifestFiles, ManifestSizes, "source", "download", DownloadContent); +} + +TEST_CASE("buildstorageoperations.memorychunkingcache") +{ + using namespace buildstorageoperations_testutils; + + FastRandom BaseRandom; + + const size_t FileCount = 11; + + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); + + const Oid BuildId = Oid::NewOid(); + const Oid BuildPartId = Oid::NewOid(); + const std::string BuildPartName = "default"; + + { + const std::filesystem::path SourcePath = SourceFolder.Path() / "source"; + CbObject MetaData; + BuildsOperationUploadFolder Upload(*State.LogOutput, + State.Storage, + State.AbortFlag, + State.PauseFlag, + State.WorkerPool, + State.NetworkPool, + BuildId, + SourcePath, + true, + MetaData, + BuildsOperationUploadFolder::Options{.TempDir = State.TempPath}); + auto Result = Upload.Execute(BuildPartId, BuildPartName, {}, *State.ChunkController, *State.ChunkCache); + + 
CHECK_EQ(Upload.m_ChunkingStats.FilesStoredInCache.load(), FileCount - 1); // Zero size files are not stored in cache + CHECK_EQ(Upload.m_ChunkingStats.BytesStoredInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0))); + CHECK(Upload.m_ChunkingStats.ChunksStoredInCache.load() >= FileCount - 1); // Zero size files are not stored in cache + + CHECK_EQ(Result.size(), 1u); + CHECK_EQ(Result[0].first, BuildPartId); + CHECK_EQ(Result[0].second, BuildPartName); + } + + auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {}); + + const Oid BuildId2 = Oid::NewOid(); + const Oid BuildPartId2 = Oid::NewOid(); + + { + const std::filesystem::path SourcePath = SourceFolder.Path() / "source"; + CbObject MetaData; + BuildsOperationUploadFolder Upload(*State.LogOutput, + State.Storage, + State.AbortFlag, + State.PauseFlag, + State.WorkerPool, + State.NetworkPool, + BuildId2, + SourcePath, + true, + MetaData, + BuildsOperationUploadFolder::Options{.TempDir = State.TempPath}); + Upload.Execute(BuildPartId2, BuildPartName, {}, *State.ChunkController, *State.ChunkCache); + + CHECK_EQ(Upload.m_ChunkingStats.FilesFoundInCache.load(), FileCount - 1); // Zero size files are not stored in cache + CHECK_EQ(Upload.m_ChunkingStats.BytesFoundInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0))); + CHECK(Upload.m_ChunkingStats.ChunksFoundInCache.load() >= FileCount - 1); // Zero size files are not stored in cache + } + + FolderContent DownloadContent = State.Download(BuildId2, BuildPartId2, {}, "download", /* Append */ false); + State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); +} + +TEST_CASE("buildstorageoperations.upload.multipart") +{ + using namespace buildstorageoperations_testutils; + + FastRandom BaseRandom; + + const size_t FileCount = 11; + + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + 
{"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); + + std::span<const std::string> ManifestFiles1(Paths); + ManifestFiles1 = ManifestFiles1.subspan(0, FileCount / 2); + + std::span<const uint64_t> ManifestSizes1(Sizes); + ManifestSizes1 = ManifestSizes1.subspan(0, FileCount / 2); + + std::span<const std::string> ManifestFiles2(Paths); + ManifestFiles2 = ManifestFiles2.subspan(FileCount / 2 - 1); + + std::span<const uint64_t> ManifestSizes2(Sizes); + ManifestSizes2 = ManifestSizes2.subspan(FileCount / 2 - 1); + + const Oid BuildPart1Id = Oid::NewOid(); + const std::string BuildPart1Name = "part1"; + const Oid BuildPart2Id = Oid::NewOid(); + const std::string BuildPart2Name = "part2"; + { + CbObjectWriter Writer; + Writer.BeginObject("parts"sv); + { + Writer.BeginObject(BuildPart1Name); + { + Writer.AddObjectId("partId"sv, BuildPart1Id); + Writer.BeginArray("files"sv); + for (const std::string& ManifestFile : ManifestFiles1) + { + Writer.AddString(ManifestFile); + } + Writer.EndArray(); // files + } + Writer.EndObject(); // part1 + + Writer.BeginObject(BuildPart2Name); + { + Writer.AddObjectId("partId"sv, BuildPart2Id); + Writer.BeginArray("files"sv); + for (const std::string& ManifestFile : ManifestFiles2) + { + Writer.AddString(ManifestFile); + } + Writer.EndArray(); // files + } + Writer.EndObject(); // part2 + } + Writer.EndObject(); // parts + + ExtendableStringBuilder<1024> Manifest; + CompactBinaryToJson(Writer.Save(), Manifest); + WriteFile(State.RootPath / 
"manifest.json", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size())); + } + + const Oid BuildId = Oid::NewOid(); + + auto Result = State.Upload(BuildId, {}, {}, "source", State.RootPath / "manifest.json"); + + CHECK_EQ(Result.size(), 2u); + CHECK_EQ(Result[0].first, BuildPart1Id); + CHECK_EQ(Result[0].second, BuildPart1Name); + CHECK_EQ(Result[1].first, BuildPart2Id); + CHECK_EQ(Result[1].second, BuildPart2Name); + State.ValidateUpload(BuildId, Result); + + FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false); + State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); + + FolderContent Part1DownloadContent = State.Download(BuildId, BuildPart1Id, {}, "download_part1", /* Append */ false); + State.ValidateDownload(ManifestFiles1, ManifestSizes1, "source", "download_part1", Part1DownloadContent); + + FolderContent Part2DownloadContent = State.Download(BuildId, Oid::Zero, BuildPart2Name, "download_part2", /* Append */ false); + State.ValidateDownload(ManifestFiles2, ManifestSizes2, "source", "download_part2", Part2DownloadContent); + + (void)State.Download(BuildId, BuildPart1Id, BuildPart1Name, "download_part1+2", /* Append */ false); + FolderContent Part1And2DownloadContent = State.Download(BuildId, BuildPart2Id, {}, "download_part1+2", /* Append */ true); + State.ValidateDownload(Paths, Sizes, "source", "download_part1+2", Part1And2DownloadContent); +} + +void +buildstorageoperations_forcelink() +{ +} + +#endif // ZEN_WITH_TESTS + } // namespace zen diff --git a/src/zenremotestore/builds/filebuildstorage.cpp b/src/zenremotestore/builds/filebuildstorage.cpp index 1474fd819..55e69de61 100644 --- a/src/zenremotestore/builds/filebuildstorage.cpp +++ b/src/zenremotestore/builds/filebuildstorage.cpp @@ -61,13 +61,12 @@ public: return Writer.Save(); } - virtual CbObject ListBuilds(CbObject Query) override + virtual CbObject ListBuilds(std::string_view JsonQuery) override { 
ZEN_TRACE_CPU("FileBuildStorage::ListBuilds"); - ZEN_UNUSED(Query); uint64_t ReceivedBytes = 0; - uint64_t SentBytes = Query.GetSize(); + uint64_t SentBytes = JsonQuery.size(); SimulateLatency(SentBytes, 0); auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); }); diff --git a/src/zenremotestore/builds/jupiterbuildstorage.cpp b/src/zenremotestore/builds/jupiterbuildstorage.cpp index 962ffaaed..23d0ddd4c 100644 --- a/src/zenremotestore/builds/jupiterbuildstorage.cpp +++ b/src/zenremotestore/builds/jupiterbuildstorage.cpp @@ -104,15 +104,13 @@ public: return Response.Save(); } - virtual CbObject ListBuilds(CbObject Query) override + virtual CbObject ListBuilds(std::string_view JsonQuery) override { ZEN_TRACE_CPU("Jupiter::ListBuilds"); - Stopwatch ExecutionTimer; - auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); - IoBuffer Payload = Query.GetBuffer().AsIoBuffer(); - Payload.SetContentType(ZenContentType::kCbObject); - JupiterResult ListResult = m_Session.ListBuilds(m_Namespace, m_Bucket, Payload); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult ListResult = m_Session.ListBuilds(m_Namespace, m_Bucket, JsonQuery); AddStatistic(ListResult); if (!ListResult.Success) { diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp index a5d0db205..c4d8653f4 100644 --- a/src/zenremotestore/chunking/chunkblock.cpp +++ b/src/zenremotestore/chunking/chunkblock.cpp @@ -297,6 +297,7 @@ FindReuseBlocks(OperationLogOutput& Output, if (ChunkCount > 0) { + size_t AcceptedChunkCount = 0; if (!KnownBlocks.empty()) { Stopwatch ReuseTimer; @@ -420,6 +421,7 @@ FindReuseBlocks(OperationLogOutput& Output, { ChunkFound[ChunkIndex] = true; } + AcceptedChunkCount += FoundChunkIndexes.size(); Stats.AcceptedChunkCount += FoundChunkIndexes.size(); Stats.AcceptedByteCount += AdjustedReuseSize; 
Stats.AcceptedRawByteCount += AdjustedRawReuseSize; @@ -440,7 +442,8 @@ FindReuseBlocks(OperationLogOutput& Output, } } } - OutUnusedChunkIndexes.reserve(ChunkIndexes.size() - Stats.AcceptedChunkCount); + + OutUnusedChunkIndexes.reserve(ChunkIndexes.size() - AcceptedChunkCount); for (uint32_t ChunkIndex : ChunkIndexes) { if (!ChunkFound[ChunkIndex]) diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index fda01aa56..26d179f14 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -13,6 +13,7 @@ #include <zencore/trace.h> #include <zenremotestore/chunking/chunkblock.h> #include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/chunking/chunkingcontroller.h> #include <zenutil/wildcard.h> @@ -100,6 +101,8 @@ namespace { IoHash HashOneFile(ChunkingStatistics& Stats, const ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, + std::span<const uint64_t> ModificationTicks, ChunkedFolderContent& OutChunkedContent, tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex, @@ -110,8 +113,9 @@ namespace { { ZEN_TRACE_CPU("HashOneFile"); - const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; - const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; + const uint64_t ModificationTick = ModificationTicks[PathIndex]; if (RawSize == 0) { @@ -119,16 +123,53 @@ namespace { } else { + std::filesystem::path FullPath = FolderPath / Path; + FullPath.make_preferred(); + ChunkedInfoWithSource Chunked; - const bool DidChunking = - InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, 
Stats.BytesHashed, AbortFlag); - if (DidChunking) + + if (!InChunkingCache.GetCachedFile(FullPath, RawSize, ModificationTick, Chunked)) { - Lock.WithExclusiveLock([&]() { - if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + const bool DidChunking = InChunkingController.ProcessFile(FullPath, RawSize, Chunked, Stats.BytesHashed, AbortFlag); + if (!DidChunking) + { + ZEN_TRACE_CPU("HashOnly"); + + IoBuffer Buffer = IoBufferBuilder::MakeFromFile(FullPath); + if (Buffer.GetSize() != RawSize) + { + throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path)); + } + + Chunked.Info.RawSize = RawSize; + Chunked.Info.RawHash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); + } + if (InChunkingCache.PutCachedFile(FullPath, ModificationTick, Chunked)) + { + Stats.FilesStoredInCache++; + Stats.ChunksStoredInCache += Chunked.Info.ChunkSequence.empty() ? 1 : Chunked.Info.ChunkHashes.size(); + Stats.BytesStoredInCache += RawSize; + } + } + else + { + Stats.FilesFoundInCache++; + Stats.ChunksFoundInCache += Chunked.Info.ChunkSequence.empty() ? 
1 : Chunked.Info.ChunkHashes.size(); + Stats.BytesFoundInCache += RawSize; + } + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + + if (Chunked.Info.ChunkSequence.empty()) + { + AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Chunked.Info.RawHash, RawSize); + Stats.UniqueSequencesFound++; + } + else { - RawHashToSequenceRawHashIndex.insert( - {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); std::vector<uint64_t> ChunkSizes; ChunkSizes.reserve(Chunked.ChunkSources.size()); for (const ChunkSource& Source : Chunked.ChunkSources) @@ -144,34 +185,12 @@ namespace { Chunked.Info.ChunkSequence, Chunked.Info.ChunkHashes, ChunkSizes); - Stats.UniqueSequencesFound++; } - }); - Stats.FilesChunked++; - return Chunked.Info.RawHash; - } - else - { - ZEN_TRACE_CPU("HashOnly"); - - IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); - if (Buffer.GetSize() != RawSize) - { - throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path)); + Stats.UniqueSequencesFound++; } - const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); - - Lock.WithExclusiveLock([&]() { - if (!RawHashToSequenceRawHashIndex.contains(Hash)) - { - RawHashToSequenceRawHashIndex.insert( - {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); - AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); - Stats.UniqueSequencesFound++; - } - }); - return Hash; - } + }); + Stats.FilesChunked++; + return Chunked.Info.RawHash; } } @@ -1113,6 +1132,7 @@ ChunkFolderContent(ChunkingStatistics& Stats, const std::filesystem::path& RootPath, const FolderContent& Content, const 
ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, int32_t UpdateIntervalMS, std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, std::atomic<bool>& AbortFlag, @@ -1123,6 +1143,10 @@ ChunkFolderContent(ChunkingStatistics& Stats, Stopwatch Timer; auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + ZEN_ASSERT(Content.ModificationTicks.size() == Content.Paths.size()); + ZEN_ASSERT(Content.RawSizes.size() == Content.Paths.size()); + ZEN_ASSERT(Content.Attributes.size() == Content.Paths.size()); + ChunkedFolderContent Result = {.Platform = Content.Platform, .Paths = Content.Paths, .RawSizes = Content.RawSizes, @@ -1163,12 +1187,15 @@ ChunkFolderContent(ChunkingStatistics& Stats, { break; } + Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool() [&, PathIndex](std::atomic<bool>& AbortFlag) { if (!AbortFlag) { IoHash RawHash = HashOneFile(Stats, InChunkingController, + InChunkingCache, + Content.ModificationTicks, Result, ChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, diff --git a/src/zenremotestore/chunking/chunkingcache.cpp b/src/zenremotestore/chunking/chunkingcache.cpp new file mode 100644 index 000000000..7f0a26330 --- /dev/null +++ b/src/zenremotestore/chunking/chunkingcache.cpp @@ -0,0 +1,627 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zenremotestore/chunking/chunkingcache.h> + +#include <zenbase/zenbase.h> +#include <zencore/basicfile.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryutil.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcontroller.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +#include <xxhash.h> +#include <gsl/gsl-lite.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +# include <zencore/testutils.h> +# include <algorithm> +#endif // ZEN_WITH_TESTS + +namespace zen { + +class NullChunkingCache : public ChunkingCache +{ +public: + NullChunkingCache() {} + + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) override + { + ZEN_UNUSED(InputPath, RawSize, OutChunked, ModificationTick); + return false; + } + + virtual bool PutCachedFile(const std::filesystem::path& InputPath, + uint64_t ModificationTick, + const ChunkedInfoWithSource& Chunked) override + { + ZEN_UNUSED(InputPath, Chunked, ModificationTick); + return false; + } +}; + +class MemoryChunkingCache : public ChunkingCache +{ +public: + MemoryChunkingCache() {} + + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) override + { + const std::u8string PathString = InputPath.generic_u8string(); + const IoHash PathHash = IoHash::HashBuffer(PathString.data(), PathString.length()); + + RwLock::SharedLockScope Lock(m_Lock); + if (auto It = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) + { + const CachedEntry& Entry = m_Entries[It->second]; + if (ModificationTick == Entry.ModificationTick && RawSize == Entry.Chunked.Info.RawSize) + { + OutChunked = Entry.Chunked; + return true; 
+ } + else + { + Lock.ReleaseNow(); + RwLock::ExclusiveLockScope EditLock(m_Lock); // the shared lock was dropped above, so 'It' may be stale: look the entry up again + if (auto RemoveIt = m_PathHashToEntry.find(PathHash); RemoveIt != m_PathHashToEntry.end()) + { + CachedEntry& DeleteEntry = m_Entries[RemoveIt->second]; + DeleteEntry.Chunked = {}; + DeleteEntry.ModificationTick = 0; + m_FreeEntryIndexes.push_back(RemoveIt->second); // freed slot is handed out again by PutCachedFile + m_PathHashToEntry.erase(RemoveIt); + } + } + } + return false; + } + + virtual bool PutCachedFile(const std::filesystem::path& InputPath, + uint64_t ModificationTick, + const ChunkedInfoWithSource& Chunked) override + { + const std::u8string PathString = InputPath.generic_u8string(); + const IoHash PathHash = IoHash::HashBuffer(PathString.data(), PathString.length()); + + RwLock::ExclusiveLockScope _(m_Lock); + if (auto It = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) + { + CachedEntry& Entry = m_Entries[It->second]; + if (ModificationTick != Entry.ModificationTick || Chunked.Info.RawSize != Entry.Chunked.Info.RawSize) + { + Entry.Chunked = Chunked; + Entry.ModificationTick = ModificationTick; + } + } + else + { + uint32_t EntryIndex = gsl::narrow<uint32_t>(m_Entries.size()); + if (!m_FreeEntryIndexes.empty()) // prefer a slot released by an earlier eviction over growing m_Entries + { + EntryIndex = m_FreeEntryIndexes.back(); + m_FreeEntryIndexes.pop_back(); + m_Entries[EntryIndex] = CachedEntry{.Chunked = Chunked, .ModificationTick = ModificationTick}; + } + else + { + m_Entries.emplace_back(CachedEntry{.Chunked = Chunked, .ModificationTick = ModificationTick}); + } + m_PathHashToEntry.insert_or_assign(PathHash, EntryIndex); + } + return true; + } + + RwLock m_Lock; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> m_PathHashToEntry; + std::vector<uint32_t> m_FreeEntryIndexes; + + struct CachedEntry + { + ChunkedInfoWithSource Chunked; + uint64_t ModificationTick = 0; + }; + + std::vector<CachedEntry> m_Entries; +}; + +class DiskChunkingCache : public ChunkingCache +{ +public: + DiskChunkingCache(const std::filesystem::path& RootPath, ChunkingController& ChunkController, uint64_t 
MinimumRawSizeForCaching) + : m_RootPath(RootPath) + , m_ChunkerId(GetChunkerIdentity(ChunkController)) + , m_MinimumRawSizeForCaching(MinimumRawSizeForCaching) + { + } + + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) override + { + if (RawSize < m_MinimumRawSizeForCaching) + { + return false; + } + + const std::filesystem::path CachePath = GetCachePath(InputPath); + + return ReadChunkedInfo(CachePath, RawSize, ModificationTick, OutChunked); + } + + virtual bool PutCachedFile(const std::filesystem::path& InputPath, + uint64_t ModificationTick, + const ChunkedInfoWithSource& Chunked) override + { + if (Chunked.Info.RawSize < m_MinimumRawSizeForCaching) + { + return false; + } + + const std::filesystem::path CachePath = GetCachePath(InputPath); + + return WriteChunkedInfo(CachePath, ModificationTick, Chunked); + } + +private: + static constexpr uint32_t ImplementationRevision = 1; + +#pragma pack(push) +#pragma pack(1) + struct ChunkedInfoHeader + { + static constexpr uint32_t ExpectedMagic = 0x75636368; // 'ucch'; + static constexpr uint32_t CurrentVersion = 1; + + uint32_t Magic = ExpectedMagic; + uint32_t Version = CurrentVersion; + uint64_t SequenceCount = 0; + uint64_t ChunkCount = 0; + uint64_t RawSize = 0; + IoHash RawHash = IoHash::Zero; + uint64_t ModificationTick = 0; + uint32_t Checksum = 0; + + static uint32_t ComputeChecksum(const ChunkedInfoHeader& Header) + { + return XXH32(&Header.Magic, sizeof(Header) - sizeof(uint32_t), 0xC0C0'BABA); + } + }; +#pragma pack(pop) + static_assert(sizeof(ChunkedInfoHeader) == 64); + static_assert(sizeof(ChunkSource) == 12); + + std::filesystem::path GetCachePath(const std::filesystem::path& InputPath) + { + const std::string IdentityString = fmt::format("{}_{}_{}", ImplementationRevision, m_ChunkerId, InputPath.generic_string()); + const IoHash IdentityHash = IoHash::HashBuffer(IdentityString.data(), 
IdentityString.length()); + std::filesystem::path CachePath = m_RootPath / fmt::format("{}.chunked_content", IdentityHash); + return CachePath; + } + + bool WriteChunkedInfo(const std::filesystem::path& CachePath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) + { + CreateDirectories(CachePath.parent_path()); + + TemporaryFile OutputFile; + std::error_code Ec; + OutputFile.CreateTemporary(CachePath.parent_path(), Ec); + if (Ec) + { + ZEN_DEBUG("Failed to create temp file for cached chunked data at '{}'", CachePath); + return false; + } + ChunkedInfoHeader Header = {.SequenceCount = Chunked.Info.ChunkSequence.size(), + .ChunkCount = Chunked.Info.ChunkHashes.size(), + .RawSize = Chunked.Info.RawSize, + .RawHash = Chunked.Info.RawHash, + .ModificationTick = ModificationTick}; + + Header.Checksum = ChunkedInfoHeader::ComputeChecksum(Header); + + try + { + uint64_t Offset = 0; + + OutputFile.Write(&Header, sizeof(ChunkedInfoHeader), Offset); + Offset += sizeof(ChunkedInfoHeader); + + if (Header.SequenceCount > 0) + { + OutputFile.Write(Chunked.Info.ChunkSequence.data(), Header.SequenceCount * sizeof(uint32_t), Offset); + Offset += Header.SequenceCount * sizeof(uint32_t); + } + + if (Header.ChunkCount > 0) + { + OutputFile.Write(Chunked.Info.ChunkHashes.data(), Header.ChunkCount * sizeof(IoHash), Offset); + Offset += Header.ChunkCount * sizeof(IoHash); + + OutputFile.Write(Chunked.ChunkSources.data(), Header.ChunkCount * sizeof(ChunkSource), Offset); + Offset += Header.ChunkCount * sizeof(ChunkSource); + } + + OutputFile.Flush(); + } + catch (const std::exception& Ex) + { + ZEN_DEBUG("Failed to write cached file {}. Reason: {}", CachePath, Ex.what()); + return false; + } + OutputFile.MoveTemporaryIntoPlace(CachePath, Ec); + if (Ec) + { + ZEN_DEBUG("Failed to move temporary file {} to {}. 
Reason: {}", OutputFile.GetPath(), CachePath, Ec.message()); + return false; + } + + return true; + } + + bool ReadChunkedInfo(const std::filesystem::path& CachePath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) + { + BasicFile InputFile; + std::error_code Ec; + InputFile.Open(CachePath, BasicFile::Mode::kRead, Ec); + if (Ec) + { + return false; + } + try + { + uint64_t Size = InputFile.FileSize(); + if (Size < sizeof(ChunkedInfoHeader)) + { + throw std::runtime_error(fmt::format("Expected size >= {}, file has size {}", sizeof(ChunkedInfoHeader), Size)); + } + + uint64_t Offset = 0; + ChunkedInfoHeader Header; + InputFile.Read(&Header, sizeof(ChunkedInfoHeader), Offset); + Offset += sizeof(Header); + + if (Header.Magic != ChunkedInfoHeader::ExpectedMagic) + { + throw std::runtime_error( + fmt::format("Expected magic 0x{:04x}, file has magic 0x{:04x}", ChunkedInfoHeader::ExpectedMagic, Header.Magic)); + } + if (Header.Version != ChunkedInfoHeader::CurrentVersion) + { + throw std::runtime_error( + fmt::format("Expected version {}, file has version {}", ChunkedInfoHeader::CurrentVersion, Header.Version)); + } + if (Header.Checksum != ChunkedInfoHeader::ComputeChecksum(Header)) // "expected" is the recomputed value, "file has" is the stored one + { + throw std::runtime_error(fmt::format("Expected checksum 0x{:04x}, file has checksum 0x{:04x}", + ChunkedInfoHeader::ComputeChecksum(Header), + Header.Checksum)); + } + + uint64_t ExpectedSize = sizeof(ChunkedInfoHeader) + Header.SequenceCount * sizeof(uint32_t) + + Header.ChunkCount * sizeof(IoHash) + Header.ChunkCount * sizeof(ChunkSource); + + if (ExpectedSize != Size) + { + throw std::runtime_error(fmt::format("Expected size {}, file has size {}", ExpectedSize, Size)); + } + + if (Header.RawSize != RawSize) // cached data describes a file of a different size: remove it and report a miss + { + InputFile.Close(); + RemoveFile(CachePath, Ec); + return false; + } + + if (Header.ModificationTick != ModificationTick) // cached data was written for a different modification tick: remove it and report a miss + { + InputFile.Close(); + RemoveFile(CachePath, Ec); + return false; + } + + OutChunked.Info.RawSize = 
Header.RawSize; + OutChunked.Info.RawHash = Header.RawHash; + + if (Header.SequenceCount > 0) + { + OutChunked.Info.ChunkSequence.resize(Header.SequenceCount); + InputFile.Read(OutChunked.Info.ChunkSequence.data(), Header.SequenceCount * sizeof(uint32_t), Offset); + Offset += Header.SequenceCount * sizeof(uint32_t); + } + + if (Header.ChunkCount > 0) + { + OutChunked.Info.ChunkHashes.resize(Header.ChunkCount); + OutChunked.ChunkSources.resize(Header.ChunkCount); + + InputFile.Read(OutChunked.Info.ChunkHashes.data(), Header.ChunkCount * sizeof(IoHash), Offset); + Offset += Header.ChunkCount * sizeof(IoHash); + + InputFile.Read(OutChunked.ChunkSources.data(), Header.ChunkCount * sizeof(ChunkSource), Offset); + Offset += Header.ChunkCount * sizeof(ChunkSource); + } + } + catch (const std::exception& Ex) + { + ZEN_DEBUG("Failed to read cached file {}. Reason: {}", CachePath, Ex.what()); + InputFile.Close(); + RemoveFile(CachePath, Ec); + return false; + } + + return true; + } + + const std::filesystem::path m_RootPath; + const IoHash m_ChunkerId; + const uint64_t m_MinimumRawSizeForCaching; + + static IoHash GetChunkerIdentity(ChunkingController& ChunkController) + { + IoHashStream ChunkerIdStream; + std::string_view ChunkerName = ChunkController.GetName(); + ChunkerIdStream.Append(ChunkerName.data(), ChunkerName.length()); + const CbObject ChunkerParameters = ChunkController.GetParameters(); + ChunkerParameters.GetHash(ChunkerIdStream); + return ChunkerIdStream.GetHash(); + } +}; + +std::unique_ptr<ChunkingCache> +CreateNullChunkingCache() +{ + return std::make_unique<NullChunkingCache>(); +} + +std::unique_ptr<ChunkingCache> +CreateMemoryChunkingCache() +{ + return std::make_unique<MemoryChunkingCache>(); +} + +std::unique_ptr<ChunkingCache> +CreateDiskChunkingCache(const std::filesystem::path& RootPath, ChunkingController& ChunkController, uint64_t MinimumRawSizeForCaching) +{ + return std::make_unique<DiskChunkingCache>(RootPath, ChunkController, 
MinimumRawSizeForCaching); +} + +#if ZEN_WITH_TESTS + +namespace chunkingcache_testutils { + ChunkedInfoWithSource CreateChunked(const std::string_view Data, uint32_t SplitSize) + { + std::vector<uint32_t> ChunkSequence; + std::vector<IoHash> ChunkHashes; + std::vector<ChunkSource> ChunkSources; + + if (SplitSize > 0) + { + std::string_view::size_type SplitOffset = 0; + while (SplitOffset < Data.length()) + { + std::string_view DataPart(Data.substr(SplitOffset, SplitSize)); + + ChunkSequence.push_back(gsl::narrow<uint32_t>(ChunkSequence.size())); + ChunkHashes.push_back(IoHash::HashBuffer(DataPart.data(), DataPart.length())); + ChunkSources.push_back({.Offset = SplitOffset, .Size = gsl::narrow<uint32_t>(DataPart.length())}); + SplitOffset += DataPart.length(); + } + } + + return ChunkedInfoWithSource{.Info = {.RawSize = Data.length(), + .RawHash = IoHash::HashBuffer(Data.data(), Data.length()), + .ChunkSequence = std::move(ChunkSequence), + .ChunkHashes = std::move(ChunkHashes)}, + .ChunkSources = std::move(ChunkSources)}; + } + + bool Equals(const ChunkedInfoWithSource& Lhs, const ChunkedInfoWithSource& Rhs) + { + if (Lhs.ChunkSources.size() != Rhs.ChunkSources.size()) + { + return false; + } + if (std::mismatch(Lhs.ChunkSources.begin(), + Lhs.ChunkSources.end(), + Rhs.ChunkSources.begin(), + [](const ChunkSource& Lhs, const ChunkSource& Rhs) { return Lhs.Offset == Rhs.Offset && Lhs.Size == Rhs.Size; }) + .first != Lhs.ChunkSources.end()) + { + return false; + } + if (Lhs.Info.RawSize != Rhs.Info.RawSize) + { + return false; + } + if (Lhs.Info.ChunkSequence != Rhs.Info.ChunkSequence) + { + return false; + } + if (Lhs.Info.ChunkHashes != Rhs.Info.ChunkHashes) + { + return false; + } + return true; + } +} // namespace chunkingcache_testutils + +TEST_CASE("chunkingcache.nullchunkingcache") +{ + using namespace chunkingcache_testutils; + + std::unique_ptr<ChunkingCache> Cache = CreateNullChunkingCache(); + ChunkedInfoWithSource Result; + 
CHECK(!Cache->GetCachedFile("dummy-path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + ChunkedInfoWithSource Chunked = CreateChunked("my data string", 4); + CHECK(!Cache->PutCachedFile("dummy-path", 91283, Chunked)); + + CHECK(!Cache->GetCachedFile("dummy-path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); +} + +TEST_CASE("chunkingcache.memorychunkingcache") +{ + using namespace chunkingcache_testutils; + + std::unique_ptr<ChunkingCache> Cache = CreateMemoryChunkingCache(); + ChunkedInfoWithSource Result; + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + CHECK(!Cache->GetCachedFile("file/B/Path", 395, 671283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + ChunkedInfoWithSource ChunkedAV1 = CreateChunked("File A data string", 4); + ChunkedInfoWithSource ChunkedAV2 = CreateChunked("File A updated data string", 4); + ChunkedInfoWithSource ChunkedBV1 = CreateChunked("File B data string", 4); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->PutCachedFile("file/B/Path", 51283, ChunkedBV1)); + + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path-wrong", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 493, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + // Asking a path that exists but without a match will remove that path + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 9283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + 
CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->PutCachedFile("file/A/Path", 91483, ChunkedAV2)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Equals(Result, ChunkedAV2)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize + 1, 51283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); +} + +TEST_CASE("chunkingcache.diskchunkingcache") +{ + using namespace chunkingcache_testutils; + + ScopedTemporaryDirectory TmpDir; + + std::unique_ptr<ChunkingController> ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); + + ChunkedInfoWithSource ChunkedAV1 = CreateChunked("File A data string", 4); + ChunkedInfoWithSource ChunkedAV2 = CreateChunked("File A updated data string", 4); + ChunkedInfoWithSource ChunkedBV1 = CreateChunked("File B data string", 4); + + { + std::unique_ptr<ChunkingCache> Cache = CreateDiskChunkingCache(TmpDir.Path(), *ChunkController, 0); + ChunkedInfoWithSource Result; + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + CHECK(!Cache->GetCachedFile("file/B/Path", 395, 671283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->PutCachedFile("file/B/Path", 51283, ChunkedBV1)); + + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + 
Result = ChunkedInfoWithSource{}; + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path-wrong", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 493, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + // Asking a path that exists but without a match will remove that path + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", 495, 9283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->PutCachedFile("file/A/Path", 91283, ChunkedAV1)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV1.Info.RawSize, 91283, Result)); + CHECK(Equals(Result, ChunkedAV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(Cache->PutCachedFile("file/A/Path", 91483, ChunkedAV2)); + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Equals(Result, ChunkedAV2)); + Result = ChunkedInfoWithSource{}; + } + { + std::unique_ptr<ChunkingCache> Cache = CreateDiskChunkingCache(TmpDir.Path(), *ChunkController, 0); + ChunkedInfoWithSource Result; + + CHECK(Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Equals(Result, ChunkedAV2)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/A/Path", ChunkedAV2.Info.RawSize, 91483, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Equals(Result, ChunkedBV1)); + Result = ChunkedInfoWithSource{}; + + CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize + 1, 51283, Result)); + 
CHECK(Result.Info.ChunkHashes.empty()); + + CHECK(!Cache->GetCachedFile("file/B/Path", ChunkedBV1.Info.RawSize, 51283, Result)); + CHECK(Result.Info.ChunkHashes.empty()); + } +} + +void +chunkingcache_forcelink() +{ +} + +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildmanifest.h b/src/zenremotestore/include/zenremotestore/builds/buildmanifest.h new file mode 100644 index 000000000..a0d9a7691 --- /dev/null +++ b/src/zenremotestore/include/zenremotestore/builds/buildmanifest.h @@ -0,0 +1,27 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/filesystem.h> +#include <zencore/uid.h> + +namespace zen { + +struct BuildManifest +{ + struct Part + { + Oid PartId = Oid::Zero; + std::string PartName; + std::vector<std::filesystem::path> Files; + }; + std::vector<Part> Parts; +}; + +BuildManifest ParseBuildManifest(const std::filesystem::path& ManifestPath); + +#if ZEN_WITH_TESTS +void buildmanifest_forcelink(); +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorage.h b/src/zenremotestore/include/zenremotestore/builds/buildstorage.h index 4b7e54d85..85dabc59f 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorage.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorage.h @@ -34,7 +34,7 @@ public: virtual ~BuildStorageBase() {} virtual CbObject ListNamespaces(bool bRecursive = false) = 0; - virtual CbObject ListBuilds(CbObject Query) = 0; + virtual CbObject ListBuilds(std::string_view JsonQuery) = 0; virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) = 0; virtual CbObject GetBuild(const Oid& BuildId) = 0; virtual void FinalizeBuild(const Oid& BuildId) = 0; diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h index 32c8bda01..6304159ae 100644 
--- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h @@ -11,6 +11,7 @@ #include <zenutil/bufferedwritefilecache.h> #include <atomic> +#include <future> #include <memory> ZEN_THIRD_PARTY_INCLUDES_START @@ -377,7 +378,8 @@ private: std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, std::atomic<uint64_t>& WritePartsComplete, const uint64_t TotalPartWriteCount, - FilteredRate& FilteredWrittenBytesPerSecond); + FilteredRate& FilteredWrittenBytesPerSecond, + bool EnableBacklog); void VerifyAndCompleteChunkSequencesAsync(std::span<const uint32_t> RemoteSequenceIndexes, ParallelWork& Work); bool CompleteSequenceChunk(uint32_t RemoteSequenceIndex, std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters); @@ -421,6 +423,21 @@ struct FindBlocksStatistics uint64_t NewBlocksCount = 0; uint64_t NewBlocksChunkCount = 0; uint64_t NewBlocksChunkByteCount = 0; + + FindBlocksStatistics& operator+=(const FindBlocksStatistics& Rhs) + { + FindBlockTimeMS += Rhs.FindBlockTimeMS; + PotentialChunkCount += Rhs.PotentialChunkCount; + PotentialChunkByteCount += Rhs.PotentialChunkByteCount; + FoundBlockCount += Rhs.FoundBlockCount; + FoundBlockChunkCount += Rhs.FoundBlockChunkCount; + FoundBlockByteCount += Rhs.FoundBlockByteCount; + AcceptedBlockCount += Rhs.AcceptedBlockCount; + NewBlocksCount += Rhs.NewBlocksCount; + NewBlocksChunkCount += Rhs.NewBlocksChunkCount; + NewBlocksChunkByteCount += Rhs.NewBlocksChunkByteCount; + return *this; + } }; struct UploadStatistics @@ -528,15 +545,16 @@ public: WorkerThreadPool& IOWorkerPool, WorkerThreadPool& NetworkPool, const Oid& BuildId, - const Oid& BuildPartId, - const std::string_view BuildPartName, const std::filesystem::path& Path, - const std::filesystem::path& ManifestPath, bool CreateBuild, const CbObject& MetaData, const Options& Options); - void Execute(); + std::vector<std::pair<Oid, std::string>> 
Execute(const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& ManifestPath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache); DiskStatistics m_DiskStats; GetFolderContentStatistics m_LocalFolderScanStats; @@ -548,7 +566,29 @@ public: LooseChunksStatistics m_LooseChunksStats; private: - std::vector<std::filesystem::path> ParseManifest(const std::filesystem::path& Path, const std::filesystem::path& ManifestPath); + struct PrepareBuildResult + { + std::vector<ChunkBlockDescription> KnownBlocks; + uint64_t PreferredMultipartChunkSize = 0; + uint64_t PayloadSize = 0; + uint64_t PrepareBuildTimeMs = 0; + uint64_t FindBlocksTimeMs = 0; + uint64_t ElapsedTimeMs = 0; + }; + + PrepareBuildResult PrepareBuild(); + + struct UploadPart + { + Oid PartId = Oid::Zero; + std::string PartName; + FolderContent Content; + uint64_t TotalRawSize = 0; + GetFolderContentStatistics LocalFolderScanStats; + }; + + std::vector<BuildsOperationUploadFolder::UploadPart> ReadFolder(); + std::vector<UploadPart> ReadManifestParts(const std::filesystem::path& ManifestPath); bool IsAcceptedFolder(const std::string_view& RelativePath) const; bool IsAcceptedFile(const std::string_view& RelativePath) const; @@ -571,7 +611,9 @@ private: void GenerateBuildBlocks(const ChunkedFolderContent& Content, const ChunkedContentLookup& Lookup, const std::vector<std::vector<uint32_t>>& NewBlockChunks, - GeneratedBlocks& OutBlocks); + GeneratedBlocks& OutBlocks, + GenerateBlocksStatistics& GenerateBlocksStats, + UploadStatistics& UploadStats); std::vector<uint32_t> CalculateAbsoluteChunkOrders(const std::span<const IoHash> LocalChunkHashes, const std::span<const uint32_t> LocalChunkOrder, @@ -594,6 +636,25 @@ private: CompositeBuffer&& HeaderBuffer, const std::vector<uint32_t>& ChunksInBlock); + enum class PartTaskSteps : uint32_t + { + ChunkPartContent = 0, + CalculateDelta, + GenerateBlocks, + BuildPartManifest, + UploadBuildPart, + UploadAttachments, + 
PutBuildPartStats, + StepCount + }; + + void UploadBuildPart(ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + uint32_t PartIndex, + const UploadPart& Part, + uint32_t PartStepOffset, + uint32_t StepCount); + void UploadPartBlobs(const ChunkedFolderContent& Content, const ChunkedContentLookup& Lookup, std::span<IoHash> RawHashes, @@ -617,16 +678,18 @@ private: WorkerThreadPool& m_IOWorkerPool; WorkerThreadPool& m_NetworkPool; const Oid m_BuildId; - const Oid m_BuildPartId; - const std::string m_BuildPartName; const std::filesystem::path m_Path; - const std::filesystem::path m_ManifestPath; const bool m_CreateBuild; // ?? Member? const CbObject m_MetaData; // ?? Member const Options m_Options; tsl::robin_set<uint32_t> m_NonCompressableExtensionHashes; + + std::future<PrepareBuildResult> m_PrepBuildResultFuture; + std::vector<ChunkBlockDescription> m_KnownBlocks; + uint64_t m_PreferredMultipartChunkSize = 0; + uint64_t m_LargeAttachmentSize = 0; }; struct ValidateStatistics @@ -730,4 +793,33 @@ CompositeBuffer ValidateBlob(std::atomic<bool>& AbortFlag, uint64_t& OutCompressedSize, uint64_t& OutDecompressedSize); +std::vector<std::pair<Oid, std::string>> ResolveBuildPartNames(CbObjectView BuildObject, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + std::uint64_t& OutPreferredMultipartChunkSize); + +struct BuildManifest; + +ChunkedFolderContent GetRemoteContent(OperationLogOutput& Output, + StorageInstance& Storage, + const Oid& BuildId, + const std::vector<std::pair<Oid, std::string>>& BuildParts, + const BuildManifest& Manifest, + std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards, + std::unique_ptr<ChunkingController>& OutChunkController, + std::vector<ChunkedFolderContent>& OutPartContents, + std::vector<ChunkBlockDescription>& OutBlockDescriptions, + std::vector<IoHash>& OutLooseChunkHashes, + bool IsQuiet, + bool IsVerbose, + bool 
DoExtraContentVerify); + +std::string GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix); + +#if ZEN_WITH_TESTS +void buildstorageoperations_forcelink(); +#endif // ZEN_WITH_TESTS + } // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h b/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h index 295d275d1..d339b0f94 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h @@ -47,6 +47,19 @@ struct ReuseBlocksStatistics uint64_t RejectedByteCount = 0; uint64_t AcceptedReduntantChunkCount = 0; uint64_t AcceptedReduntantByteCount = 0; + + ReuseBlocksStatistics& operator+=(const ReuseBlocksStatistics& Rhs) + { + AcceptedChunkCount += Rhs.AcceptedChunkCount; + AcceptedByteCount += Rhs.AcceptedByteCount; + AcceptedRawByteCount += Rhs.AcceptedRawByteCount; + RejectedBlockCount += Rhs.RejectedBlockCount; + RejectedChunkCount += Rhs.RejectedChunkCount; + RejectedByteCount += Rhs.RejectedByteCount; + AcceptedReduntantChunkCount += Rhs.AcceptedReduntantChunkCount; + AcceptedReduntantByteCount += Rhs.AcceptedReduntantByteCount; + return *this; + } }; class OperationLogOutput; diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h index 78f20a727..d402bd3f0 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h @@ -17,6 +17,7 @@ namespace zen { class CbWriter; class ChunkingController; +class ChunkingCache; class WorkerThreadPool; enum class SourcePlatform @@ -55,11 +56,30 @@ FolderContent LoadFolderContentToCompactBinary(CbObjectView Input); struct GetFolderContentStatistics { + GetFolderContentStatistics() {} + GetFolderContentStatistics(GetFolderContentStatistics&& Rhs) + : 
FoundFileCount(Rhs.FoundFileCount.load()) + , FoundFileByteCount(Rhs.FoundFileByteCount.load()) + , AcceptedFileCount(Rhs.AcceptedFileCount.load()) + , AcceptedFileByteCount(Rhs.AcceptedFileByteCount.load()) + , ElapsedWallTimeUS(Rhs.ElapsedWallTimeUS) + { + } std::atomic<uint64_t> FoundFileCount = 0; std::atomic<uint64_t> FoundFileByteCount = 0; std::atomic<uint64_t> AcceptedFileCount = 0; std::atomic<uint64_t> AcceptedFileByteCount = 0; uint64_t ElapsedWallTimeUS = 0; + + inline GetFolderContentStatistics& operator+=(const GetFolderContentStatistics& Rhs) + { + FoundFileCount += Rhs.FoundFileCount; + FoundFileByteCount += Rhs.FoundFileByteCount; + AcceptedFileCount += Rhs.AcceptedFileCount; + AcceptedFileByteCount += Rhs.AcceptedFileByteCount; + ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS; + return *this; + } }; FolderContent GetFolderContent(GetFolderContentStatistics& Stats, @@ -146,6 +166,12 @@ struct ChunkingStatistics std::atomic<uint64_t> UniqueChunksFound = 0; std::atomic<uint64_t> UniqueSequencesFound = 0; std::atomic<uint64_t> UniqueBytesFound = 0; + std::atomic<uint64_t> FilesFoundInCache = 0; + std::atomic<uint64_t> ChunksFoundInCache = 0; + std::atomic<uint64_t> BytesFoundInCache = 0; + std::atomic<uint64_t> FilesStoredInCache = 0; + std::atomic<uint64_t> ChunksStoredInCache = 0; + std::atomic<uint64_t> BytesStoredInCache = 0; uint64_t ElapsedWallTimeUS = 0; inline ChunkingStatistics& operator+=(const ChunkingStatistics& Rhs) @@ -157,6 +183,12 @@ struct ChunkingStatistics UniqueSequencesFound += Rhs.UniqueSequencesFound; UniqueBytesFound += Rhs.UniqueBytesFound; ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS; + FilesFoundInCache += Rhs.FilesFoundInCache; + ChunksFoundInCache += Rhs.ChunksFoundInCache; + BytesFoundInCache += Rhs.BytesFoundInCache; + FilesStoredInCache += Rhs.FilesStoredInCache; + ChunksStoredInCache += Rhs.ChunksStoredInCache; + BytesStoredInCache += Rhs.BytesStoredInCache; return *this; } }; @@ -166,6 +198,7 @@ ChunkedFolderContent 
ChunkFolderContent(ChunkingStatistics& Stats, const std::filesystem::path& RootPath, const FolderContent& Content, const ChunkingController& InChunkingController, + ChunkingCache& InChunkingCache, int32_t UpdateIntervalMS, std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, std::atomic<bool>& AbortFlag, diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h index 4cec80fdb..64e2c9c29 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedfile.h @@ -21,11 +21,14 @@ struct ChunkedInfo std::vector<IoHash> ChunkHashes; }; +#pragma pack(push) +#pragma pack(4) struct ChunkSource { uint64_t Offset; // 8 uint32_t Size; // 4 }; +#pragma pack(pop) struct ChunkedInfoWithSource { diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h b/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h new file mode 100644 index 000000000..e213bc41b --- /dev/null +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkingcache.h @@ -0,0 +1,44 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <filesystem> + +namespace zen { + +struct ChunkedInfoWithSource; +class ChunkingController; + +class ChunkingCache +{ +public: + virtual ~ChunkingCache() {} + + /* + * Attempting to fetch a cached file with mismatching RawSize of ModificationTick will delete any existing cached data for that + * InputPath + * + * If GetCachedFile returns false, OutChunked is untouched + */ + virtual bool GetCachedFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + uint64_t ModificationTick, + ChunkedInfoWithSource& OutChunked) = 0; + + /* + * Putting a cached entry with an existing InputPath will overwrite it with the new ModificationTick and Chunked data + */ + virtual bool PutCachedFile(const std::filesystem::path& InputPath, uint64_t ModificationTick, const ChunkedInfoWithSource& Chunked) = 0; +}; + +std::unique_ptr<ChunkingCache> CreateNullChunkingCache(); +std::unique_ptr<ChunkingCache> CreateMemoryChunkingCache(); +std::unique_ptr<ChunkingCache> CreateDiskChunkingCache(const std::filesystem::path& RootPath, + ChunkingController& ChunkController, + uint64_t MinimumRawSizeForCaching); + +#if ZEN_WITH_TESTS +void chunkingcache_forcelink(); +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/filesystemutils.h b/src/zenremotestore/include/zenremotestore/filesystemutils.h index cfd6f02e1..cb2d718f7 100644 --- a/src/zenremotestore/include/zenremotestore/filesystemutils.h +++ b/src/zenremotestore/include/zenremotestore/filesystemutils.h @@ -12,6 +12,8 @@ class CompositeBuffer; class BufferedOpenFile { public: + static constexpr uint64_t BlockSize = 256u * 1024u; + BufferedOpenFile(const std::filesystem::path Path, std::atomic<uint64_t>& OpenReadCount, std::atomic<uint64_t>& CurrentOpenFileCount, @@ -30,8 +32,6 @@ public: void* Handle() { return m_Source.Handle(); } private: - const uint64_t BlockSize = 256u * 1024u; - BasicFile m_Source; const uint64_t m_SourceSize; std::atomic<uint64_t>& 
m_OpenReadCount; diff --git a/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h b/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h index 15077376c..eaf6962fd 100644 --- a/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h +++ b/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h @@ -110,7 +110,7 @@ public: JupiterResult ListBuildNamespaces(); JupiterResult ListBuildBuckets(std::string_view Namespace); - JupiterResult ListBuilds(std::string_view Namespace, std::string_view BucketId, const IoBuffer& Payload); + JupiterResult ListBuilds(std::string_view Namespace, std::string_view BucketId, std::string_view JsonQuery); JupiterResult PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload); JupiterResult GetBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); JupiterResult FinalizeBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); diff --git a/src/zenremotestore/jupiter/jupitersession.cpp b/src/zenremotestore/jupiter/jupitersession.cpp index dd0e5ad1f..1bc6564ce 100644 --- a/src/zenremotestore/jupiter/jupitersession.cpp +++ b/src/zenremotestore/jupiter/jupitersession.cpp @@ -430,9 +430,10 @@ JupiterSession::ListBuildBuckets(std::string_view Namespace) } JupiterResult -JupiterSession::ListBuilds(std::string_view Namespace, std::string_view BucketId, const IoBuffer& Payload) +JupiterSession::ListBuilds(std::string_view Namespace, std::string_view BucketId, std::string_view JsonQuery) { - ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); + IoBuffer Payload(IoBuffer::Wrap, JsonQuery.data(), JsonQuery.size()); + Payload.SetContentType(ZenContentType::kJSON); std::string OptionalBucketPath = BucketId.empty() ? 
"" : fmt::format("/{}", BucketId); HttpClient::Response Response = m_HttpClient.Post(fmt::format("/api/v2/builds/{}{}/search", Namespace, OptionalBucketPath), Payload, diff --git a/src/zenremotestore/zenremotestore.cpp b/src/zenremotestore/zenremotestore.cpp index 7f785599f..a0bb17260 100644 --- a/src/zenremotestore/zenremotestore.cpp +++ b/src/zenremotestore/zenremotestore.cpp @@ -2,9 +2,12 @@ #include <zenremotestore/zenremotestore.h> +#include <zenremotestore/builds/buildmanifest.h> #include <zenremotestore/builds/buildsavedstate.h> +#include <zenremotestore/builds/buildstorageoperations.h> #include <zenremotestore/chunking/chunkedcontent.h> #include <zenremotestore/chunking/chunkedfile.h> +#include <zenremotestore/chunking/chunkingcache.h> #include <zenremotestore/filesystemutils.h> #include <zenremotestore/projectstore/remoteprojectstore.h> @@ -15,11 +18,13 @@ namespace zen { void zenremotestore_forcelinktests() { + buildmanifest_forcelink(); buildsavedstate_forcelink(); + buildstorageoperations_forcelink(); chunkblock_forcelink(); chunkedcontent_forcelink(); chunkedfile_forcelink(); - chunkedcontent_forcelink(); + chunkingcache_forcelink(); filesystemutils_forcelink(); remoteprojectstore_forcelink(); } diff --git a/src/zenserver-test/hub-tests.cpp b/src/zenserver-test/hub-tests.cpp new file mode 100644 index 000000000..42a5dcae4 --- /dev/null +++ b/src/zenserver-test/hub-tests.cpp @@ -0,0 +1,252 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#if ZEN_WITH_TESTS +# include "zenserver-test.h" +# include <zencore/testing.h> +# include <zencore/testutils.h> +# include <zencore/workthreadpool.h> +# include <zencore/compactbinarybuilder.h> +# include <zencore/compactbinarypackage.h> +# include <zencore/compress.h> +# include <zencore/filesystem.h> +# include <zencore/stream.h> +# include <zencore/string.h> +# include <zencore/fmtutils.h> +# include <zencore/scopeguard.h> +# include <zenhttp/packageformat.h> +# include <zenremotestore/builds/buildstoragecache.h> +# include <zenutil/workerpools.h> +# include <zenutil/zenserverprocess.h> +# include <zenhttp/httpclient.h> +# include <zenutil/consul.h> + +namespace zen::tests::hub { + +using namespace std::literals; + +TEST_SUITE_BEGIN("hub.lifecycle"); + +TEST_CASE("hub.lifecycle.basic") +{ + { + ZenServerInstance Instance(TestEnv, ZenServerInstance::ServerMode::kHubServer); + + const uint16_t PortNumber = Instance.SpawnServerAndWaitUntilReady(); + CHECK(PortNumber != 0); + + HttpClient Client(Instance.GetBaseUri() + "/hub/"); + + HttpClient::Response Result = Client.Get("status"); + CHECK(Result); + } +} + +TEST_CASE("hub.lifecycle.children") +{ + ZenServerInstance Instance(TestEnv, ZenServerInstance::ServerMode::kHubServer); + + const uint16_t PortNumber = Instance.SpawnServerAndWaitUntilReady(); + REQUIRE(PortNumber != 0); + + SUBCASE("spawn") + { + HttpClient Client(Instance.GetBaseUri() + "/hub/"); + + HttpClient::Response Result = Client.Get("status"); + REQUIRE(Result); + + { + Result = Client.Post("modules/abc/provision"); + REQUIRE(Result); + + CbObject AbcResult = Result.AsObject(); + CHECK(AbcResult["moduleId"].AsString() == "abc"sv); + const uint16_t AbcPort = AbcResult["port"].AsUInt16(0); + CHECK_NE(AbcPort, 0); + + // This should be a fresh instance with no contents + + HttpClient AbcClient(fmt::format("http://localhost:{}", AbcPort)); + + Result = AbcClient.Get("/z$/ns1/b/0123456789abcdef0123456789abcdef01234567"); + CHECK_EQ(Result.StatusCode, 
HttpResponseCode::NotFound); + + Result = AbcClient.Put("/z$/ns1/b/0123456789abcdef0123456789abcdef01234567", + IoBufferBuilder::MakeFromMemory(MakeMemoryView("abcdef"sv))); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Created); + } + + { + Result = Client.Post("modules/def/provision"); + REQUIRE(Result); + + CbObject DefResult = Result.AsObject(); + CHECK(DefResult["moduleId"].AsString() == "def"sv); + const uint16_t DefPort = DefResult["port"].AsUInt16(0); + REQUIRE_NE(DefPort, 0); + + // This should be a fresh instance with no contents + + HttpClient DefClient(fmt::format("http://localhost:{}", DefPort)); + + Result = DefClient.Get("/z$/ns1/b/0123456789abcdef0123456789abcdef01234567"); + CHECK_EQ(Result.StatusCode, HttpResponseCode::NotFound); + + Result = DefClient.Put("/z$/ns1/b/0123456789abcdef0123456789abcdef01234567", + IoBufferBuilder::MakeFromMemory(MakeMemoryView("AbcDef"sv))); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Created); + } + + // this should be rejected because of the invalid module id + Result = Client.Post("modules/!!!!!/provision"); + CHECK(!Result); + + Result = Client.Post("modules/ghi/provision"); + REQUIRE(Result); + + // Tear down instances + + Result = Client.Post("modules/abc/deprovision"); + REQUIRE(Result); + + Result = Client.Post("modules/def/deprovision"); + REQUIRE(Result); + + Result = Client.Post("modules/ghi/deprovision"); + REQUIRE(Result); + + // re-provision to verify that (de)hydration preserved state + { + Result = Client.Post("modules/abc/provision"); + REQUIRE(Result); + + CbObject AbcResult = Result.AsObject(); + CHECK(AbcResult["moduleId"].AsString() == "abc"sv); + const uint16_t AbcPort = AbcResult["port"].AsUInt16(0); + REQUIRE_NE(AbcPort, 0); + + // This should contain the content from the previous run + + HttpClient AbcClient(fmt::format("http://localhost:{}", AbcPort)); + + Result = AbcClient.Get("/z$/ns1/b/0123456789abcdef0123456789abcdef01234567"); + CHECK_EQ(Result.StatusCode, HttpResponseCode::OK); 
+ + CHECK_EQ(Result.AsText(), "abcdef"sv); + + Result = AbcClient.Put("/z$/ns1/b/1123456789abcdef0123456789abcdef01234567", + IoBufferBuilder::MakeFromMemory(MakeMemoryView("ghijklmnop"sv))); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Created); + } + + { + Result = Client.Post("modules/def/provision"); + REQUIRE(Result); + + CbObject DefResult = Result.AsObject(); + CHECK(DefResult["moduleId"].AsString() == "def"sv); + const uint16_t DefPort = DefResult["port"].AsUInt16(0); + REQUIRE_NE(DefPort, 0); + + // This should contain the content from the previous run + + HttpClient DefClient(fmt::format("http://localhost:{}", DefPort)); + + Result = DefClient.Get("/z$/ns1/b/0123456789abcdef0123456789abcdef01234567"); + CHECK_EQ(Result.StatusCode, HttpResponseCode::OK); + + CHECK_EQ(Result.AsText(), "AbcDef"sv); + + Result = DefClient.Put("/z$/ns1/b/1123456789abcdef0123456789abcdef01234567", + IoBufferBuilder::MakeFromMemory(MakeMemoryView("GhijklmNop"sv))); + CHECK_EQ(Result.StatusCode, HttpResponseCode::Created); + } + + Result = Client.Post("modules/abc/deprovision"); + REQUIRE(Result); + + Result = Client.Post("modules/def/deprovision"); + REQUIRE(Result); + + // re-provision to verify that (de)hydration preserved state, including + // state which was generated after the very first dehydration + { + Result = Client.Post("modules/abc/provision"); + REQUIRE(Result); + + CbObject AbcResult = Result.AsObject(); + CHECK(AbcResult["moduleId"].AsString() == "abc"sv); + const uint16_t AbcPort = AbcResult["port"].AsUInt16(0); + REQUIRE_NE(AbcPort, 0); + + // This should contain the content from the previous two runs + + HttpClient AbcClient(fmt::format("http://localhost:{}", AbcPort)); + + Result = AbcClient.Get("/z$/ns1/b/0123456789abcdef0123456789abcdef01234567"); + CHECK_EQ(Result.StatusCode, HttpResponseCode::OK); + + CHECK_EQ(Result.AsText(), "abcdef"sv); + + Result = AbcClient.Get("/z$/ns1/b/1123456789abcdef0123456789abcdef01234567"); + CHECK_EQ(Result.StatusCode, 
HttpResponseCode::OK); + + CHECK_EQ(Result.AsText(), "ghijklmnop"sv); + } + + { + Result = Client.Post("modules/def/provision"); + REQUIRE(Result); + + CbObject DefResult = Result.AsObject(); + REQUIRE(DefResult["moduleId"].AsString() == "def"sv); + const uint16_t DefPort = DefResult["port"].AsUInt16(0); + REQUIRE_NE(DefPort, 0); + + // This should contain the content from the previous two runs + + HttpClient DefClient(fmt::format("http://localhost:{}", DefPort)); + + Result = DefClient.Get("/z$/ns1/b/0123456789abcdef0123456789abcdef01234567"); + CHECK_EQ(Result.StatusCode, HttpResponseCode::OK); + + CHECK_EQ(Result.AsText(), "AbcDef"sv); + + Result = DefClient.Get("/z$/ns1/b/1123456789abcdef0123456789abcdef01234567"); + CHECK_EQ(Result.StatusCode, HttpResponseCode::OK); + + CHECK_EQ(Result.AsText(), "GhijklmNop"sv); + } + + Result = Client.Post("modules/abc/deprovision"); + REQUIRE(Result); + + Result = Client.Post("modules/def/deprovision"); + REQUIRE(Result); + + // final sanity check that the hub is still responsive + Result = Client.Get("status"); + CHECK(Result); + } +} + +TEST_SUITE_END(); + +TEST_CASE("hub.consul.lifecycle") +{ + zen::consul::ConsulProcess ConsulProc; + ConsulProc.SpawnConsulAgent(); + + zen::consul::ConsulClient Client("http://localhost:8500/"); + Client.SetKeyValue("zen/hub/testkey", "testvalue"); + + std::string RetrievedValue = Client.GetKeyValue("zen/hub/testkey"); + CHECK_EQ(RetrievedValue, "testvalue"); + + Client.DeleteKey("zen/hub/testkey"); + + ConsulProc.StopConsulAgent(); +} + +} // namespace zen::tests::hub +#endif diff --git a/src/zenserver-test/zenserver-test.cpp b/src/zenserver-test/zenserver-test.cpp index 6f207b184..9a42bb73d 100644 --- a/src/zenserver-test/zenserver-test.cpp +++ b/src/zenserver-test/zenserver-test.cpp @@ -17,6 +17,7 @@ # include <zencore/timer.h> # include <zenhttp/httpclient.h> # include <zenhttp/packageformat.h> +# include <zenutil/commandlineoptions.h> # include <zenutil/logging/testformatter.h> # 
include <zenutil/zenserverprocess.h> @@ -68,6 +69,8 @@ main(int argc, char** argv) using namespace std::literals; using namespace zen; + zen::CommandLineConverter ArgConverter(argc, argv); + # if ZEN_PLATFORM_LINUX IgnoreChildSignals(); # endif diff --git a/src/zenserver/frontend/html.zip b/src/zenserver/frontend/html.zip Binary files differindex 36f08a05d..77b81d11c 100644 --- a/src/zenserver/frontend/html.zip +++ b/src/zenserver/frontend/html.zip diff --git a/src/zenserver/frontend/html/pages/oplog.js b/src/zenserver/frontend/html/pages/oplog.js index bef5bacce..879fc4c97 100644 --- a/src/zenserver/frontend/html/pages/oplog.js +++ b/src/zenserver/frontend/html/pages/oplog.js @@ -58,12 +58,12 @@ export class Page extends ZenPage { const nav = section.add_widget(Toolbar); const left = nav.left(); - left.add("|<") .on_click(() => this._on_next_prev(-10e10)); - left.add("<<").on_click(() => this._on_next_prev(-10)); - left.add("prev") .on_click(() => this._on_next_prev( -1)); - left.add("next") .on_click(() => this._on_next_prev( 1)); - left.add(">>").on_click(() => this._on_next_prev( 10)); - left.add(">|") .on_click(() => this._on_next_prev( 10e10)); + left.add("|<") .on_click(() => this._on_next_prev(-10e10)); + left.add("<<") .on_click(() => this._on_next_prev(-10)); + left.add("prev").on_click(() => this._on_next_prev( -1)); + left.add("next").on_click(() => this._on_next_prev( 1)); + left.add(">>") .on_click(() => this._on_next_prev( 10)); + left.add(">|") .on_click(() => this._on_next_prev( 10e10)); left.sep(); for (var count of [10, 25, 50, 100]) diff --git a/src/zenserver/hub/README.md b/src/zenserver/hub/README.md new file mode 100644 index 000000000..322be3649 --- /dev/null +++ b/src/zenserver/hub/README.md @@ -0,0 +1,28 @@ +# Zen Server Hub + +The Zen Server can act in a "hub" mode. In this mode, the only services offered are the basic health +and diagnostic services alongside an API to provision and deprovision Storage server instances. 
+ +## Generic Server API + +GET `/health` - returns an `OK!` payload when all enabled services are up and responding + +## Hub API + +`{moduleid}` - alphanumeric identifier used to identify a dataset (typically associated with a content plug-in module) + +GET `/hub/status` - obtain a summary of the currently live instances + +GET `/hub/modules/{moduleid}` - retrieve information about a module + +POST `/hub/modules/{moduleid}/provision` - provision service for module + +POST `/hub/modules/{moduleid}/deprovision` - deprovision service for module + +GET `/hub/stats` - retrieve stats for service + +## Hub Configuration + +The hub service can use Consul to provide status updates + +The hub service can emit telemetry to an OpenTelemetry collector diff --git a/src/zenserver/hub/hubservice.cpp b/src/zenserver/hub/hubservice.cpp new file mode 100644 index 000000000..4d9da3a57 --- /dev/null +++ b/src/zenserver/hub/hubservice.cpp @@ -0,0 +1,867 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "hubservice.h" + +#include "hydration.h" + +#include <zencore/compactbinarybuilder.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/scopeguard.h> +#include <zencore/system.h> +#include <zenutil/zenserverprocess.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <EASTL/fixed_vector.h> +#include <asio.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <unordered_map> +#include <unordered_set> + +namespace zen { + +/////////////////////////////////////////////////////////////////////////// + +/** + * A timeline of events with sequence IDs and timestamps. Used to + * track significant events for broadcasting to listeners. 
+ */
+class EventTimeline
+{
+public:
+    EventTimeline() { m_Events.reserve(1024); }
+
+    ~EventTimeline() = default;
+
+    EventTimeline(const EventTimeline&) = delete;
+    EventTimeline& operator=(const EventTimeline&) = delete;
+
+    /**
+     * Append an event to the timeline, stamping it with the next sequence ID
+     * and the current steady-clock time.
+     *
+     * @param EventTag Short tag identifying the kind of event
+     * @param EventMetadata Payload stored alongside the event
+     */
+    void RecordEvent(std::string_view EventTag, CbObject EventMetadata)
+    {
+        const auto                 Now = std::chrono::steady_clock::now();
+        RwLock::ExclusiveLockScope _(m_Lock);
+        // Allocate the sequence ID while holding the lock so that m_Events is always
+        // ordered by SequenceId, even when several threads record concurrently
+        const uint64_t SequenceId = m_NextEventId++;
+        m_Events.emplace_back(SequenceId, EventTag, Now, std::move(EventMetadata));
+    }
+
+    struct EventRecord
+    {
+        uint64_t                              SequenceId;
+        std::string                           Tag;
+        std::chrono::steady_clock::time_point Timestamp;
+        CbObject                              EventMetadata;
+
+        EventRecord(uint64_t                              InSequenceId,
+                    std::string_view                      InTag,
+                    std::chrono::steady_clock::time_point InTimestamp,
+                    CbObject                              InEventMetadata = CbObject())
+        : SequenceId(InSequenceId)
+        , Tag(InTag)
+        , Timestamp(InTimestamp)
+        , EventMetadata(std::move(InEventMetadata))
+        {
+        }
+    };
+
+    /**
+     * Iterate over events that have a SequenceId greater than SinceEventId
+     *
+     * Sequence IDs start at 1, so passing 0 visits every event still held
+     * by the timeline.
+     *
+     * @param Callback A callable that takes a const EventRecord&
+     * @param SinceEventId The SequenceId to compare against
+     */
+    void IterateEventsSince(auto&& Callback, uint64_t SinceEventId)
+    {
+        // Hold the lock for as short a time as possible
+        eastl::fixed_vector<EventRecord, 128> EventsToProcess;
+        m_Lock.WithSharedLock([&] {
+            for (auto& Event : m_Events)
+            {
+                if (Event.SequenceId > SinceEventId)
+                {
+                    EventsToProcess.push_back(Event);
+                }
+            }
+        });
+
+        // Now invoke the callback outside the lock
+        for (auto& Event : EventsToProcess)
+        {
+            Callback(Event);
+        }
+    }
+
+    /**
+     * Trim events up to (and including) the given SequenceId. Intended
+     * to be used for cleaning up events which are no longer interesting.
+     *
+     * @param UpToEventId The SequenceId up to which events should be removed
+     */
+    void TrimEventsUpTo(uint64_t UpToEventId)
+    {
+        RwLock::ExclusiveLockScope _(m_Lock);
+        auto It = std::remove_if(m_Events.begin(), m_Events.end(), [UpToEventId](const EventRecord& Event) {
+            return Event.SequenceId <= UpToEventId;
+        });
+        m_Events.erase(It, m_Events.end());
+    }
+
+private:
+    // Starts at 1 so that 0 can be used as "nothing seen yet" with IterateEventsSince
+    std::atomic<uint64_t> m_NextEventId{1};
+
+    RwLock                   m_Lock;
+    std::vector<EventRecord> m_Events;
+};
+
+//////////////////////////////////////////////////////////////////////////
+
+struct ResourceMetrics
+{
+    uint64_t DiskUsageBytes   = 0;
+    uint64_t MemoryUsageBytes = 0;
+};
+
+/**
+ * Storage Server Instance
+ *
+ * This class manages the lifecycle of a storage server instance, and
+ * provides functions to query its state. There should be one instance
+ * per module ID.
+ */
+struct StorageServerInstance
+{
+    StorageServerInstance(ZenServerEnvironment& RunEnvironment,
+                          std::string_view      ModuleId,
+                          std::filesystem::path FileHydrationPath,
+                          std::filesystem::path HydrationTempPath);
+    ~StorageServerInstance();
+
+    void Provision();
+    void Deprovision();
+
+    void Hibernate();
+    void Wake();
+
+    const ResourceMetrics& GetResourceMetrics() const { return m_ResourceMetrics; }
+
+    inline std::string_view GetModuleId() const { return m_ModuleId; }
+    inline bool             IsProvisioned() const { return m_IsProvisioned.load(); }
+
+    inline uint16_t GetBasePort() const { return m_ServerInstance.GetBasePort(); }
+
+private:
+    RwLock                m_Lock;
+    std::string           m_ModuleId;
+    std::atomic<bool>     m_IsProvisioned{false};
+    std::atomic<bool>     m_IsHibernated{false};
+    ZenServerInstance     m_ServerInstance;
+    std::filesystem::path m_BaseDir;
+    std::filesystem::path m_TempDir;
+    std::filesystem::path m_HydrationPath;
+    ResourceMetrics       m_ResourceMetrics;
+
+    void SpawnServerProcess();
+
+    void Hydrate();
+    void Dehydrate();
+};
+
+StorageServerInstance::StorageServerInstance(ZenServerEnvironment& RunEnvironment,
+                                             std::string_view 
ModuleId,
+                                             std::filesystem::path FileHydrationPath,
+                                             std::filesystem::path HydrationTempPath)
+: m_ModuleId(ModuleId)
+, m_ServerInstance(RunEnvironment, ZenServerInstance::ServerMode::kStorageServer)
+, m_HydrationPath(FileHydrationPath)
+{
+    m_BaseDir = RunEnvironment.CreateChildDir(ModuleId);
+    m_TempDir = HydrationTempPath / ModuleId;
+}
+
+StorageServerInstance::~StorageServerInstance()
+{
+}
+
+/**
+ * Start the child server process on m_BaseDir and wait until it reports ready.
+ * Asserts that the process is not already running.
+ */
+void
+StorageServerInstance::SpawnServerProcess()
+{
+    ZEN_ASSERT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId);
+
+    m_ServerInstance.SetServerExecutablePath(GetRunningExecutablePath());
+    m_ServerInstance.SetDataDir(m_BaseDir);
+    const uint16_t BasePort = m_ServerInstance.SpawnServerAndWaitUntilReady();
+
+    ZEN_DEBUG("Storage server instance for module '{}' started, listening on port {}", m_ModuleId, BasePort);
+
+    m_ServerInstance.EnableShutdownOnDestroy();
+}
+
+/**
+ * Bring the instance up: either resume a hibernated instance in place, or
+ * hydrate its state and spawn a fresh server process.
+ *
+ * Does nothing (beyond logging a warning) if the instance is already provisioned.
+ * Exceptions from hydration or process spawn propagate to the caller, in which
+ * case the instance is not marked as provisioned.
+ */
+void
+StorageServerInstance::Provision()
+{
+    RwLock::ExclusiveLockScope _(m_Lock);
+
+    if (m_IsProvisioned)
+    {
+        ZEN_WARN("Storage server instance for module '{}' is already provisioned", m_ModuleId);
+
+        return;
+    }
+
+    if (m_IsHibernated)
+    {
+        // Resume in place using the data left behind by Hibernate(). We must not call
+        // Wake() here: it acquires m_Lock itself and we already hold it exclusively,
+        // which would self-deadlock. Doing the work inline also lets a failed spawn
+        // propagate instead of leaving the instance marked as provisioned.
+        SpawnServerProcess();
+
+        m_IsHibernated = false;
+    }
+    else
+    {
+        ZEN_INFO("Provisioning storage server instance for module '{}', at '{}'", m_ModuleId, m_BaseDir);
+
+        Hydrate();
+
+        SpawnServerProcess();
+    }
+
+    m_IsProvisioned = true;
+}
+
+/**
+ * Shut the server process down and dehydrate its state.
+ *
+ * Does nothing (beyond logging a warning) if the instance is not provisioned.
+ */
+void
+StorageServerInstance::Deprovision()
+{
+    RwLock::ExclusiveLockScope _(m_Lock);
+
+    if (!m_IsProvisioned)
+    {
+        ZEN_WARN("Attempted to deprovision storage server instance for module '{}' which is not provisioned", m_ModuleId);
+
+        return;
+    }
+
+    ZEN_INFO("Deprovisioning storage server instance for module '{}'", m_ModuleId);
+
+    m_ServerInstance.Shutdown();
+
+    Dehydrate();
+
+    m_IsProvisioned = false;
+}
+
+void
+StorageServerInstance::Hibernate()
+{
+    // Signal server to shut down, but keep data around for later wake
+
+    RwLock::ExclusiveLockScope _(m_Lock);
+
+    if (!m_IsProvisioned)
+    {
+        
ZEN_WARN("Attempted to hibernate storage server instance for module '{}' which is not provisioned", m_ModuleId); + + return; + } + + if (m_IsHibernated) + { + ZEN_WARN("Storage server instance for module '{}' is already hibernated", m_ModuleId); + + return; + } + + if (!m_ServerInstance.IsRunning()) + { + ZEN_WARN("Attempted to hibernate storage server instance for module '{}' which is not running", m_ModuleId); + + // This is an unexpected state. Should consider the instance invalid? + + return; + } + + try + { + m_ServerInstance.Shutdown(); + + m_IsHibernated = true; + m_IsProvisioned = false; + + return; + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed to hibernate storage server instance for module '{}': {}", m_ModuleId, Ex.what()); + } +} + +void +StorageServerInstance::Wake() +{ + // Start server in-place using existing data + + RwLock::ExclusiveLockScope _(m_Lock); + + if (!m_IsHibernated) + { + ZEN_WARN("Attempted to wake storage server instance for module '{}' which is not hibernated", m_ModuleId); + + return; + } + + ZEN_ASSERT(!m_ServerInstance.IsRunning(), "Storage server instance for module '{}' is already running", m_ModuleId); + + try + { + SpawnServerProcess(); + m_IsHibernated = false; + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed to wake storage server instance for module '{}': {}", m_ModuleId, Ex.what()); + + // TODO: this instance should be marked as invalid + } +} + +void +StorageServerInstance::Hydrate() +{ + HydrationConfig Config{.ServerStateDir = m_BaseDir, + .TempDir = m_TempDir, + .ModuleId = m_ModuleId, + .TargetSpecification = WideToUtf8(m_HydrationPath.native())}; + + std::unique_ptr<HydrationStrategyBase> Hydrator = CreateFileHydrator(); + + Hydrator->Configure(Config); + Hydrator->Hydrate(); +} + +void +StorageServerInstance::Dehydrate() +{ + HydrationConfig Config{.ServerStateDir = m_BaseDir, + .TempDir = m_TempDir, + .ModuleId = m_ModuleId, + .TargetSpecification = 
WideToUtf8(m_HydrationPath.native())}; + + std::unique_ptr<HydrationStrategyBase> Hydrator = CreateFileHydrator(); + + Hydrator->Configure(Config); + Hydrator->Dehydrate(); +} + +////////////////////////////////////////////////////////////////////////// + +struct HttpHubService::Impl +{ + Impl(const Impl&) = delete; + Impl& operator=(const Impl&) = delete; + + Impl(); + ~Impl(); + + void Initialize(std::filesystem::path HubBaseDir, std::filesystem::path ChildBaseDir) + { + m_RunEnvironment.InitializeForHub(HubBaseDir, ChildBaseDir); + m_FileHydrationPath = m_RunEnvironment.CreateChildDir("hydration_storage"); + ZEN_INFO("using file hydration path: '{}'", m_FileHydrationPath); + + m_HydrationTempPath = m_RunEnvironment.CreateChildDir("hydration_temp"); + ZEN_INFO("using hydration temp path: '{}'", m_HydrationTempPath); + + // This is necessary to ensure the hub assigns a distinct port range. + // We need to do this primarily because otherwise automated tests will + // fail as the test runner will create processes in the default range. + // We should probably make this configurable or dynamic for maximum + // flexibility, and to allow running multiple hubs on the same host if + // necessary. + m_RunEnvironment.SetNextPortNumber(21000); + } + + void Cleanup() + { + RwLock::ExclusiveLockScope _(m_Lock); + m_Instances.clear(); + } + + struct ProvisionedInstanceInfo + { + std::string BaseUri; + uint16_t Port; + }; + + /** + * Provision a storage server instance for the given module ID. + * + * @param ModuleId The ID of the module to provision. + * @param OutInfo If successful, information about the provisioned instance will be returned here. + * @param OutReason If unsuccessful, the reason will be returned here. 
+ */ + bool Provision(std::string_view ModuleId, ProvisionedInstanceInfo& OutInfo, std::string& OutReason) + { + StorageServerInstance* Instance = nullptr; + bool IsNewInstance = false; + { + RwLock::ExclusiveLockScope _(m_Lock); + if (auto It = m_Instances.find(std::string(ModuleId)); It == m_Instances.end()) + { + std::string Reason; + if (!CanProvisionInstance(ModuleId, /* out */ Reason)) + { + ZEN_WARN("Cannot provision new storage server instance for module '{}': {}", ModuleId, Reason); + + OutReason = Reason; + + return false; + } + + IsNewInstance = true; + auto NewInstance = + std::make_unique<StorageServerInstance>(m_RunEnvironment, ModuleId, m_FileHydrationPath, m_HydrationTempPath); + Instance = NewInstance.get(); + m_Instances.emplace(std::string(ModuleId), std::move(NewInstance)); + + ZEN_INFO("Created new storage server instance for module '{}'", ModuleId); + } + else + { + Instance = It->second.get(); + } + + m_ProvisioningModules.emplace(std::string(ModuleId)); + } + + ZEN_ASSERT(Instance != nullptr); + + auto RemoveProvisioningModule = MakeGuard([&] { + RwLock::ExclusiveLockScope _(m_Lock); + m_ProvisioningModules.erase(std::string(ModuleId)); + }); + + // NOTE: this is done while not holding the lock, as provisioning may take time + // and we don't want to block other operations. We track which modules are being + // provisioned using m_ProvisioningModules, and reject attempts to provision/deprovision + // those modules while in this state. + + UpdateStats(); + + try + { + Instance->Provision(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed to provision storage server instance for module '{}': {}", ModuleId, Ex.what()); + if (IsNewInstance) + { + // Clean up + RwLock::ExclusiveLockScope _(m_Lock); + m_Instances.erase(std::string(ModuleId)); + } + return false; + } + + OutInfo.Port = Instance->GetBasePort(); + + // TODO: base URI? 
Would need to know what host name / IP to use + + return true; + } + + /** + * Deprovision a storage server instance for the given module ID. + * + * @param ModuleId The ID of the module to deprovision. + * @param OutReason If unsuccessful, the reason will be returned here. + * @return true if the instance was found and deprovisioned, false otherwise. + */ + bool Deprovision(const std::string& ModuleId, std::string& OutReason) + { + std::unique_ptr<StorageServerInstance> Instance; + + { + RwLock::ExclusiveLockScope _(m_Lock); + + if (auto It = m_ProvisioningModules.find(ModuleId); It != m_ProvisioningModules.end()) + { + OutReason = fmt::format("Module '{}' is currently being provisioned", ModuleId); + + ZEN_WARN("Attempted to deprovision module '{}' which is currently being provisioned", ModuleId); + + return false; + } + + if (auto It = m_Instances.find(ModuleId); It == m_Instances.end()) + { + ZEN_WARN("Attempted to deprovision non-existent module '{}'", ModuleId); + + // Not found, OutReason should be empty + return false; + } + else + { + Instance = std::move(It->second); + m_Instances.erase(It); + m_DeprovisioningModules.emplace(ModuleId); + } + } + + // The module is deprovisioned outside the lock to avoid blocking other operations. + // + // To ensure that no new provisioning can occur while we're deprovisioning, + // we add the module ID to m_DeprovisioningModules and remove it once + // deprovisioning is complete. + + auto _ = MakeGuard([&] { + RwLock::ExclusiveLockScope _(m_Lock); + m_DeprovisioningModules.erase(ModuleId); + }); + + Instance->Deprovision(); + + return true; + } + + /** + * Find a storage server instance for the given module ID. + * + * Beware that as this returns a raw pointer to the instance, the caller must ensure + * that the instance is not deprovisioned while in use. + * + * @param ModuleId The ID of the module to find. + * @param OutInstance If found, the instance will be returned here. 
+ * @return true if the instance was found, false otherwise. + */ + bool Find(std::string_view ModuleId, StorageServerInstance** OutInstance = nullptr) + { + RwLock::SharedLockScope _(m_Lock); + if (auto It = m_Instances.find(std::string(ModuleId)); It != m_Instances.end()) + { + if (OutInstance) + { + *OutInstance = It->second.get(); + } + return true; + } + else if (OutInstance) + { + *OutInstance = nullptr; + } + return false; + } + + /** + * Enumerate all storage server instances. + * + * @param Callback The callback to invoke for each instance. Note that you should + * not do anything heavyweight in the callback as it is invoked while holding + * a shared lock. + */ + void EnumerateModules(auto&& Callback) + { + RwLock::SharedLockScope _(m_Lock); + for (auto& It : m_Instances) + { + Callback(*It.second); + } + } + + int GetInstanceCount() + { + RwLock::SharedLockScope _(m_Lock); + return gsl::narrow_cast<int>(m_Instances.size()); + } + + inline int GetInstanceLimit() { return m_InstanceLimit; } + inline int GetMaxInstanceCount() { return m_MaxInstanceCount; } + +private: + ZenServerEnvironment m_RunEnvironment; + std::filesystem::path m_FileHydrationPath; + std::filesystem::path m_HydrationTempPath; + RwLock m_Lock; + std::unordered_map<std::string, std::unique_ptr<StorageServerInstance>> m_Instances; + std::unordered_set<std::string> m_DeprovisioningModules; + std::unordered_set<std::string> m_ProvisioningModules; + int m_MaxInstanceCount = 0; + void UpdateStats(); + + // Capacity tracking + + int m_InstanceLimit = 1000; + ResourceMetrics m_ResourceLimits; + SystemMetrics m_HostMetrics; + + void UpdateCapacityMetrics(); + bool CanProvisionInstance(std::string_view ModuleId, std::string& OutReason); +}; + +HttpHubService::Impl::Impl() +{ + m_HostMetrics = zen::GetSystemMetrics(); + m_ResourceLimits.DiskUsageBytes = 1000ull * 1024 * 1024 * 1024; + m_ResourceLimits.MemoryUsageBytes = 16ull * 1024 * 1024 * 1024; +} + +HttpHubService::Impl::~Impl() +{ + try + { + 
ZEN_INFO("Hub service shutting down, deprovisioning any current instances"); + + m_Lock.WithExclusiveLock([this] { + for (auto& [ModuleId, Instance] : m_Instances) + { + Instance->Deprovision(); + } + m_Instances.clear(); + }); + } + catch (const std::exception& e) + { + ZEN_WARN("Exception during hub service shutdown: {}", e.what()); + } +} + +void +HttpHubService::Impl::UpdateCapacityMetrics() +{ + m_HostMetrics = zen::GetSystemMetrics(); + + // Update per-instance metrics +} + +void +HttpHubService::Impl::UpdateStats() +{ + m_Lock.WithSharedLock([this] { m_MaxInstanceCount = Max(m_MaxInstanceCount, gsl::narrow_cast<int>(m_Instances.size())); }); +} + +bool +HttpHubService::Impl::CanProvisionInstance(std::string_view ModuleId, std::string& OutReason) +{ + if (m_DeprovisioningModules.find(std::string(ModuleId)) != m_DeprovisioningModules.end()) + { + OutReason = fmt::format("module '{}' is currently being deprovisioned", ModuleId); + + return false; + } + + if (m_ProvisioningModules.find(std::string(ModuleId)) != m_ProvisioningModules.end()) + { + OutReason = fmt::format("module '{}' is currently being provisioned", ModuleId); + + return false; + } + + if (gsl::narrow_cast<int>(m_Instances.size()) >= m_InstanceLimit) + { + OutReason = fmt::format("instance limit exceeded ({})", m_InstanceLimit); + + return false; + } + + // TODO: handle additional resource metrics + + return true; +} + +/////////////////////////////////////////////////////////////////////////// + +HttpHubService::HttpHubService(std::filesystem::path HubBaseDir, std::filesystem::path ChildBaseDir) : m_Impl(std::make_unique<Impl>()) +{ + using namespace std::literals; + + m_Impl->Initialize(HubBaseDir, ChildBaseDir); + + m_Router.AddMatcher("moduleid", [](std::string_view Str) -> bool { + for (const auto C : Str) + { + if (std::isalnum(C) || C == '-') + { + // fine + } + else + { + // not fine + return false; + } + } + + return true; + }); + + m_Router.RegisterRoute( + "status", + 
[this](HttpRouterRequest& Req) { + CbObjectWriter Obj; + Obj.BeginArray("modules"); + m_Impl->EnumerateModules([&Obj](StorageServerInstance& Instance) { + Obj.BeginObject(); + Obj << "moduleId" << Instance.GetModuleId(); + Obj << "provisioned" << Instance.IsProvisioned(); + Obj.EndObject(); + }); + Obj.EndArray(); + Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); + }, + HttpVerb::kGet); + + m_Router.RegisterRoute( + "modules/{moduleid}", + [this](HttpRouterRequest& Req) { + std::string_view ModuleId = Req.GetCapture(1); + + if (Req.ServerRequest().RequestVerb() == HttpVerb::kDelete) + { + HandleModuleDelete(Req.ServerRequest(), ModuleId); + } + else + { + HandleModuleGet(Req.ServerRequest(), ModuleId); + } + }, + HttpVerb::kGet | HttpVerb::kDelete); + + m_Router.RegisterRoute( + "modules/{moduleid}/provision", + [this](HttpRouterRequest& Req) { + std::string_view ModuleId = Req.GetCapture(1); + + std::string FailureReason = "unknown"; + HttpResponseCode ResponseCode = HttpResponseCode::OK; + + try + { + Impl::ProvisionedInstanceInfo Info; + if (m_Impl->Provision(ModuleId, /* out */ Info, /* out */ FailureReason)) + { + CbObjectWriter Obj; + Obj << "moduleId" << ModuleId; + Obj << "baseUri" << Info.BaseUri; + Obj << "port" << Info.Port; + Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); + + return; + } + else + { + ResponseCode = HttpResponseCode::BadRequest; + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Exception while provisioning module '{}': {}", ModuleId, Ex.what()); + + FailureReason = Ex.what(); + ResponseCode = HttpResponseCode::InternalServerError; + } + + Req.ServerRequest().WriteResponse(ResponseCode, HttpContentType::kText, FailureReason); + }, + HttpVerb::kPost); + + m_Router.RegisterRoute( + "modules/{moduleid}/deprovision", + [this](HttpRouterRequest& Req) { + std::string_view ModuleId = Req.GetCapture(1); + std::string FailureReason = "unknown"; + + try + { + if 
(!m_Impl->Deprovision(std::string(ModuleId), /* out */ FailureReason)) + { + if (FailureReason.empty()) + { + return Req.ServerRequest().WriteResponse(HttpResponseCode::NotFound); + } + else + { + return Req.ServerRequest().WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, FailureReason); + } + } + + CbObjectWriter Obj; + Obj << "moduleId" << ModuleId; + + return Req.ServerRequest().WriteResponse(HttpResponseCode::OK, Obj.Save()); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Exception while deprovisioning module '{}': {}", ModuleId, Ex.what()); + + FailureReason = Ex.what(); + } + + Req.ServerRequest().WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, FailureReason); + }, + HttpVerb::kPost); + + m_Router.RegisterRoute( + "stats", + [this](HttpRouterRequest& Req) { + CbObjectWriter Obj; + Obj << "currentInstanceCount" << m_Impl->GetInstanceCount(); + Obj << "maxInstanceCount" << m_Impl->GetMaxInstanceCount(); + Obj << "instanceLimit" << m_Impl->GetInstanceLimit(); + Req.ServerRequest().WriteResponse(HttpResponseCode::OK); + }, + HttpVerb::kGet); +} + +HttpHubService::~HttpHubService() +{ +} + +const char* +HttpHubService::BaseUri() const +{ + return "/hub/"; +} + +void +HttpHubService::SetNotificationEndpoint(std::string_view UpstreamNotificationEndpoint, std::string_view InstanceId) +{ + ZEN_UNUSED(UpstreamNotificationEndpoint, InstanceId); + // TODO: store these for use in notifications, on some interval/criteria which is currently TBD +} + +void +HttpHubService::HandleRequest(zen::HttpServerRequest& Request) +{ + m_Router.HandleRequest(Request); +} + +void +HttpHubService::HandleModuleGet(HttpServerRequest& Request, std::string_view ModuleId) +{ + StorageServerInstance* Instance = nullptr; + if (!m_Impl->Find(ModuleId, &Instance)) + { + Request.WriteResponse(HttpResponseCode::NotFound); + return; + } + + CbObjectWriter Obj; + Obj << "moduleId" << Instance->GetModuleId(); + Obj << "provisioned" << 
Instance->IsProvisioned(); + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +void +HttpHubService::HandleModuleDelete(HttpServerRequest& Request, std::string_view ModuleId) +{ + StorageServerInstance* Instance = nullptr; + if (!m_Impl->Find(ModuleId, &Instance)) + { + Request.WriteResponse(HttpResponseCode::NotFound); + return; + } + + // TODO: deprovision and nuke all related storage + + CbObjectWriter Obj; + Obj << "moduleId" << Instance->GetModuleId(); + Obj << "provisioned" << Instance->IsProvisioned(); + Request.WriteResponse(HttpResponseCode::OK, Obj.Save()); +} + +} // namespace zen diff --git a/src/zenserver/hub/hubservice.h b/src/zenserver/hub/hubservice.h new file mode 100644 index 000000000..1a5a8c57c --- /dev/null +++ b/src/zenserver/hub/hubservice.h @@ -0,0 +1,42 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenhttp/httpserver.h> + +#include "hydration.h" + +namespace zen { + +/** ZenServer Hub Service + * + * Manages a set of storage servers on the behalf of external clients. For + * use in UEFN content worker style scenarios. 
+ * + */ +class HttpHubService : public zen::HttpService +{ +public: + HttpHubService(std::filesystem::path HubBaseDir, std::filesystem::path ChildBaseDir); + ~HttpHubService(); + + HttpHubService(const HttpHubService&) = delete; + HttpHubService& operator=(const HttpHubService&) = delete; + + virtual const char* BaseUri() const override; + virtual void HandleRequest(zen::HttpServerRequest& Request) override; + + void SetNotificationEndpoint(std::string_view UpstreamNotificationEndpoint, std::string_view InstanceId); + +private: + HttpRequestRouter m_Router; + + struct Impl; + + std::unique_ptr<Impl> m_Impl; + + void HandleModuleGet(HttpServerRequest& Request, std::string_view ModuleId); + void HandleModuleDelete(HttpServerRequest& Request, std::string_view ModuleId); +}; + +} // namespace zen diff --git a/src/zenserver/hub/hydration.cpp b/src/zenserver/hub/hydration.cpp new file mode 100644 index 000000000..52c17fe1a --- /dev/null +++ b/src/zenserver/hub/hydration.cpp @@ -0,0 +1,119 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include "hydration.h" + +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> + +namespace zen { + +/////////////////////////////////////////////////////////////////////////// + +struct FileHydrator : public HydrationStrategyBase +{ + virtual void Configure(const HydrationConfig& Config) override; + virtual void Hydrate() override; + virtual void Dehydrate() override; + +private: + HydrationConfig m_Config; + std::filesystem::path m_StorageModuleRootDir; +}; + +void +FileHydrator::Configure(const HydrationConfig& Config) +{ + m_Config = Config; + + std::filesystem::path ConfigPath(Utf8ToWide(m_Config.TargetSpecification)); + + if (!std::filesystem::exists(ConfigPath)) + { + throw std::invalid_argument(fmt::format("Target does not exist: '{}'", ConfigPath.string())); + } + + m_StorageModuleRootDir = ConfigPath / m_Config.ModuleId; + + CreateDirectories(m_StorageModuleRootDir); +} + +void +FileHydrator::Hydrate() +{ + ZEN_INFO("Hydrating state from '{}' to '{}'", m_StorageModuleRootDir, m_Config.ServerStateDir); + + // Ensure target is clean + ZEN_DEBUG("Wiping server state at '{}'", m_Config.ServerStateDir); + const bool ForceRemoveReadOnlyFiles = true; + CleanDirectory(m_Config.ServerStateDir, ForceRemoveReadOnlyFiles); + + bool WipeServerState = false; + + try + { + ZEN_DEBUG("Copying '{}' to '{}'", m_StorageModuleRootDir, m_Config.ServerStateDir); + CopyTree(m_StorageModuleRootDir, m_Config.ServerStateDir, {.EnableClone = true}); + } + catch (std::exception& Ex) + { + ZEN_WARN("Copy failed: {}. 
Will wipe any partially copied state from '{}'", Ex.what(), m_Config.ServerStateDir); + + // We don't do the clean right here to avoid potentially running into double-throws + WipeServerState = true; + } + + if (WipeServerState) + { + ZEN_DEBUG("Cleaning server state '{}'", m_Config.ServerStateDir); + CleanDirectory(m_Config.ServerStateDir, ForceRemoveReadOnlyFiles); + } + + // Note that we leave the storage state intact until next dehydration replaces the content +} + +void +FileHydrator::Dehydrate() +{ + ZEN_INFO("Dehydrating state from '{}' to '{}'", m_Config.ServerStateDir, m_StorageModuleRootDir); + + const std::filesystem::path TargetDir = m_StorageModuleRootDir; + + // Ensure target is clean. This could be replaced with an atomic copy at a later date + // (i.e copy into a temporary directory name and rename it once complete) + + ZEN_DEBUG("Cleaning storage root '{}'", TargetDir); + const bool ForceRemoveReadOnlyFiles = true; + CleanDirectory(TargetDir, ForceRemoveReadOnlyFiles); + + bool CopySuccess = true; + + try + { + ZEN_DEBUG("Copying '{}' to '{}'", m_Config.ServerStateDir, TargetDir); + CopyTree(m_Config.ServerStateDir, TargetDir, {.EnableClone = true}); + } + catch (std::exception& Ex) + { + ZEN_WARN("Copy failed: {}. 
Will wipe any partially copied state from '{}'", Ex.what(), m_StorageModuleRootDir); + + // We don't do the clean right here to avoid potentially running into double-throws + CopySuccess = false; + } + + if (!CopySuccess) + { + ZEN_DEBUG("Removing partially copied state from '{}'", TargetDir); + CleanDirectory(TargetDir, ForceRemoveReadOnlyFiles); + } + + ZEN_DEBUG("Wiping server state '{}'", m_Config.ServerStateDir); + CleanDirectory(m_Config.ServerStateDir, ForceRemoveReadOnlyFiles); +} + +std::unique_ptr<HydrationStrategyBase> +CreateFileHydrator() +{ + return std::make_unique<FileHydrator>(); +} + +} // namespace zen diff --git a/src/zenserver/hub/hydration.h b/src/zenserver/hub/hydration.h new file mode 100644 index 000000000..f86f2accf --- /dev/null +++ b/src/zenserver/hub/hydration.h @@ -0,0 +1,40 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "zenhubserver.h" + +namespace zen { + +struct HydrationConfig +{ + // Location of server state to hydrate/dehydrate + std::filesystem::path ServerStateDir; + // Temporary directory available for use during hydration/dehydration + std::filesystem::path TempDir; + // Module ID of the server state being hydrated/dehydrated + std::string ModuleId; + // Back-end specific target specification (e.g. S3 bucket, file path, etc) + std::string TargetSpecification; +}; + +/** + * @brief State hydration strategy interface + * + * An instance of this interface is used to perform hydration OR + * dehydration of server state. It's expected to be used only once + * and not reused. 
+ * + */ +struct HydrationStrategyBase +{ + virtual ~HydrationStrategyBase() = default; + + virtual void Dehydrate() = 0; + virtual void Hydrate() = 0; + virtual void Configure(const HydrationConfig& Config) = 0; +}; + +std::unique_ptr<HydrationStrategyBase> CreateFileHydrator(); + +} // namespace zen diff --git a/src/zenserver/hub/zenhubserver.cpp b/src/zenserver/hub/zenhubserver.cpp new file mode 100644 index 000000000..7a4ba951d --- /dev/null +++ b/src/zenserver/hub/zenhubserver.cpp @@ -0,0 +1,303 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "zenhubserver.h" +#include "hubservice.h" + +#include <zencore/fmtutils.h> +#include <zencore/memory/llm.h> +#include <zencore/memory/memorytrace.h> +#include <zencore/memory/tagtrace.h> +#include <zencore/scopeguard.h> +#include <zencore/sentryintegration.h> +#include <zencore/system.h> +#include <zencore/windows.h> +#include <zenhttp/httpapiservice.h> +#include <zenutil/service.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <cxxopts.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +void +ZenHubServerConfigurator::AddCliOptions(cxxopts::Options& Options) +{ + Options.add_option("hub", + "", + "upstream-notification-endpoint", + "Endpoint URL for upstream notifications", + cxxopts::value<std::string>(m_ServerOptions.UpstreamNotificationEndpoint)->default_value(""), + ""); + + Options.add_option("hub", + "", + "instance-id", + "Instance ID for use in notifications", + cxxopts::value<std::string>(m_ServerOptions.InstanceId)->default_value(""), + ""); +} + +void +ZenHubServerConfigurator::AddConfigOptions(LuaConfig::Options& Options) +{ + ZEN_UNUSED(Options); +} + +void +ZenHubServerConfigurator::ApplyOptions(cxxopts::Options& Options) +{ + ZEN_UNUSED(Options); +} + +void +ZenHubServerConfigurator::OnConfigFileParsed(LuaConfig::Options& LuaOptions) +{ + ZEN_UNUSED(LuaOptions); +} + +void +ZenHubServerConfigurator::ValidateOptions() +{ +} + 
+/////////////////////////////////////////////////////////////////////////// + +ZenHubServer::ZenHubServer() +{ +} + +ZenHubServer::~ZenHubServer() +{ + Cleanup(); +} + +int +ZenHubServer::Initialize(const ZenHubServerConfig& ServerConfig, ZenServerState::ZenServerEntry* ServerEntry) +{ + ZEN_TRACE_CPU("ZenHubServer::Initialize"); + ZEN_MEMSCOPE(GetZenserverTag()); + + ZEN_INFO(ZEN_APP_NAME " initializing in HUB server mode"); + + const int EffectiveBasePort = ZenServerBase::Initialize(ServerConfig, ServerEntry); + if (EffectiveBasePort < 0) + { + return EffectiveBasePort; + } + + // This is a workaround to make sure we can have automated tests. Without + // this the ranges for different child zen hub processes could overlap with + // the main test range. + ZenServerEnvironment::SetBaseChildId(1000); + + m_DebugOptionForcedCrash = ServerConfig.ShouldCrash; + + InitializeState(ServerConfig); + InitializeServices(ServerConfig); + RegisterServices(ServerConfig); + + ZenServerBase::Finalize(); + + return EffectiveBasePort; +} + +void +ZenHubServer::Cleanup() +{ + ZEN_TRACE_CPU("ZenStorageServer::Cleanup"); + ZEN_INFO(ZEN_APP_NAME " cleaning up"); + try + { + m_IoContext.stop(); + if (m_IoRunner.joinable()) + { + m_IoRunner.join(); + } + + if (m_Http) + { + m_Http->Close(); + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("exception thrown during Cleanup() in {}: '{}'", ZEN_APP_NAME, Ex.what()); + } +} + +void +ZenHubServer::InitializeState(const ZenHubServerConfig& ServerConfig) +{ + ZEN_UNUSED(ServerConfig); +} + +void +ZenHubServer::InitializeServices(const ZenHubServerConfig& ServerConfig) +{ + ZEN_UNUSED(ServerConfig); + + ZEN_INFO("instantiating API service"); + m_ApiService = std::make_unique<zen::HttpApiService>(*m_Http); + + ZEN_INFO("instantiating hub service"); + m_HubService = std::make_unique<HttpHubService>(ServerConfig.DataDir / "hub", ServerConfig.DataDir / "servers"); + 
m_HubService->SetNotificationEndpoint(ServerConfig.UpstreamNotificationEndpoint, ServerConfig.InstanceId); +} + +void +ZenHubServer::RegisterServices(const ZenHubServerConfig& ServerConfig) +{ + ZEN_UNUSED(ServerConfig); + + if (m_HubService) + { + m_Http->RegisterService(*m_HubService); + } + + if (m_ApiService) + { + m_Http->RegisterService(*m_ApiService); + } +} + +void +ZenHubServer::Run() +{ + if (m_ProcessMonitor.IsActive()) + { + CheckOwnerPid(); + } + + if (!m_TestMode) + { + // clang-format off + ZEN_INFO(R"(__________ ___ ___ ___. )" "\n" + R"(\____ /____ ____ / | \ __ _\_ |__ )" "\n" + R"( / // __ \ / \ / ~ \ | \ __ \ )" "\n" + R"( / /\ ___/| | \ \ Y / | / \_\ \)" "\n" + R"(/_______ \___ >___| / \___|_ /|____/|___ /)" "\n" + R"( \/ \/ \/ \/ \/ )"); + // clang-format on + + ExtendableStringBuilder<256> BuildOptions; + GetBuildOptions(BuildOptions, '\n'); + ZEN_INFO("Build options ({}/{}):\n{}", GetOperatingSystemName(), GetCpuName(), BuildOptions); + } + + ZEN_INFO(ZEN_APP_NAME " now running as HUB (pid: {})", GetCurrentProcessId()); + +#if ZEN_PLATFORM_WINDOWS + if (zen::windows::IsRunningOnWine()) + { + ZEN_INFO("detected Wine session - " ZEN_APP_NAME " is not formally tested on Wine and may therefore not work or perform well"); + } +#endif + +#if ZEN_USE_SENTRY + ZEN_INFO("sentry crash handler {}", m_UseSentry ? 
"ENABLED" : "DISABLED"); + if (m_UseSentry) + { + SentryIntegration::ClearCaches(); + } +#endif + + if (m_DebugOptionForcedCrash) + { + ZEN_DEBUG_BREAK(); + } + + const bool IsInteractiveMode = IsInteractiveSession(); // &&!m_TestMode; + + SetNewState(kRunning); + + OnReady(); + + m_Http->Run(IsInteractiveMode); + + SetNewState(kShuttingDown); + + ZEN_INFO(ZEN_APP_NAME " exiting"); +} + +////////////////////////////////////////////////////////////////////////////////// + +ZenHubServerMain::ZenHubServerMain(ZenHubServerConfig& ServerOptions) : ZenServerMain(ServerOptions), m_ServerOptions(ServerOptions) +{ +} + +void +ZenHubServerMain::DoRun(ZenServerState::ZenServerEntry* Entry) +{ + ZenHubServer Server; + Server.SetDataRoot(m_ServerOptions.DataDir); + Server.SetContentRoot(m_ServerOptions.ContentDir); + Server.SetTestMode(m_ServerOptions.IsTest); + Server.SetDedicatedMode(m_ServerOptions.IsDedicated); + + const int EffectiveBasePort = Server.Initialize(m_ServerOptions, Entry); + if (EffectiveBasePort == -1) + { + // Server.Initialize has already logged what the issue is - just exit with failure code here. 
+ std::exit(1); + } + + Entry->EffectiveListenPort = uint16_t(EffectiveBasePort); + if (EffectiveBasePort != m_ServerOptions.BasePort) + { + ZEN_INFO(ZEN_APP_NAME " - relocated to base port {}", EffectiveBasePort); + m_ServerOptions.BasePort = EffectiveBasePort; + } + + std::unique_ptr<std::thread> ShutdownThread; + std::unique_ptr<NamedEvent> ShutdownEvent; + + ExtendableStringBuilder<64> ShutdownEventName; + ShutdownEventName << "Zen_" << m_ServerOptions.BasePort << "_Shutdown"; + ShutdownEvent.reset(new NamedEvent{ShutdownEventName}); + + // Monitor shutdown signals + + ShutdownThread.reset(new std::thread{[&] { + SetCurrentThreadName("shutdown_mon"); + + ZEN_INFO("shutdown monitor thread waiting for shutdown signal '{}' for process {}", ShutdownEventName, zen::GetCurrentProcessId()); + + if (ShutdownEvent->Wait()) + { + ZEN_INFO("shutdown signal for pid {} received", zen::GetCurrentProcessId()); + Server.RequestExit(0); + } + else + { + ZEN_INFO("shutdown signal wait() failed"); + } + }}); + + auto CleanupShutdown = MakeGuard([&ShutdownEvent, &ShutdownThread] { + ReportServiceStatus(ServiceStatus::Stopping); + + if (ShutdownEvent) + { + ShutdownEvent->Set(); + } + if (ShutdownThread && ShutdownThread->joinable()) + { + ShutdownThread->join(); + } + }); + + // If we have a parent process, establish the mechanisms we need + // to be able to communicate readiness with the parent + + Server.SetIsReadyFunc([&] { + std::error_code Ec; + m_LockFile.Update(MakeLockData(true), Ec); + ReportServiceStatus(ServiceStatus::Running); + NotifyReady(); + }); + + Server.Run(); +} + +} // namespace zen diff --git a/src/zenserver/hub/zenhubserver.h b/src/zenserver/hub/zenhubserver.h new file mode 100644 index 000000000..ac14362f0 --- /dev/null +++ b/src/zenserver/hub/zenhubserver.h @@ -0,0 +1,92 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include "zenserver.h" + +namespace cxxopts { +class Options; +} +namespace zen::LuaConfig { +struct Options; +} + +namespace zen { + +class HttpApiService; +class HttpHubService; + +struct ZenHubServerConfig : public ZenServerConfig +{ + std::string UpstreamNotificationEndpoint; + std::string InstanceId; // For use in notifications +}; + +struct ZenHubServerConfigurator : public ZenServerConfiguratorBase +{ + ZenHubServerConfigurator(ZenHubServerConfig& ServerOptions) : ZenServerConfiguratorBase(ServerOptions), m_ServerOptions(ServerOptions) + { + } + + ~ZenHubServerConfigurator() = default; + +private: + virtual void AddCliOptions(cxxopts::Options& Options) override; + virtual void AddConfigOptions(LuaConfig::Options& Options) override; + virtual void ApplyOptions(cxxopts::Options& Options) override; + virtual void OnConfigFileParsed(LuaConfig::Options& LuaOptions) override; + virtual void ValidateOptions() override; + + ZenHubServerConfig& m_ServerOptions; +}; + +class ZenHubServerMain : public ZenServerMain +{ +public: + ZenHubServerMain(ZenHubServerConfig& ServerOptions); + virtual void DoRun(ZenServerState::ZenServerEntry* Entry) override; + + ZenHubServerMain(const ZenHubServerMain&) = delete; + ZenHubServerMain& operator=(const ZenHubServerMain&) = delete; + + typedef ZenHubServerConfig Config; + typedef ZenHubServerConfigurator Configurator; + +private: + ZenHubServerConfig& m_ServerOptions; +}; + +class ZenHubServer : public ZenServerBase +{ + ZenHubServer& operator=(ZenHubServer&&) = delete; + ZenHubServer(ZenHubServer&&) = delete; + +public: + ZenHubServer(); + ~ZenHubServer(); + + int Initialize(const ZenHubServerConfig& ServerConfig, ZenServerState::ZenServerEntry* ServerEntry); + void Run(); + void Cleanup(); + + void SetDedicatedMode(bool State) { m_IsDedicatedMode = State; } + void SetTestMode(bool State) { m_TestMode = State; } + void SetDataRoot(std::filesystem::path Root) { m_DataRoot = Root; } + void 
SetContentRoot(std::filesystem::path Root) { m_ContentRoot = Root; } + +private: + bool m_IsDedicatedMode = false; + bool m_TestMode = false; + std::filesystem::path m_DataRoot; + std::filesystem::path m_ContentRoot; + bool m_DebugOptionForcedCrash = false; + + std::unique_ptr<HttpHubService> m_HubService; + std::unique_ptr<HttpApiService> m_ApiService; + + void InitializeState(const ZenHubServerConfig& ServerConfig); + void InitializeServices(const ZenHubServerConfig& ServerConfig); + void RegisterServices(const ZenHubServerConfig& ServerConfig); +}; + +} // namespace zen diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp index 78bce7d06..3a58d1f4a 100644 --- a/src/zenserver/main.cpp +++ b/src/zenserver/main.cpp @@ -19,12 +19,15 @@ #include <zencore/thread.h> #include <zencore/trace.h> #include <zentelemetry/otlptrace.h> +#include <zenutil/commandlineoptions.h> #include <zenutil/service.h> #include "diag/logging.h" #include "storage/storageconfig.h" #include "storage/zenstorageserver.h" +#include "hub/zenhubserver.h" + #if ZEN_PLATFORM_WINDOWS # include <zencore/windows.h> # include <zenutil/windows/windowsservice.h> @@ -193,12 +196,12 @@ AppMain(int argc, char* argv[]) } catch (const AssertException& AssertEx) { - fprintf(stderr, ZEN_APP_NAME " ERROR: Caught assert exception in main: '%s'", AssertEx.FullDescription().c_str()); + fprintf(stderr, ZEN_APP_NAME " ERROR: Caught assert exception in main: '%s'\n", AssertEx.FullDescription().c_str()); return 1; } catch (const std::exception& Ex) { - fprintf(stderr, ZEN_APP_NAME " ERROR: Caught exception in main: '%s'", Ex.what()); + fprintf(stderr, ZEN_APP_NAME " ERROR: Caught exception in main: '%s'\n", Ex.what()); return 1; } @@ -231,6 +234,8 @@ main(int argc, char* argv[]) setlocale(LC_ALL, "en_us.UTF8"); #endif // ZEN_PLATFORM_WINDOWS + zen::CommandLineConverter ArgConverter(argc, argv); + using namespace zen; using namespace std::literals; @@ -273,6 +278,8 @@ main(int argc, char* argv[]) exit(5); #endif 
break; + case kHub: + return AppMain<ZenHubServerMain>(argc, argv); default: case kStore: return AppMain<ZenStorageServerMain>(argc, argv); diff --git a/src/zenserver/storage/admin/admin.cpp b/src/zenserver/storage/admin/admin.cpp index 04f43d33a..19155e02b 100644 --- a/src/zenserver/storage/admin/admin.cpp +++ b/src/zenserver/storage/admin/admin.cpp @@ -121,7 +121,10 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, }, HttpVerb::kGet); - m_Router.AddPattern("jobid", "([[:digit:]]+?)"); + static constexpr AsciiSet ValidNumberCharactersSet{"0123456789"}; + + m_Router.AddMatcher("jobid", + [](std::string_view Str) -> bool { return !Str.empty() && AsciiSet::HasOnly(Str, ValidNumberCharactersSet); }); m_Router.RegisterRoute( "jobs", @@ -539,7 +542,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, const HttpServerRequest::QueryParams Params = HttpReq.GetQueryParams(); GcScheduler::TriggerScrubParams ScrubParams; - ScrubParams.MaxTimeslice = std::chrono::seconds(100); + ScrubParams.MaxTimeslice = std::chrono::seconds(300); if (auto Param = Params.GetValue("skipdelete"); Param.empty() == false) { @@ -556,6 +559,14 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, ScrubParams.SkipCas = (Param == "true"sv); } + if (auto Param = Params.GetValue("maxtimeslice"); Param.empty() == false) + { + if (auto Value = ParseInt<uint64_t>(Param)) + { + ScrubParams.MaxTimeslice = std::chrono::seconds(Value.value()); + } + } + m_GcScheduler.TriggerScrub(ScrubParams); CbObjectWriter Response; diff --git a/src/zenserver/storage/buildstore/httpbuildstore.cpp b/src/zenserver/storage/buildstore/httpbuildstore.cpp index 18fae7027..f5ba30616 100644 --- a/src/zenserver/storage/buildstore/httpbuildstore.cpp +++ b/src/zenserver/storage/buildstore/httpbuildstore.cpp @@ -48,10 +48,20 @@ HttpBuildStoreService::Initialize() { ZEN_LOG_INFO(LogBuilds, "Initializing Builds Service"); - m_Router.AddPattern("namespace", "([[:alnum:]\\-_.]+)"); - 
m_Router.AddPattern("bucket", "([[:alnum:]\\-_.]+)"); - m_Router.AddPattern("buildid", "([[:xdigit:]]{24})"); - m_Router.AddPattern("hash", "([[:xdigit:]]{40})"); + static constexpr AsciiSet ValidNamespaceCharactersSet{"abcdefghijklmnopqrstuvwxyz0123456789-_.ABCDEFGHIJKLMNOPQRSTUVWXYZ"}; + static constexpr AsciiSet ValidBucketCharactersSet{"abcdefghijklmnopqrstuvwxyz0123456789-_.ABCDEFGHIJKLMNOPQRSTUVWXYZ"}; + static constexpr AsciiSet ValidHexCharactersSet{"0123456789abcdefABCDEF"}; + + m_Router.AddMatcher("namespace", + [](std::string_view Str) -> bool { return !Str.empty() && AsciiSet::HasOnly(Str, ValidNamespaceCharactersSet); }); + m_Router.AddMatcher("bucket", + [](std::string_view Str) -> bool { return !Str.empty() && AsciiSet::HasOnly(Str, ValidBucketCharactersSet); }); + m_Router.AddMatcher("buildid", [](std::string_view Str) -> bool { + return Str.length() == Oid::StringLength && AsciiSet::HasOnly(Str, ValidHexCharactersSet); + }); + m_Router.AddMatcher("hash", [](std::string_view Str) -> bool { + return Str.length() == IoHash::StringLength && AsciiSet::HasOnly(Str, ValidHexCharactersSet); + }); m_Router.RegisterRoute( "{namespace}/{bucket}/{buildid}/blobs/{hash}", diff --git a/src/zenserver/storage/objectstore/objectstore.cpp b/src/zenserver/storage/objectstore/objectstore.cpp index d8ad40621..052c3d630 100644 --- a/src/zenserver/storage/objectstore/objectstore.cpp +++ b/src/zenserver/storage/objectstore/objectstore.cpp @@ -271,8 +271,13 @@ HttpObjectStoreService::Inititalize() CreateDirectories(BucketsPath); } - m_Router.AddPattern("path", "([[:alnum:]/_.,;$~\\{\\}\\+\\-\\[\\]\\%\\(\\)]+)"); - m_Router.AddPattern("bucket", "([[:alnum:]\\-_.]+)"); + static constexpr AsciiSet ValidPathCharactersSet{"abcdefghijklmnopqrstuvwxyz0123456789/_.,;$~{}+-[]%()]ABCDEFGHIJKLMNOPQRSTUVWXYZ"}; + static constexpr AsciiSet ValidBucketCharactersSet{"abcdefghijklmnopqrstuvwxyz0123456789-_.ABCDEFGHIJKLMNOPQRSTUVWXYZ"}; + + m_Router.AddMatcher("path", + [](std::string_view 
Str) -> bool { return !Str.empty() && AsciiSet::HasOnly(Str, ValidPathCharactersSet); }); + m_Router.AddMatcher("bucket", + [](std::string_view Str) -> bool { return !Str.empty() && AsciiSet::HasOnly(Str, ValidBucketCharactersSet); }); m_Router.RegisterRoute( "bucket", diff --git a/src/zenserver/storage/projectstore/httpprojectstore.cpp b/src/zenserver/storage/projectstore/httpprojectstore.cpp index 4e947f221..416e2ed69 100644 --- a/src/zenserver/storage/projectstore/httpprojectstore.cpp +++ b/src/zenserver/storage/projectstore/httpprojectstore.cpp @@ -549,11 +549,23 @@ HttpProjectService::HttpProjectService(CidStore& Store, using namespace std::literals; - m_Router.AddPattern("project", "([[:alnum:]_.]+)"); - m_Router.AddPattern("log", "([[:alnum:]_.]+)"); - m_Router.AddPattern("op", "([[:digit:]]+?)"); - m_Router.AddPattern("chunk", "([[:xdigit:]]{24})"); - m_Router.AddPattern("hash", "([[:xdigit:]]{40})"); + static constexpr AsciiSet ValidProjectCharactersSet{"abcdefghijklmnopqrstuvwxyz0123456789_.ABCDEFGHIJKLMNOPQRSTUVWXYZ"}; + static constexpr AsciiSet ValidOplogCharactersSet{"abcdefghijklmnopqrstuvwxyz0123456789_.ABCDEFGHIJKLMNOPQRSTUVWXYZ"}; + static constexpr AsciiSet ValidNumberCharactersSet{"0123456789"}; + static constexpr AsciiSet ValidHexCharactersSet{"0123456789abcdefABCDEF"}; + + m_Router.AddMatcher("project", + [](std::string_view Str) -> bool { return !Str.empty() && AsciiSet::HasOnly(Str, ValidProjectCharactersSet); }); + m_Router.AddMatcher("log", + [](std::string_view Str) -> bool { return !Str.empty() && AsciiSet::HasOnly(Str, ValidOplogCharactersSet); }); + m_Router.AddMatcher("op", + [](std::string_view Str) -> bool { return !Str.empty() && AsciiSet::HasOnly(Str, ValidNumberCharactersSet); }); + m_Router.AddMatcher("chunk", [](std::string_view Str) -> bool { + return Str.length() == Oid::StringLength && AsciiSet::HasOnly(Str, ValidHexCharactersSet); + }); + m_Router.AddMatcher("hash", [](std::string_view Str) -> bool { + return Str.length() 
== IoHash::StringLength && AsciiSet::HasOnly(Str, ValidHexCharactersSet); + }); m_Router.RegisterRoute( "", @@ -2880,6 +2892,8 @@ HttpProjectService::HandleRpcRequest(HttpRouterRequest& Req) }; tsl::robin_map<IoHash, AddedChunk, IoHash::Hasher> AddedChunks; + const std::filesystem::path CanonicalRoot = std::filesystem::canonical(Project->RootDir); + Oplog->IterateOplog( [&](CbObjectView Op) { bool OpRewritten = false; @@ -2898,10 +2912,36 @@ HttpProjectService::HandleRpcRequest(HttpRouterRequest& Req) if (DataHash == IoHash::Zero) { - std::string_view ServerPath = View["serverpath"sv].AsString(); - std::filesystem::path FilePath = Project->RootDir / ServerPath; - BasicFile DataFile; - std::error_code Ec; + std::string_view ServerPath = View["serverpath"sv].AsString(); + if (CanonicalRoot.empty()) + { + ZEN_WARN("Attempting to load file '{}' from project with unset project root", ServerPath); + AllOk = false; + continue; + } + + std::error_code Ec; + const std::filesystem::path FilePath = std::filesystem::canonical(Project->RootDir / ServerPath, Ec); + + if (Ec) + { + ZEN_WARN("Failed to find file '{}' in project root '{}' for 'snapshot'. 
Reason: '{}'", + ServerPath, + Project->RootDir, + Ec.message()); + AllOk = false; + continue; + } + + if (std::mismatch(CanonicalRoot.begin(), CanonicalRoot.end(), FilePath.begin()).first != + CanonicalRoot.end()) + { + ZEN_WARN("Unable to read file '{}' outside of project root '{}'", FilePath, CanonicalRoot); + AllOk = false; + continue; + } + + BasicFile DataFile; DataFile.Open(FilePath, BasicFile::Mode::kRead, Ec); if (Ec) diff --git a/src/zenserver/storage/workspaces/httpworkspaces.cpp b/src/zenserver/storage/workspaces/httpworkspaces.cpp index 3fea46b2f..dc4cc7e69 100644 --- a/src/zenserver/storage/workspaces/httpworkspaces.cpp +++ b/src/zenserver/storage/workspaces/httpworkspaces.cpp @@ -169,10 +169,20 @@ HttpWorkspacesService::Initialize() ZEN_LOG_INFO(LogFs, "Initializing Workspaces Service"); - m_Router.AddPattern("workspace_id", "([[:xdigit:]]{24})"); - m_Router.AddPattern("share_id", "([[:xdigit:]]{24})"); - m_Router.AddPattern("chunk", "([[:xdigit:]]{24})"); - m_Router.AddPattern("share_alias", "([[:alnum:]_.\\+\\-\\[\\]]+)"); + static constexpr AsciiSet ValidHexCharactersSet{"0123456789abcdefABCDEF"}; + + m_Router.AddMatcher("workspace_id", [](std::string_view Str) -> bool { + return Str.length() == Oid::StringLength && AsciiSet::HasOnly(Str, ValidHexCharactersSet); + }); + m_Router.AddMatcher("share_id", [](std::string_view Str) -> bool { + return Str.length() == Oid::StringLength && AsciiSet::HasOnly(Str, ValidHexCharactersSet); + }); + m_Router.AddMatcher("chunk", [](std::string_view Str) -> bool { + return Str.length() == Oid::StringLength && AsciiSet::HasOnly(Str, ValidHexCharactersSet); + }); + m_Router.AddMatcher("share_alias", [](std::string_view Str) -> bool { + return !Str.empty() && AsciiSet::HasOnly(Str, Workspaces::ValidAliasCharactersSet); + }); m_Router.RegisterRoute( "{workspace_id}/{share_id}/files", diff --git a/src/zenserver/xmake.lua b/src/zenserver/xmake.lua index fb65fa949..6ee80dc62 100644 --- a/src/zenserver/xmake.lua +++ 
b/src/zenserver/xmake.lua @@ -22,6 +22,7 @@ target("zenserver") add_deps("sol2") add_packages("json11") add_packages("lua") + add_packages("consul") if has_config("zenmimalloc") then add_packages("mimalloc") @@ -55,10 +56,7 @@ target("zenserver") add_ldflags("-framework Security") add_ldflags("-framework SystemConfiguration") end - - add_options("compute") - add_options("exec") - + -- to work around some unfortunate Ctrl-C behaviour on Linux/Mac due to -- our use of setsid() at startup we pass in `--no-detach` to zenserver -- ensure that it recieves signals when the user requests termination @@ -87,17 +85,60 @@ target("zenserver") end end) + after_build(function (target) - if has_config("zensentry") then - local crashpad_handler = "crashpad_handler" - if is_plat("windows") then - crashpad_handler = "crashpad_handler.exe" + local function copy_if_newer(src_file, dst_file, file_description) + if not os.exists(src_file) then + print("Source file '" .. src_file .. "' does not exist, cannot copy " .. file_description) + return end + local should_copy = false + if not os.exists(dst_file) then + should_copy = true + else + local src_size = os.filesize(src_file) + local dst_size = os.filesize(dst_file) + local src_mtime = os.mtime(src_file) + local dst_mtime = os.mtime(dst_file) + + if src_size ~= dst_size or src_mtime > dst_mtime then + should_copy = true + end + end + + if should_copy then + os.cp(src_file, dst_file) + print("Copied '" .. file_description .. "' to output directory") + end + end + + if has_config("zensentry") then local pkg = target:pkg("sentry-native") if pkg then local installdir = pkg:installdir() - os.cp(path.join(installdir, "bin/" .. crashpad_handler), target:targetdir()) - print("Copied " .. crashpad_handler .. " to output directory") + + local crashpad_handler = "crashpad_handler" + if is_plat("windows") then + crashpad_handler = "crashpad_handler.exe" + end + + local crashpad_handler_path = path.join(installdir, "bin/" .. 
crashpad_handler) + copy_if_newer(crashpad_handler_path, path.join(target:targetdir(), crashpad_handler), crashpad_handler) + + if is_plat("windows") then + local crashpad_wer_path = path.join(installdir, "bin/crashpad_wer.dll") + copy_if_newer(crashpad_wer_path, path.join(target:targetdir(), "crashpad_wer.dll"), "crashpad_wer.dll") + end + end + end + + local consul_pkg = target:pkg("consul") + if consul_pkg then + local installdir = consul_pkg:installdir() + local consul_bin = "consul" + if is_plat("windows") then + consul_bin = "consul.exe" end + copy_if_newer(path.join(installdir, "bin", consul_bin), path.join(target:targetdir(), consul_bin), consul_bin) end end) diff --git a/src/zenserver/zenserver.cpp b/src/zenserver/zenserver.cpp index c5c36a4a8..2bafeeaa1 100644 --- a/src/zenserver/zenserver.cpp +++ b/src/zenserver/zenserver.cpp @@ -151,6 +151,11 @@ ZenServerBase::Initialize(const ZenServerConfig& ServerOptions, ZenServerState:: EnqueueStatsReportingTimer(); } + m_HealthService.SetHealthInfo({.DataRoot = ServerOptions.DataDir, + .AbsLogPath = ServerOptions.AbsLogFile, + .HttpServerClass = std::string(ServerOptions.HttpConfig.ServerClass), + .BuildVersion = std::string(ZEN_CFG_VERSION_BUILD_STRING_FULL)}); + LogSettingsSummary(ServerOptions); return EffectiveBasePort; diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index 0542d1171..3ea91ead6 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -374,7 +374,7 @@ BlockStoreFile::GetMetaPath() const //////////////////////////////////////////////////////// -constexpr uint64_t DefaultIterateSmallChunkWindowSize = 2 * 1024 * 1024; +constexpr uint64_t DefaultIterateSmallChunkWindowSize = 512u * 1024u; BlockStore::BlockStore() { @@ -953,7 +953,7 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations, uint64_t IterateSmallChunkWindowSize = Max(DefaultIterateSmallChunkWindowSize, LargeSizeLimit); - const uint64_t IterateSmallChunkMaxGapSize = 
Max(2048u, IterateSmallChunkWindowSize / 512u); + const uint64_t IterateSmallChunkMaxGapSize = Max(2048u, IterateSmallChunkWindowSize / 256u); IterateSmallChunkWindowSize = Min((LargeSizeLimit + IterateSmallChunkMaxGapSize) * ChunkLocations.size(), IterateSmallChunkWindowSize); diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index b2e045632..ead7e4f3a 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -2410,74 +2410,95 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) try { - std::vector<BlockStoreLocation> ChunkLocations; - std::vector<IoHash> ChunkIndexToChunkHash; + std::vector<DiskLocation> ChunkLocations; + std::vector<IoHash> ChunkIndexToChunkHash; + std::vector<DiskLocation> StandaloneLocations; + std::vector<IoHash> StandaloneIndexToKeysHash; - RwLock::SharedLockScope _(m_IndexLock); + { + RwLock::SharedLockScope _(m_IndexLock); - const size_t BlockChunkInitialCount = m_Index.size() / 4; - ChunkLocations.reserve(BlockChunkInitialCount); - ChunkIndexToChunkHash.reserve(BlockChunkInitialCount); + const size_t InitialCount = m_Index.size() / 4; + ChunkLocations.reserve(InitialCount); + ChunkIndexToChunkHash.reserve(InitialCount); + StandaloneLocations.reserve(InitialCount); + StandaloneIndexToKeysHash.reserve(InitialCount); - // Do a pass over the index and verify any standalone file values straight away - // all other storage classes are gathered and verified in bulk in order to enable - // more efficient I/O scheduling + for (auto& Kv : m_Index) + { + const IoHash& HashKey = Kv.first; + const BucketPayload& Payload = m_Payloads[Kv.second]; + const DiskLocation& Loc = Payload.Location; - for (auto& Kv : m_Index) - { - const IoHash& HashKey = Kv.first; - const BucketPayload& Payload = m_Payloads[Kv.second]; - const DiskLocation& Loc = Payload.Location; + Ctx.ThrowIfDeadlineExpired(); + if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) + { + 
StandaloneLocations.push_back(Loc); + StandaloneIndexToKeysHash.push_back(HashKey); + } + else + { + ChunkLocations.push_back(Loc); + ChunkIndexToChunkHash.push_back(HashKey); + } + } + } + + for (size_t StandaloneKeyIndex = 0; StandaloneKeyIndex < StandaloneIndexToKeysHash.size(); StandaloneKeyIndex++) + { Ctx.ThrowIfDeadlineExpired(); - if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) - { - ChunkCount.fetch_add(1); - VerifiedChunkBytes.fetch_add(Loc.Size()); + const IoHash& HashKey = StandaloneIndexToKeysHash[StandaloneKeyIndex]; + const DiskLocation& Loc = StandaloneLocations[StandaloneKeyIndex]; - if (Loc.GetContentType() == ZenContentType::kBinary) - { - // Blob cache value, not much we can do about data integrity checking - // here since there's no hash available - ExtendablePathBuilder<256> DataFilePath; - BuildPath(DataFilePath, HashKey); + ChunkCount.fetch_add(1); + VerifiedChunkBytes.fetch_add(Loc.Size()); - RwLock::SharedLockScope ValueLock(LockForHash(HashKey)); + if (Loc.GetContentType() == ZenContentType::kBinary) + { + // Blob cache value, not much we can do about data integrity checking + // here since there's no hash available + ExtendablePathBuilder<256> DataFilePath; + BuildPath(DataFilePath, HashKey); - std::error_code Ec; - uintmax_t size = FileSizeFromPath(DataFilePath.ToPath(), Ec); - if (Ec) - { - ReportBadKey(HashKey); - } - if (size != Loc.Size()) - { - ReportBadKey(HashKey); - } - continue; + RwLock::SharedLockScope ValueLock(LockForHash(HashKey)); + + std::error_code Ec; + uintmax_t Size = FileSizeFromPath(DataFilePath.ToPath(), Ec); + if (Ec) + { + ReportBadKey(HashKey); } - else + ValueLock.ReleaseNow(); + + if (Size != Loc.Size()) { - // Structured cache value - IoBuffer Buffer = GetStandaloneCacheValue(Loc, HashKey); - if (!Buffer) + // Make sure we verify that size hasn't changed behind our back... 
+ RwLock::SharedLockScope _(m_IndexLock); + if (auto It = m_Index.find(HashKey); It != m_Index.end()) { - ReportBadKey(HashKey); - continue; - } - if (!ValidateIoBuffer(Loc.GetContentType(), std::move(Buffer))) - { - ReportBadKey(HashKey); - continue; + const BucketPayload& Payload = m_Payloads[It->second]; + const DiskLocation& CurrentLoc = Payload.Location; + if (Size != CurrentLoc.Size()) + { + ReportBadKey(HashKey); + } } } } else { - ChunkLocations.emplace_back(Loc.GetBlockLocation(m_Configuration.PayloadAlignment)); - ChunkIndexToChunkHash.push_back(HashKey); - continue; + // Structured cache value + IoBuffer Buffer = GetStandaloneCacheValue(Loc, HashKey); + if (!Buffer) + { + ReportBadKey(HashKey); + } + else if (!ValidateIoBuffer(Loc.GetContentType(), std::move(Buffer))) + { + ReportBadKey(HashKey); + } } } @@ -2502,8 +2523,9 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) ReportBadKey(Hash); return true; } - const BucketPayload& Payload = m_Payloads[m_Index.at(Hash)]; - ZenContentType ContentType = Payload.Location.GetContentType(); + + const DiskLocation& Loc = ChunkLocations[ChunkIndex]; + ZenContentType ContentType = Loc.GetContentType(); Buffer.SetContentType(ContentType); if (!ValidateIoBuffer(ContentType, std::move(Buffer))) { @@ -2525,8 +2547,8 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) ReportBadKey(Hash); return true; } - const BucketPayload& Payload = m_Payloads[m_Index.at(Hash)]; - ZenContentType ContentType = Payload.Location.GetContentType(); + const DiskLocation& Loc = ChunkLocations[ChunkIndex]; + ZenContentType ContentType = Loc.GetContentType(); Buffer.SetContentType(ContentType); if (!ValidateIoBuffer(ContentType, std::move(Buffer))) { @@ -2536,8 +2558,16 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) return true; }; - m_BlockStore.IterateChunks(ChunkLocations, [&](uint32_t, std::span<const size_t> ChunkIndexes) { - return m_BlockStore.IterateBlock(ChunkLocations, ChunkIndexes, 
ValidateSmallChunk, ValidateLargeChunk, 0); + std::vector<BlockStoreLocation> ChunkBlockLocations; + ChunkBlockLocations.reserve(ChunkLocations.size()); + + for (const DiskLocation& Loc : ChunkLocations) + { + ChunkBlockLocations.push_back(Loc.GetBlockLocation(m_Configuration.PayloadAlignment)); + } + + m_BlockStore.IterateChunks(ChunkBlockLocations, [&](uint32_t, std::span<const size_t> ChunkIndexes) { + return m_BlockStore.IterateBlock(ChunkBlockLocations, ChunkIndexes, ValidateSmallChunk, ValidateLargeChunk, 0); }); } catch (ScrubDeadlineExpiredException&) diff --git a/src/zenstore/cache/cacherpc.cpp b/src/zenstore/cache/cacherpc.cpp index 660c66b9a..94abcf547 100644 --- a/src/zenstore/cache/cacherpc.cpp +++ b/src/zenstore/cache/cacherpc.cpp @@ -594,16 +594,16 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb { FoundLocalInvalid = true; } - else if (CbValidateError Error = ValidateCompactBinary(Request.RecordCacheValue.GetView(), CbValidateMode::Default); - Error != CbValidateError::None) + else if (CbObjectView RecordObject = CbObjectView(Request.RecordCacheValue.GetData()); + RecordObject.GetSize() != Request.RecordCacheValue.GetSize()) { ZEN_WARN("HandleRpcGetCacheRecords stored record is corrupt, compact binary format validation failed. 
Reason: '{}'", - ToString(Error)); + "Object size does not match payload size"); FoundLocalInvalid = true; } else { - Request.RecordObject = CbObjectView(Request.RecordCacheValue.GetData()); + Request.RecordObject = std::move(RecordObject); ParseValues(Request); Request.Complete = true; @@ -1710,16 +1710,15 @@ CacheRpcHandler::GetLocalCacheRecords(const CacheRequestContext& Context, Record.ValuesRead = true; if (Record.CacheValue && Record.CacheValue.GetContentType() == ZenContentType::kCbObject) { - if (CbValidateError Error = ValidateCompactBinary(Record.CacheValue.GetView(), CbValidateMode::Default); - Error != CbValidateError::None) + if (CbObjectView RecordObject = CbObjectView(Record.CacheValue.GetData()); + RecordObject.GetSize() != Record.CacheValue.GetSize()) { - ZEN_WARN("GetLocalCacheRecords stored record for is corrupt, compact binary format validation failed. Reason: '{}'", - ToString(Error)); + ZEN_WARN("GetLocalCacheRecords stored record is corrupt, compact binary format validation failed. 
Reason: '{}'", + "Object size does not match payload size"); } else { - CbObjectView RecordObject = CbObjectView(Record.CacheValue.GetData()); - CbArrayView ValuesArray = RecordObject["Values"sv].AsArrayView(); + CbArrayView ValuesArray = RecordObject["Values"sv].AsArrayView(); Record.Values.reserve(ValuesArray.Num()); for (CbFieldView ValueField : ValuesArray) { diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 37a8c36b8..5d8f95c9e 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -547,11 +547,11 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx) if (Ctx.IsSkipCas()) { - ZEN_INFO("SKIPPED scrubbing: '{}'", m_BlocksBasePath); + ZEN_INFO("SKIPPED scrubbing: '{}'", m_RootDirectory); return; } - ZEN_INFO("scrubbing '{}'", m_BlocksBasePath); + ZEN_INFO("scrubbing '{}'", m_RootDirectory); RwLock BadKeysLock; std::vector<IoHash> BadKeys; @@ -565,20 +565,21 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx) try { - RwLock::SharedLockScope _(m_LocationMapLock); - - uint64_t TotalChunkCount = m_LocationMap.size(); - ChunkLocations.reserve(TotalChunkCount); - ChunkIndexToChunkHash.reserve(TotalChunkCount); { - for (const auto& Entry : m_LocationMap) + uint64_t TotalChunkCount = m_LocationMap.size(); + ChunkLocations.reserve(TotalChunkCount); + ChunkIndexToChunkHash.reserve(TotalChunkCount); + RwLock::SharedLockScope _(m_LocationMapLock); { - const IoHash& ChunkHash = Entry.first; - const BlockStoreDiskLocation& DiskLocation = m_Locations[Entry.second]; - BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment); + for (const auto& Entry : m_LocationMap) + { + const IoHash& ChunkHash = Entry.first; + const BlockStoreDiskLocation& DiskLocation = m_Locations[Entry.second]; + BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment); - ChunkLocations.push_back(Location); - ChunkIndexToChunkHash.push_back(ChunkHash); + ChunkLocations.push_back(Location); + 
ChunkIndexToChunkHash.push_back(ChunkHash); + } } } diff --git a/src/zenstore/projectstore.cpp b/src/zenstore/projectstore.cpp index c5b27c1ea..e6c8d624a 100644 --- a/src/zenstore/projectstore.cpp +++ b/src/zenstore/projectstore.cpp @@ -3932,6 +3932,7 @@ ProjectStore::Project::Scrub(ScrubContext& Ctx) { for (const std::string& OpLogId : OpLogs) { + Ctx.ThrowIfDeadlineExpired(); Ref<ProjectStore::Oplog> OpLog; { if (auto OpIt = m_Oplogs.find(OpLogId); OpIt != m_Oplogs.end()) @@ -4358,6 +4359,7 @@ ProjectStore::ScrubStorage(ScrubContext& Ctx) } for (const Ref<Project>& Project : Projects) { + Ctx.ThrowIfDeadlineExpired(); Project->Scrub(Ctx); } } diff --git a/src/zenutil/commandlineoptions.cpp b/src/zenutil/commandlineoptions.cpp index 81699361b..d94564843 100644 --- a/src/zenutil/commandlineoptions.cpp +++ b/src/zenutil/commandlineoptions.cpp @@ -2,7 +2,11 @@ #include <zenutil/commandlineoptions.h> +#include <zencore/string.h> #include <filesystem> + +#include <zencore/windows.h> + #if ZEN_WITH_TESTS # include <zencore/testing.h> #endif // ZEN_WITH_TESTS @@ -160,6 +164,29 @@ RemoveQuotes(const std::string_view& Arg) return Arg; } +CommandLineConverter::CommandLineConverter(int& argc, char**& argv) +{ +#if ZEN_PLATFORM_WINDOWS + LPWSTR RawCommandLine = GetCommandLineW(); + std::string CommandLine = WideToUtf8(RawCommandLine); + Args = ParseCommandLine(CommandLine); +#else + Args.reserve(argc); + for (int I = 0; I < argc; I++) + { + std::string Arg(argv[I]); + if ((!Arg.empty()) && (Arg != " ")) + { + Args.emplace_back(std::move(Arg)); + } + } +#endif + RawArgs = StripCommandlineQuotes(Args); + + argc = static_cast<int>(RawArgs.size()); + argv = RawArgs.data(); +} + #if ZEN_WITH_TESTS void diff --git a/src/zenutil/include/zenutil/commandlineoptions.h b/src/zenutil/include/zenutil/commandlineoptions.h index d6a171242..01cceedb1 100644 --- a/src/zenutil/include/zenutil/commandlineoptions.h +++ b/src/zenutil/include/zenutil/commandlineoptions.h @@ -22,6 +22,19 @@ 
std::vector<char*> StripCommandlineQuotes(std::vector<std::string>& InOutArgs) std::filesystem::path StringToPath(const std::string_view& Path); std::string_view RemoveQuotes(const std::string_view& Arg); +class CommandLineConverter +{ +public: + CommandLineConverter(int& argc, char**& argv); + + int ArgC = 0; + char** ArgV = nullptr; + +private: + std::vector<std::string> Args; + std::vector<char*> RawArgs; +}; + void commandlineoptions_forcelink(); // internal } // namespace zen diff --git a/src/zenutil/zenserverprocess.cpp b/src/zenutil/zenserverprocess.cpp index b56df400d..ef2a4fda5 100644 --- a/src/zenutil/zenserverprocess.cpp +++ b/src/zenutil/zenserverprocess.cpp @@ -698,7 +698,7 @@ ZenServerInstance::Shutdown() { Stopwatch Timer; ZEN_DEBUG("Waiting for zenserver process {} ({}) to shut down", m_Name, m_Process.Pid()); - while (!m_Process.Wait(1000)) + while (!m_Process.Wait(2000)) { if (!m_Process.IsValid()) { |