diff options
Diffstat (limited to 'src/zenremotestore')
52 files changed, 21257 insertions, 13767 deletions
diff --git a/src/zenremotestore/builds/buildinspect.cpp b/src/zenremotestore/builds/buildinspect.cpp new file mode 100644 index 000000000..1af9e20af --- /dev/null +++ b/src/zenremotestore/builds/buildinspect.cpp @@ -0,0 +1,463 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenremotestore/builds/buildinspect.h> + +#include <zencore/compactbinarybuilder.h> +#include <zencore/fmtutils.h> +#include <zencore/scopeguard.h> +#include <zencore/timer.h> +#include <zencore/trace.h> +#include <zenremotestore/builds/buildcontent.h> +#include <zenremotestore/builds/buildmanifest.h> +#include <zenremotestore/builds/buildstoragecache.h> +#include <zenremotestore/builds/buildupdatefolder.h> +#include <zenremotestore/builds/builduploadfolder.h> +#include <zenremotestore/chunking/chunkingcache.h> +#include <zenremotestore/chunking/chunkingcontroller.h> +#include <zenremotestore/transferthreadworkers.h> +#include <zenutil/filesystemutils.h> +#include <zenutil/filteredrate.h> +#include <zenutil/progress.h> +#include <zenutil/wildcard.h> + +#include <numeric> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +ChunkedFolderContent +ScanAndChunkFolder(ProgressBase& Progress, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + bool IsQuiet, + TransferThreadWorkers& Workers, + GetFolderContentStatistics& GetFolderContentStats, + ChunkingStatistics& ChunkingStats, + const std::filesystem::path& Path, + std::function<bool(const std::string_view& RelativePath)>&& IsAcceptedFolder, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& IsAcceptedFile, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) +{ + Stopwatch Timer; + + ZEN_TRACE_CPU("ScanAndChunkFolder"); + + FolderContent Content = GetFolderContent( + GetFolderContentStats, + Path, + std::move(IsAcceptedFolder), + std::move(IsAcceptedFile), + 
Workers.GetIOWorkerPool(), + Progress.GetProgressUpdateDelayMS(), + [](bool, std::ptrdiff_t) {}, + AbortFlag); + if (AbortFlag) + { + return {}; + } + + BuildState LocalContent = GetLocalContent(Progress, + AbortFlag, + PauseFlag, + IsQuiet, + Workers, + GetFolderContentStats, + ChunkingStats, + Path, + ZenStateFilePath(Path / ZenFolderName), + ChunkController, + ChunkCache) + .State; + + std::vector<std::filesystem::path> UntrackedPaths = GetNewPaths(LocalContent.ChunkedContent.Paths, Content.Paths); + + BuildState UntrackedLocalContent = GetLocalStateFromPaths(Progress, + AbortFlag, + PauseFlag, + Workers, + GetFolderContentStats, + ChunkingStats, + Path, + ChunkController, + ChunkCache, + UntrackedPaths) + .State; + + ChunkedFolderContent Result = + MergeChunkedFolderContents(LocalContent.ChunkedContent, std::vector<ChunkedFolderContent>{UntrackedLocalContent.ChunkedContent}); + + const uint64_t TotalRawSize = std::accumulate(Result.RawSizes.begin(), Result.RawSizes.end(), std::uint64_t(0)); + const uint64_t ChunkedRawSize = + std::accumulate(Result.ChunkedContent.ChunkRawSizes.begin(), Result.ChunkedContent.ChunkRawSizes.end(), std::uint64_t(0)); + + if (!IsQuiet) + { + ZEN_CONSOLE("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. 
Average hash rate {}B/sec", + Result.Paths.size(), + NiceBytes(TotalRawSize), + Result.ChunkedContent.ChunkHashes.size(), + NiceBytes(ChunkedRawSize), + Path, + NiceTimeSpanMs(Timer.GetElapsedTimeMs()), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); + } + return Result; +}; + +void +ListBuild(bool IsQuiet, + StorageInstance& Storage, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards, + CbObjectWriter* OptionalStructuredOutput) +{ + std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + + CbObject BuildObject = GetBuild(*Storage.BuildStorage, BuildId, IsQuiet); + + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->AddObjectId("buildId"sv, BuildId); + OptionalStructuredOutput->AddObject("build"sv, BuildObject); + } + + std::vector<std::pair<Oid, std::string>> AllBuildParts = + ResolveBuildPartNames(BuildObject, BuildId, BuildPartIds, BuildPartNames, PreferredMultipartChunkSize); + + if (!AllBuildParts.empty()) + { + Stopwatch GetBuildPartTimer; + + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->BeginArray("parts"sv); + } + + for (size_t BuildPartIndex = 0; BuildPartIndex < AllBuildParts.size(); BuildPartIndex++) + { + const Oid BuildPartId = AllBuildParts[BuildPartIndex].first; + const std::string_view BuildPartName = AllBuildParts[BuildPartIndex].second; + CbObject BuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, BuildPartId); + + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->BeginObject(); + OptionalStructuredOutput->AddObjectId("id"sv, BuildPartId); + OptionalStructuredOutput->AddString("partName"sv, BuildPartName); + } + { + if (OptionalStructuredOutput != nullptr) + { + } + else if (!IsQuiet) + { + ZEN_CONSOLE("{}Part: {} ('{}'):\n", + BuildPartIndex > 0 ? 
"\n" : "", + BuildPartId, + BuildPartName, + NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()), + NiceBytes(BuildPartManifest.GetSize())); + } + + std::vector<std::filesystem::path> Paths; + std::vector<IoHash> RawHashes; + std::vector<uint64_t> RawSizes; + std::vector<uint32_t> Attributes; + + SourcePlatform Platform; + std::vector<IoHash> SequenceRawHashes; + std::vector<uint32_t> ChunkCounts; + std::vector<uint32_t> AbsoluteChunkOrders; + std::vector<IoHash> LooseChunkHashes; + std::vector<uint64_t> LooseChunkRawSizes; + std::vector<IoHash> BlockRawHashes; + + ReadBuildContentFromCompactBinary(BuildPartManifest, + Platform, + Paths, + RawHashes, + RawSizes, + Attributes, + SequenceRawHashes, + ChunkCounts, + AbsoluteChunkOrders, + LooseChunkHashes, + LooseChunkRawSizes, + BlockRawHashes); + + std::vector<size_t> Order(Paths.size()); + std::iota(Order.begin(), Order.end(), 0); + + std::sort(Order.begin(), Order.end(), [&](size_t Lhs, size_t Rhs) { + const std::filesystem::path& LhsPath = Paths[Lhs]; + const std::filesystem::path& RhsPath = Paths[Rhs]; + return LhsPath < RhsPath; + }); + + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->BeginArray("files"sv); + } + { + for (size_t Index : Order) + { + const std::filesystem::path& Path = Paths[Index]; + if (IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(Path.generic_string()), /*CaseSensitive*/ true)) + { + const IoHash& RawHash = RawHashes[Index]; + const uint64_t RawSize = RawSizes[Index]; + const uint32_t Attribute = Attributes[Index]; + + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->BeginObject(); + { + OptionalStructuredOutput->AddString("path"sv, fmt::format("{}", Path)); + OptionalStructuredOutput->AddInteger("rawSize"sv, RawSize); + OptionalStructuredOutput->AddHash("rawHash"sv, RawHash); + switch (Platform) + { + case SourcePlatform::Windows: + OptionalStructuredOutput->AddInteger("attributes"sv, Attribute); + break; + case 
SourcePlatform::MacOS: + case SourcePlatform::Linux: + OptionalStructuredOutput->AddString("chmod"sv, fmt::format("{:#04o}", Attribute)); + break; + default: + throw std::runtime_error(fmt::format("Unsupported platform: {}", (int)Platform)); + } + } + OptionalStructuredOutput->EndObject(); + } + else + { + ZEN_CONSOLE("{}\t{}\t{}", Path, RawSize, RawHash); + } + } + } + } + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->EndArray(); // "files" + } + } + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->EndObject(); + } + } + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->EndArray(); // parts + } + } +} + +void +DiffFolders(ProgressBase& Progress, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + bool IsQuiet, + TransferThreadWorkers& Workers, + const std::filesystem::path& BasePath, + const std::filesystem::path& ComparePath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + const std::vector<std::string>& ExcludeFolders, + const std::vector<std::string>& ExcludeExtensions) +{ + ZEN_TRACE_CPU("DiffFolders"); + + Progress.SetLogOperationName("Diff Folders"); + + enum TaskSteps : uint32_t + { + CheckBase, + CheckCompare, + Diff, + Cleanup, + StepCount + }; + + auto EndProgress = MakeGuard([&]() { Progress.SetLogOperationProgress(TaskSteps::StepCount, TaskSteps::StepCount); }); + + ChunkedFolderContent BaseFolderContent; + ChunkedFolderContent CompareFolderContent; + + { + auto IsAcceptedFolder = [ExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + auto IsAcceptedFile = [ExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> 
bool { + for (const std::string& ExcludeExtension : ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; + }; + + Progress.SetLogOperationProgress(TaskSteps::CheckBase, TaskSteps::StepCount); + + GetFolderContentStatistics BaseGetFolderContentStats; + ChunkingStatistics BaseChunkingStats; + BaseFolderContent = ScanAndChunkFolder(Progress, + AbortFlag, + PauseFlag, + IsQuiet, + Workers, + BaseGetFolderContentStats, + BaseChunkingStats, + BasePath, + IsAcceptedFolder, + IsAcceptedFile, + ChunkController, + ChunkCache); + if (AbortFlag) + { + return; + } + + Progress.SetLogOperationProgress(TaskSteps::CheckCompare, TaskSteps::StepCount); + + GetFolderContentStatistics CompareGetFolderContentStats; + ChunkingStatistics CompareChunkingStats; + CompareFolderContent = ScanAndChunkFolder(Progress, + AbortFlag, + PauseFlag, + IsQuiet, + Workers, + CompareGetFolderContentStats, + CompareChunkingStats, + ComparePath, + IsAcceptedFolder, + IsAcceptedFile, + ChunkController, + ChunkCache); + + if (AbortFlag) + { + return; + } + } + + Progress.SetLogOperationProgress(TaskSteps::Diff, TaskSteps::StepCount); + + std::vector<IoHash> AddedHashes; + std::vector<IoHash> RemovedHashes; + uint64_t RemovedSize = 0; + uint64_t AddedSize = 0; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseRawHashLookup; + for (size_t PathIndex = 0; PathIndex < BaseFolderContent.RawHashes.size(); PathIndex++) + { + const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex]; + BaseRawHashLookup.insert_or_assign(RawHash, PathIndex); + } + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CompareRawHashLookup; + for (size_t PathIndex = 0; PathIndex < CompareFolderContent.RawHashes.size(); PathIndex++) + { + const IoHash& RawHash = CompareFolderContent.RawHashes[PathIndex]; + if (!BaseRawHashLookup.contains(RawHash)) + { + AddedHashes.push_back(RawHash); + AddedSize += CompareFolderContent.RawSizes[PathIndex]; + } + 
CompareRawHashLookup.insert_or_assign(RawHash, PathIndex); + } + for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++) + { + const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex]; + if (!CompareRawHashLookup.contains(RawHash)) + { + RemovedHashes.push_back(RawHash); + RemovedSize += BaseFolderContent.RawSizes[PathIndex]; + } + } + + uint64_t BaseTotalRawSize = 0; + for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++) + { + BaseTotalRawSize += BaseFolderContent.RawSizes[PathIndex]; + } + + double KeptPercent = BaseTotalRawSize > 0 ? (100.0 * (BaseTotalRawSize - RemovedSize)) / BaseTotalRawSize : 0; + + ZEN_CONSOLE("File diff : {} ({}) removed, {} ({}) added, {} ({} {:.1f}%) kept", + RemovedHashes.size(), + NiceBytes(RemovedSize), + AddedHashes.size(), + NiceBytes(AddedSize), + BaseFolderContent.Paths.size() - RemovedHashes.size(), + NiceBytes(BaseTotalRawSize - RemovedSize), + KeptPercent); + + uint64_t CompareTotalRawSize = 0; + + uint64_t FoundChunkCount = 0; + uint64_t FoundChunkSize = 0; + uint64_t NewChunkCount = 0; + uint64_t NewChunkSize = 0; + const ChunkedContentLookup BaseFolderLookup = BuildChunkedContentLookup(BaseFolderContent); + for (uint32_t ChunkIndex = 0; ChunkIndex < CompareFolderContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) + { + const IoHash& ChunkHash = CompareFolderContent.ChunkedContent.ChunkHashes[ChunkIndex]; + if (BaseFolderLookup.ChunkHashToChunkIndex.contains(ChunkHash)) + { + FoundChunkCount++; + FoundChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + else + { + NewChunkCount++; + NewChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + CompareTotalRawSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + + double FoundPercent = CompareTotalRawSize > 0 ? (100.0 * FoundChunkSize) / CompareTotalRawSize : 0; + double NewPercent = CompareTotalRawSize > 0 ? 
(100.0 * NewChunkSize) / CompareTotalRawSize : 0; + + ZEN_CONSOLE("Chunk diff: {} ({} {:.1f}%) out of {} ({}) chunks in {} ({}) base chunks. Added {} ({} {:.1f}%) chunks.", + FoundChunkCount, + NiceBytes(FoundChunkSize), + FoundPercent, + CompareFolderContent.ChunkedContent.ChunkHashes.size(), + NiceBytes(CompareTotalRawSize), + BaseFolderContent.ChunkedContent.ChunkHashes.size(), + NiceBytes(BaseTotalRawSize), + NewChunkCount, + NiceBytes(NewChunkSize), + NewPercent); + + Progress.SetLogOperationProgress(TaskSteps::Cleanup, TaskSteps::StepCount); +} + +} // namespace zen diff --git a/src/zenremotestore/builds/buildmanifest.cpp b/src/zenremotestore/builds/buildmanifest.cpp index 051436e96..738e4b33b 100644 --- a/src/zenremotestore/builds/buildmanifest.cpp +++ b/src/zenremotestore/builds/buildmanifest.cpp @@ -97,6 +97,8 @@ ParseBuildManifest(const std::filesystem::path& ManifestPath) } #if ZEN_WITH_TESTS +TEST_SUITE_BEGIN("remotestore.buildmanifest"); + TEST_CASE("buildmanifest.unstructured") { ScopedTemporaryDirectory Root; @@ -163,6 +165,8 @@ TEST_CASE("buildmanifest.structured") CHECK_EQ(Manifest.Parts[1].Files[0].generic_string(), "baz.pdb"); } +TEST_SUITE_END(); + void buildmanifest_forcelink() { diff --git a/src/zenremotestore/builds/buildoperations-tests.cpp b/src/zenremotestore/builds/buildoperations-tests.cpp new file mode 100644 index 000000000..b1c856193 --- /dev/null +++ b/src/zenremotestore/builds/buildoperations-tests.cpp @@ -0,0 +1,454 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +// Round-trip integration tests for BuildsOperationUploadFolder / BuildsOperationUpdateFolder. +// Runs in-process against CreateFileBuildStorage so no HTTP server is needed. 
+ +#include <zenremotestore/builds/buildupdatefolder.h> +#include <zenremotestore/builds/builduploadfolder.h> +#include <zenremotestore/builds/filebuildstorage.h> +#include <zenremotestore/chunking/chunkingcache.h> +#include <zenremotestore/chunking/chunkingcontroller.h> +#include <zenremotestore/transferthreadworkers.h> + +#include <zencore/basicfile.h> +#include <zencore/compactbinary.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/iohash.h> +#include <zencore/logging.h> +#include <zencore/scopeguard.h> +#include <zencore/testing.h> +#include <zencore/testutils.h> +#include <zencore/workthreadpool.h> +#include <zenutil/progress.h> + +#include <algorithm> +#include <atomic> +#include <filesystem> +#include <string> +#include <vector> + +namespace zen { + +void +buildoperations_tests_forcelink() +{ +} + +#if ZEN_WITH_TESTS + +namespace buildops_test { + + using namespace std::literals; + + struct FolderSpec + { + uint64_t Seed = 1; + uint32_t SmallFileCount = 40; + uint32_t MediumFileCount = 10; + uint32_t LargeFileCount = 2; + uint32_t DuplicateFileCount = 6; + }; + + static IoBuffer MakeBlob(uint64_t Seed, size_t Size) + { + FastRandom Rnd{.Seed = Seed}; + IoBuffer Blob(Size); + uint8_t* Data = static_cast<uint8_t*>(Blob.MutableData()); + size_t Offset = 0; + while (Offset < Size) + { + uint64_t Word = Rnd.Next(); + size_t Chunk = std::min<size_t>(sizeof(Word), Size - Offset); + std::memcpy(Data + Offset, &Word, Chunk); + Offset += Chunk; + } + return Blob; + } + + static void WriteTestFile(const std::filesystem::path& Path, const IoBuffer& Blob) + { + CreateDirectories(Path.parent_path()); + zen::WriteFile(Path, Blob); + } + + static std::vector<std::filesystem::path> MakeTestFolder(const std::filesystem::path& Root, const FolderSpec& Spec) + { + CreateDirectories(Root); + FastRandom Rnd{.Seed = Spec.Seed}; + + std::vector<std::filesystem::path> Written; + 
Written.reserve(Spec.SmallFileCount + Spec.MediumFileCount + Spec.LargeFileCount + Spec.DuplicateFileCount); + + auto Emit = [&](std::string_view SubDir, uint32_t Index, size_t Size) { + std::filesystem::path Rel = std::filesystem::path(std::string(SubDir)) / fmt::format("f_{:05}.bin", Index); + WriteTestFile(Root / Rel, MakeBlob(Spec.Seed * 7919ull + Index, Size)); + Written.push_back(Rel); + }; + + for (uint32_t I = 0; I < Spec.SmallFileCount; ++I) + { + Emit("small", I, 1024u + static_cast<size_t>(Rnd.Next() & 0xFFFu)); + } + for (uint32_t I = 0; I < Spec.MediumFileCount; ++I) + { + Emit("medium", I, 60u * 1024u + static_cast<size_t>(Rnd.Next() & 0x3FFFu)); + } + for (uint32_t I = 0; I < Spec.LargeFileCount; ++I) + { + Emit("large", I, 900u * 1024u + static_cast<size_t>(Rnd.Next() & 0x1FFFFu)); + } + + // Duplicates of previously-written small files so upload can re-use blocks / chunks. + for (uint32_t I = 0; I < Spec.DuplicateFileCount && Spec.SmallFileCount > 0; ++I) + { + std::filesystem::path Source = Root / Written[I % Spec.SmallFileCount]; + std::filesystem::path Rel = std::filesystem::path("dupes") / fmt::format("d_{:05}.bin", I); + CreateDirectories((Root / Rel).parent_path()); + std::error_code Ec; + std::filesystem::copy_file(Source, Root / Rel, std::filesystem::copy_options::overwrite_existing, Ec); + if (!Ec) + { + Written.push_back(Rel); + } + } + + return Written; + } + + static void CopyTreeExcludingZen(const std::filesystem::path& Src, const std::filesystem::path& Dst) + { + CreateDirectories(Dst); + std::error_code Ec; + for (auto It = std::filesystem::recursive_directory_iterator(Src, Ec); !Ec && It != std::filesystem::recursive_directory_iterator(); + It.increment(Ec)) + { + const std::filesystem::path Rel = std::filesystem::relative(It->path(), Src); + if (!Rel.empty() && Rel.begin()->string() == ".zen") + { + It.disable_recursion_pending(); + continue; + } + if (It->is_directory()) + { + CreateDirectories(Dst / Rel); + } + else if 
(It->is_regular_file()) + { + CreateDirectories((Dst / Rel).parent_path()); + std::error_code CopyEc; + std::filesystem::copy_file(It->path(), Dst / Rel, std::filesystem::copy_options::overwrite_existing, CopyEc); + } + } + } + + static std::vector<std::filesystem::path> ListRelative(const std::filesystem::path& Root) + { + std::vector<std::filesystem::path> Paths; + std::error_code Ec; + for (auto It = std::filesystem::recursive_directory_iterator(Root, Ec); + !Ec && It != std::filesystem::recursive_directory_iterator(); + It.increment(Ec)) + { + const std::filesystem::path Rel = std::filesystem::relative(It->path(), Root); + if (!Rel.empty() && Rel.begin()->string() == ".zen") + { + It.disable_recursion_pending(); + continue; + } + if (It->is_regular_file()) + { + Paths.push_back(Rel); + } + } + std::sort(Paths.begin(), Paths.end()); + return Paths; + } + + static bool FoldersEquivalent(const std::filesystem::path& A, const std::filesystem::path& B) + { + const auto AFiles = ListRelative(A); + const auto BFiles = ListRelative(B); + if (AFiles != BFiles) + { + return false; + } + for (const std::filesystem::path& Rel : AFiles) + { + const IoHash HA = IoHash::HashBuffer(ReadFile(A / Rel).Flatten()); + const IoHash HB = IoHash::HashBuffer(ReadFile(B / Rel).Flatten()); + if (HA != HB) + { + return false; + } + } + return true; + } + + struct TestHarness + { + TestHarness() : Workers(/*BoostWorkers*/ false, /*SingleThreaded*/ false), Progress(CreateStandardProgress(zen::logging::Default())) + { + } + + StorageInstance MakeStorage(const std::filesystem::path& StoragePath) + { + StorageInstance SI; + SI.BuildStorage = CreateFileBuildStorage(StoragePath, StorageStats, /*EnableJsonOutput*/ false); + return SI; + } + + std::pair<Oid, Oid> UploadOnce(StorageInstance& Storage, + const std::filesystem::path& SourceFolder, + const std::filesystem::path& TempDir) + { + const Oid BuildId = Oid::NewOid(); + const Oid BuildPartId = Oid::NewOid(); + + auto ChunkController = 
CreateStandardChunkingController(StandardChunkingControllerSettings{}); + auto ChunkCache = CreateNullChunkingCache(); + + CreateDirectories(TempDir); + + UploadFolderOptions Options{}; + Options.TempDir = TempDir; + Options.FindBlockMaxCount = 10000; + Options.BlockReuseMinPercentLimit = 85; + Options.AllowMultiparts = true; + Options.CreateBuild = true; + Options.IgnoreExistingBlocks = false; + Options.UploadToZenCache = false; + Options.IsQuiet = true; + + const CbObject MetaData; + + UploadFolder(zen::logging::Default(), + *Progress, + Workers, + Storage, + AbortFlag, + PauseFlag, + BuildId, + BuildPartId, + /*BuildPartName*/ "default"sv, + SourceFolder, + /*ManifestPath*/ {}, + MetaData, + *ChunkController, + *ChunkCache, + Options); + + return {BuildId, BuildPartId}; + } + + void DownloadOnce(StorageInstance& Storage, + const Oid& BuildId, + const std::filesystem::path& TargetFolder, + const std::filesystem::path& ZenFolderPath, + const std::filesystem::path& SystemRootDir, + const DownloadOptions* OverrideOptions = nullptr) + { + CreateDirectories(TargetFolder); + CreateDirectories(ZenFolderPath); + CreateDirectories(SystemRootDir); + + DownloadOptions Options; + if (OverrideOptions) + { + Options = *OverrideOptions; + } + Options.ZenFolderPath = ZenFolderPath; + Options.SystemRootDir = SystemRootDir; + Options.IsQuiet = true; + + const std::vector<Oid> BuildPartIds; + const std::vector<std::string> BuildPartNames; + + DownloadFolder(zen::logging::Default(), + *Progress, + Workers, + Storage, + AbortFlag, + PauseFlag, + StorageCacheStats, + BuildId, + BuildPartIds, + BuildPartNames, + /*DownloadSpecPath*/ {}, + TargetFolder, + Options); + } + + std::atomic<bool> AbortFlag{false}; + std::atomic<bool> PauseFlag{false}; + TransferThreadWorkers Workers; + std::unique_ptr<ProgressBase> Progress; + BuildStorageBase::Statistics StorageStats; + BuildStorageCache::Statistics StorageCacheStats; + }; + +} // namespace buildops_test + 
+TEST_SUITE_BEGIN("remotestore.buildoperations"); + +// Flagship case: one upload + reupload + multiple download variants against +// the same in-process storage. Exercises scavenge, local-chunk copy, +// cached-block reuse, partial-block fetch, and full-block download. +TEST_CASE("buildoperations.roundtrip.full_variations") +{ + using namespace buildops_test; + + ScopedTemporaryDirectory Root; + TestHarness H; + + const std::filesystem::path FolderA = Root.Path() / "src_a"; + const std::filesystem::path FolderB = Root.Path() / "src_b"; + const std::filesystem::path StoragePath = Root.Path() / "storage"; + const std::filesystem::path UploadTemp = Root.Path() / "upload_tmp"; + const std::filesystem::path SystemRoot = Root.Path() / "sys"; + + MakeTestFolder(FolderA, FolderSpec{.Seed = 1}); + MakeTestFolder(FolderB, FolderSpec{.Seed = 1, .DuplicateFileCount = 20}); + + CreateDirectories(StoragePath); + StorageInstance Storage = H.MakeStorage(StoragePath); + + const auto [BuildIdA, PartIdA] = H.UploadOnce(Storage, FolderA, UploadTemp); + CHECK(BuildIdA != Oid::Zero); + + // Re-upload A: should round-trip without error and still produce a + // usable build (we verify via a subsequent download). + const auto [BuildIdA2, PartIdA2] = H.UploadOnce(Storage, FolderA, UploadTemp); + CHECK(BuildIdA2 != Oid::Zero); + + // Upload B (shares content with A). + const auto [BuildIdB, PartIdB] = H.UploadOnce(Storage, FolderB, UploadTemp); + CHECK(BuildIdB != Oid::Zero); + + // Download A into an empty target. Exercises ScheduleFullBlockDownloads + + // ScheduleLooseChunkWrites. + { + const std::filesystem::path Target = Root.Path() / "dl_empty"; + const std::filesystem::path ZenState = Target / ".zen"; + H.DownloadOnce(Storage, BuildIdA, Target, ZenState, SystemRoot); + CHECK(FoldersEquivalent(FolderA, Target)); + } + + // Re-download A after removing some files but keeping the .zen state + // dir. Exercises ScheduleCachedBlockWrites. 
+ { + const std::filesystem::path Target = Root.Path() / "dl_cached"; + const std::filesystem::path ZenState = Target / ".zen"; + H.DownloadOnce(Storage, BuildIdA, Target, ZenState, SystemRoot); + + int Deleted = 0; + for (auto& E : std::filesystem::recursive_directory_iterator(Target)) + { + if (Deleted >= 5) + break; + if (E.is_regular_file()) + { + const std::filesystem::path Rel = std::filesystem::relative(E.path(), Target); + if (!Rel.empty() && Rel.begin()->string() == ".zen") + continue; + std::error_code Ec; + std::filesystem::remove(E.path(), Ec); + if (!Ec) + ++Deleted; + } + } + CHECK(Deleted > 0); + + H.DownloadOnce(Storage, BuildIdA, Target, ZenState, SystemRoot); + CHECK(FoldersEquivalent(FolderA, Target)); + } + + // Download B into a target pre-seeded with A's content. Exercises + // ScheduleLocalChunkCopies and ScheduleScavengedSequenceWrites (the two + // span-capture sites that were fixed). + { + const std::filesystem::path Target = Root.Path() / "dl_scavenge"; + const std::filesystem::path ZenState = Target / ".zen"; + CopyTreeExcludingZen(FolderA, Target); + + DownloadOptions Opts; + Opts.EnableTargetFolderScavenging = true; + Opts.EnableOtherDownloadsScavenging = true; + H.DownloadOnce(Storage, BuildIdB, Target, ZenState, SystemRoot, &Opts); + + CHECK(FoldersEquivalent(FolderB, Target)); + } + + // Partial-block mode. + { + const std::filesystem::path Target = Root.Path() / "dl_partial"; + const std::filesystem::path ZenState = Target / ".zen"; + + DownloadOptions Opts; + Opts.PartialBlockRequestMode = EPartialBlockRequestMode::All; + H.DownloadOnce(Storage, BuildIdB, Target, ZenState, SystemRoot, &Opts); + + CHECK(FoldersEquivalent(FolderB, Target)); + } +} + +// Abort the download before it can do meaningful work. Expected to unwind +// cleanly, not crash or assert. 
+TEST_CASE("buildoperations.download.abort_midway") +{ + using namespace buildops_test; + + ScopedTemporaryDirectory Root; + TestHarness H; + + const std::filesystem::path Folder = Root.Path() / "src"; + const std::filesystem::path StoragePath = Root.Path() / "storage"; + const std::filesystem::path UploadTemp = Root.Path() / "upload_tmp"; + const std::filesystem::path SystemRoot = Root.Path() / "sys"; + + MakeTestFolder(Folder, FolderSpec{.Seed = 42}); + CreateDirectories(StoragePath); + StorageInstance Storage = H.MakeStorage(StoragePath); + + const auto [BuildId, PartId] = H.UploadOnce(Storage, Folder, UploadTemp); + + const std::filesystem::path Target = Root.Path() / "dl_abort"; + const std::filesystem::path ZenState = Target / ".zen"; + + H.AbortFlag.store(true); + CHECK_NOTHROW(H.DownloadOnce(Storage, BuildId, Target, ZenState, SystemRoot)); +} + +// Empty source folder round-trip: must not crash, must produce an empty +// download target. +TEST_CASE("buildoperations.roundtrip.empty_folder") +{ + using namespace buildops_test; + + ScopedTemporaryDirectory Root; + TestHarness H; + + const std::filesystem::path Folder = Root.Path() / "empty"; + const std::filesystem::path StoragePath = Root.Path() / "storage"; + const std::filesystem::path UploadTemp = Root.Path() / "upload_tmp"; + const std::filesystem::path SystemRoot = Root.Path() / "sys"; + + CreateDirectories(Folder); + CreateDirectories(StoragePath); + StorageInstance Storage = H.MakeStorage(StoragePath); + + const auto [BuildId, PartId] = H.UploadOnce(Storage, Folder, UploadTemp); + + const std::filesystem::path Target = Root.Path() / "dl_empty"; + const std::filesystem::path ZenState = Target / ".zen"; + H.DownloadOnce(Storage, BuildId, Target, ZenState, SystemRoot); + + CHECK(ListRelative(Target).empty()); +} + +TEST_SUITE_END(); + +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenremotestore/builds/buildprimecache.cpp b/src/zenremotestore/builds/buildprimecache.cpp new file mode 
100644 index 000000000..12791f718 --- /dev/null +++ b/src/zenremotestore/builds/buildprimecache.cpp @@ -0,0 +1,350 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenremotestore/builds/buildprimecache.h> + +#include <zencore/compactbinaryutil.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/parallelwork.h> +#include <zencore/timer.h> +#include <zencore/trace.h> +#include <zenremotestore/builds/buildstorageutil.h> +#include <zenremotestore/builds/builduploadfolder.h> +#include <zenutil/filteredrate.h> +#include <zenutil/progress.h> + +namespace zen { + +using namespace std::literals; + +BuildsOperationPrimeCache::BuildsOperationPrimeCache(LoggerRef Log, + ProgressBase& Progress, + StorageInstance& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + WorkerThreadPool& NetworkPool, + const Oid& BuildId, + std::span<const Oid> BuildPartIds, + const Options& Options, + BuildStorageCache::Statistics& StorageCacheStats) +: m_Log(Log) +, m_Progress(Progress) +, m_Storage(Storage) +, m_AbortFlag(AbortFlag) +, m_PauseFlag(PauseFlag) +, m_NetworkPool(NetworkPool) +, m_BuildId(BuildId) +, m_BuildPartIds(BuildPartIds.begin(), BuildPartIds.end()) +, m_Options(Options) +, m_StorageCacheStats(StorageCacheStats) +{ + m_TempPath = m_Options.ZenFolderPath / "tmp"; + CreateDirectories(m_TempPath); +} + +void +BuildsOperationPrimeCache::Execute() +{ + ZEN_TRACE_CPU("BuildsOperationPrimeCache::Execute"); + + Stopwatch PrimeTimer; + + tsl::robin_map<IoHash, uint64_t, IoHash::Hasher> LooseChunkRawSizes; + tsl::robin_set<IoHash, IoHash::Hasher> BuildBlobs; + CollectReferencedBlobs(BuildBlobs, LooseChunkRawSizes); + + if (!m_Options.IsQuiet) + { + ZEN_INFO("Found {} referenced blobs", BuildBlobs.size()); + } + + if (BuildBlobs.empty()) + { + return; + } + + std::vector<IoHash> BlobsToDownload = FilterAlreadyCachedBlobs(BuildBlobs); + + if (BlobsToDownload.empty()) + { + return; + } + + std::atomic<uint64_t> 
MultipartAttachmentCount; + std::atomic<size_t> CompletedDownloadCount; + FilteredRate FilteredDownloadedBytesPerSecond; + + ScheduleBlobDownloads(BlobsToDownload, + LooseChunkRawSizes, + MultipartAttachmentCount, + CompletedDownloadCount, + FilteredDownloadedBytesPerSecond); + + if (m_AbortFlag) + { + return; + } + + if (m_Storage.CacheStorage) + { + m_Storage.CacheStorage->Flush(m_Progress.GetProgressUpdateDelayMS(), [this](intptr_t Remaining) -> bool { + ZEN_UNUSED(Remaining); + if (!m_Options.IsQuiet) + { + ZEN_INFO("Waiting for {} blobs to finish upload to '{}'", Remaining, m_Storage.CacheHost.Name); + } + return !m_AbortFlag; + }); + } + + if (!m_Options.IsQuiet) + { + uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + m_DownloadStats.DownloadedBlockByteCount.load(); + ZEN_INFO("Downloaded {} ({}bits/s) in {}. {} as multipart. Completed in {}", + NiceBytes(DownloadedBytes), + NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), DownloadedBytes * 8)), + NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000), + MultipartAttachmentCount.load(), + NiceTimeSpanMs(PrimeTimer.GetElapsedTimeMs())); + } +} + +void +BuildsOperationPrimeCache::CollectReferencedBlobs(tsl::robin_set<IoHash, IoHash::Hasher>& OutBuildBlobs, + tsl::robin_map<IoHash, uint64_t, IoHash::Hasher>& OutLooseChunkRawSizes) +{ + for (const Oid& BuildPartId : m_BuildPartIds) + { + CbObject BuildPart = m_Storage.BuildStorage->GetBuildPart(m_BuildId, BuildPartId); + + CbObjectView BlockAttachmentsView = BuildPart["blockAttachments"sv].AsObjectView(); + std::vector<IoHash> BlockAttachments = compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, BlockAttachmentsView); + + CbObjectView ChunkAttachmentsView = BuildPart["chunkAttachments"sv].AsObjectView(); + std::vector<IoHash> ChunkAttachments = compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, ChunkAttachmentsView); + std::vector<uint64_t> ChunkRawSizes = 
compactbinary_helpers::ReadArray<uint64_t>("chunkRawSizes"sv, ChunkAttachmentsView); + if (ChunkAttachments.size() != ChunkRawSizes.size()) + { + throw std::runtime_error(fmt::format("Mismatch of loose chunk raw size array, expected {}, found {}", + ChunkAttachments.size(), + ChunkRawSizes.size())); + } + + OutBuildBlobs.reserve(ChunkAttachments.size() + BlockAttachments.size()); + OutBuildBlobs.insert(BlockAttachments.begin(), BlockAttachments.end()); + OutBuildBlobs.insert(ChunkAttachments.begin(), ChunkAttachments.end()); + + for (size_t ChunkAttachmentIndex = 0; ChunkAttachmentIndex < ChunkAttachments.size(); ChunkAttachmentIndex++) + { + OutLooseChunkRawSizes.insert_or_assign(ChunkAttachments[ChunkAttachmentIndex], ChunkRawSizes[ChunkAttachmentIndex]); + } + } +} + +std::vector<IoHash> +BuildsOperationPrimeCache::FilterAlreadyCachedBlobs(const tsl::robin_set<IoHash, IoHash::Hasher>& BuildBlobs) +{ + std::vector<IoHash> BlobsToDownload; + BlobsToDownload.reserve(BuildBlobs.size()); + + if (m_Storage.CacheStorage && !BuildBlobs.empty() && !m_Options.ForceUpload) + { + ZEN_TRACE_CPU("BlobCacheExistCheck"); + Stopwatch Timer; + + const std::vector<IoHash> BlobHashes(BuildBlobs.begin(), BuildBlobs.end()); + const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = + m_Storage.CacheStorage->BlobsExists(m_BuildId, BlobHashes); + + if (CacheExistsResult.size() == BlobHashes.size()) + { + for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++) + { + if (!CacheExistsResult[BlobIndex].HasBody) + { + BlobsToDownload.push_back(BlobHashes[BlobIndex]); + } + } + size_t FoundCount = BuildBlobs.size() - BlobsToDownload.size(); + + if (FoundCount > 0 && !m_Options.IsQuiet) + { + ZEN_INFO("Remote cache : Found {} out of {} needed blobs in {}", + FoundCount, + BuildBlobs.size(), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + } + } + } + else + { + BlobsToDownload.insert(BlobsToDownload.end(), BuildBlobs.begin(), BuildBlobs.end()); + } + return 
BlobsToDownload; +} + +void +BuildsOperationPrimeCache::ScheduleBlobDownloads(std::span<const IoHash> BlobsToDownload, + const tsl::robin_map<IoHash, uint64_t, IoHash::Hasher>& LooseChunkRawSizes, + std::atomic<uint64_t>& MultipartAttachmentCount, + std::atomic<size_t>& CompletedDownloadCount, + FilteredRate& FilteredDownloadedBytesPerSecond) +{ + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Downloading"); + + ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + const size_t BlobCount = BlobsToDownload.size(); + + for (size_t BlobIndex = 0; BlobIndex < BlobCount; BlobIndex++) + { + Work.ScheduleWork( + m_NetworkPool, + [this, + &Work, + BlobsToDownload, + BlobCount, + &LooseChunkRawSizes, + &CompletedDownloadCount, + &FilteredDownloadedBytesPerSecond, + &MultipartAttachmentCount, + BlobIndex](std::atomic<bool>&) { + if (!m_AbortFlag) + { + const IoHash& BlobHash = BlobsToDownload[BlobIndex]; + bool IsLargeBlob = false; + if (auto It = LooseChunkRawSizes.find(BlobHash); It != LooseChunkRawSizes.end()) + { + IsLargeBlob = It->second >= m_Options.LargeAttachmentSize; + } + + FilteredDownloadedBytesPerSecond.Start(); + + if (IsLargeBlob) + { + DownloadLargeBlobForCache(Work, + BlobHash, + BlobCount, + CompletedDownloadCount, + MultipartAttachmentCount, + FilteredDownloadedBytesPerSecond); + } + else + { + DownloadSingleBlobForCache(BlobHash, BlobCount, CompletedDownloadCount, FilteredDownloadedBytesPerSecond); + } + } + }); + } + + Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + + uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + m_DownloadStats.DownloadedBlockByteCount.load(); + FilteredDownloadedBytesPerSecond.Update(DownloadedBytes); + + std::string DownloadRateString = (CompletedDownloadCount == BlobCount) + ? 
"" + : fmt::format(" {}bits/s", NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8)); + std::string UploadDetails = m_Storage.CacheStorage ? fmt::format(" {} ({}) uploaded.", + m_StorageCacheStats.PutBlobCount.load(), + NiceBytes(m_StorageCacheStats.PutBlobByteCount.load())) + : ""; + + std::string Details = fmt::format("{}/{} ({}{}) downloaded.{}", + CompletedDownloadCount.load(), + BlobCount, + NiceBytes(DownloadedBytes), + DownloadRateString, + UploadDetails); + ProgressBar->UpdateState({.Task = "Downloading", + .Details = Details, + .TotalCount = BlobCount, + .RemainingCount = BlobCount - CompletedDownloadCount.load(), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + + FilteredDownloadedBytesPerSecond.Stop(); + ProgressBar->Finish(); +} + +void +BuildsOperationPrimeCache::DownloadLargeBlobForCache(ParallelWork& Work, + const IoHash& BlobHash, + size_t BlobCount, + std::atomic<size_t>& CompletedDownloadCount, + std::atomic<uint64_t>& MultipartAttachmentCount, + FilteredRate& FilteredDownloadedBytesPerSecond) +{ + DownloadLargeBlob(*m_Storage.BuildStorage, + m_TempPath, + m_BuildId, + BlobHash, + m_Options.PreferredMultipartChunkSize, + Work, + m_NetworkPool, + m_DownloadStats.DownloadedChunkByteCount, + MultipartAttachmentCount, + [this, BlobCount, BlobHash, &FilteredDownloadedBytesPerSecond, &CompletedDownloadCount](IoBuffer&& Payload) { + m_DownloadStats.DownloadedChunkCount++; + m_DownloadStats.RequestsCompleteCount++; + + if (!m_AbortFlag) + { + if (Payload && m_Storage.CacheStorage) + { + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + BlobHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(SharedBuffer(Payload))); + } + } + if (CompletedDownloadCount.fetch_add(1) + 1 == BlobCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + }); +} + +void +BuildsOperationPrimeCache::DownloadSingleBlobForCache(const IoHash& BlobHash, + size_t BlobCount, + std::atomic<size_t>& 
CompletedDownloadCount, + FilteredRate& FilteredDownloadedBytesPerSecond) +{ + IoBuffer Payload; + try + { + Payload = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlobHash); + + m_DownloadStats.DownloadedBlockCount++; + m_DownloadStats.DownloadedBlockByteCount += Payload.GetSize(); + m_DownloadStats.RequestsCompleteCount++; + } + catch (const std::exception&) + { + // Silence http errors due to abort + if (!m_AbortFlag) + { + throw; + } + } + + if (!m_AbortFlag) + { + if (Payload && m_Storage.CacheStorage) + { + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + BlobHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(SharedBuffer(std::move(Payload)))); + } + if (CompletedDownloadCount.fetch_add(1) + 1 == BlobCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + } +} + +} // namespace zen diff --git a/src/zenremotestore/builds/buildsavedstate.cpp b/src/zenremotestore/builds/buildsavedstate.cpp index 1d1f4605f..dfc565d4a 100644 --- a/src/zenremotestore/builds/buildsavedstate.cpp +++ b/src/zenremotestore/builds/buildsavedstate.cpp @@ -163,6 +163,8 @@ BuildSaveState::Write(const BuildSaveState& SaveState, CbWriter& Output) { ZEN_ASSERT(!SaveState.LocalPath.empty()); + Output.AddInteger("version", SaveState.Version); + Output.AddString("path", (const char*)SaveState.LocalPath.u8string().c_str()); BuildsSelection::Write(SaveState.State.Selection, Output); @@ -182,6 +184,7 @@ BuildSaveState::Write(const BuildSaveState& SaveState, CbWriter& Output) BuildSaveState BuildSaveState::Read(CbObjectView& Input) { + uint32_t Version = Input["version"].AsUInt32(BuildSaveState::NoVersion); BuildState State = BuildState::Read(Input); CbObjectView LocalFolderStateObject = Input["localFolderState"sv].AsObjectView(); FolderContent FolderState = LoadFolderContentToCompactBinary(LocalFolderStateObject); @@ -191,7 +194,10 @@ BuildSaveState::Read(CbObjectView& Input) throw std::runtime_error("BuildSaveState is invalid, 'path' field is empty"); } - return 
BuildSaveState{.State = std::move(State), .FolderState = std::move(FolderState), .LocalPath = std::move(LocalPath)}; + return BuildSaveState{.Version = Version, + .State = std::move(State), + .FolderState = std::move(FolderState), + .LocalPath = std::move(LocalPath)}; } CbObject @@ -588,6 +594,8 @@ namespace buildsavestate_test { } } // namespace buildsavestate_test +TEST_SUITE_BEGIN("remotestore.buildsavedstate"); + TEST_CASE("buildsavestate.BuildsSelection") { using namespace buildsavestate_test; @@ -696,6 +704,8 @@ TEST_CASE("buildsavestate.DownloadedPaths") } } +TEST_SUITE_END(); + #endif // ZEN_WITH_TESTS } // namespace zen diff --git a/src/zenremotestore/builds/buildstoragecache.cpp b/src/zenremotestore/builds/buildstoragecache.cpp index faa85f81b..8fd31a326 100644 --- a/src/zenremotestore/builds/buildstoragecache.cpp +++ b/src/zenremotestore/builds/buildstoragecache.cpp @@ -96,7 +96,8 @@ public: ZEN_ASSERT(!IsFlushed); ZEN_ASSERT(ContentType == ZenContentType::kCompressedBinary); - // Move all segments in Payload to be file handle based so if Payload is materialized it does not affect buffers in queue + // Move all segments in Payload to be file handle based unless they are very small so if Payload is materialized it does not affect + // buffers in queue std::vector<SharedBuffer> FileBasedSegments; std::span<const SharedBuffer> Segments = Payload.GetSegments(); FileBasedSegments.reserve(Segments.size()); @@ -104,42 +105,56 @@ public: tsl::robin_map<void*, std::filesystem::path> HandleToPath; for (const SharedBuffer& Segment : Segments) { - std::filesystem::path FilePath; - IoBufferFileReference Ref; - if (Segment.AsIoBuffer().GetFileReference(Ref)) + const uint64_t SegmentSize = Segment.GetSize(); + if (SegmentSize < 16u * 1024u) { - if (auto It = HandleToPath.find(Ref.FileHandle); It != HandleToPath.end()) - { - FilePath = It->second; - } - else + FileBasedSegments.push_back(Segment); + } + else + { + std::filesystem::path FilePath; + IoBufferFileReference 
Ref; + if (Segment.AsIoBuffer().GetFileReference(Ref)) { - std::error_code Ec; - std::filesystem::path Path = PathFromHandle(Ref.FileHandle, Ec); - if (!Ec && !Path.empty()) + if (auto It = HandleToPath.find(Ref.FileHandle); It != HandleToPath.end()) + { + FilePath = It->second; + } + else { - HandleToPath.insert_or_assign(Ref.FileHandle, Path); - FilePath = std::move(Path); + std::error_code Ec; + std::filesystem::path Path = PathFromHandle(Ref.FileHandle, Ec); + if (!Ec && !Path.empty()) + { + HandleToPath.insert_or_assign(Ref.FileHandle, Path); + FilePath = std::move(Path); + } + else + { + ZEN_WARN("Failed getting path for chunk to upload to cache. Skipping upload."); + return; + } } } - } - if (!FilePath.empty()) - { - IoBuffer BufferFromFile = IoBufferBuilder::MakeFromFile(FilePath, Ref.FileChunkOffset, Ref.FileChunkSize); - if (BufferFromFile) + if (!FilePath.empty()) { - FileBasedSegments.push_back(SharedBuffer(std::move(BufferFromFile))); + IoBuffer BufferFromFile = IoBufferBuilder::MakeFromFile(FilePath, Ref.FileChunkOffset, Ref.FileChunkSize); + if (BufferFromFile) + { + FileBasedSegments.push_back(SharedBuffer(std::move(BufferFromFile))); + } + else + { + ZEN_WARN("Failed opening file '{}' to upload to cache. 
Skipping upload.", FilePath); + return; + } } else { FileBasedSegments.push_back(Segment); } } - else - { - FileBasedSegments.push_back(Segment); - } } } @@ -151,7 +166,7 @@ public: auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); HttpClient::Response CacheResponse = - m_HttpClient.Upload(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash.ToHexString()), + m_HttpClient.Upload(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash), Payload, ContentType); @@ -178,9 +193,12 @@ public: { Headers.Entries.insert({"Range", fmt::format("bytes={}-{}", RangeOffset, RangeOffset + RangeBytes - 1)}); } - CreateDirectories(m_TempFolderPath); + if (!m_TempFolderPath.empty()) + { + CreateDirectories(m_TempFolderPath); + } HttpClient::Response CacheResponse = - m_HttpClient.Download(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash.ToHexString()), + m_HttpClient.Download(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash), m_TempFolderPath, Headers); AddStatistic(CacheResponse); @@ -191,6 +209,78 @@ public: return {}; } + virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId, + const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_TRACE_CPU("ZenBuildStorageCache::GetBuildBlobRanges"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + + CbObjectWriter Writer; + Writer.BeginArray("ranges"sv); + { + for (const std::pair<uint64_t, uint64_t>& Range : Ranges) + { + Writer.BeginObject(); + { + Writer.AddInteger("offset"sv, Range.first); + Writer.AddInteger("length"sv, Range.second); + } + Writer.EndObject(); + } + } + Writer.EndArray(); // ranges + + if (!m_TempFolderPath.empty()) + { + CreateDirectories(m_TempFolderPath); + } + HttpClient::Response CacheResponse = + 
m_HttpClient.Post(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash), + Writer.Save(), + HttpClient::Accept(ZenContentType::kCbPackage), + m_TempFolderPath); + AddStatistic(CacheResponse); + if (CacheResponse.IsSuccess()) + { + CbPackage ResponsePackage = ParsePackageMessage(CacheResponse.ResponsePayload); + CbObjectView ResponseObject = ResponsePackage.GetObject(); + + CbArrayView RangeArray = ResponseObject["ranges"sv].AsArrayView(); + + std::vector<std::pair<uint64_t, uint64_t>> ReceivedRanges; + ReceivedRanges.reserve(RangeArray.Num()); + + uint64_t OffsetInPayloadRanges = 0; + + for (CbFieldView View : RangeArray) + { + CbObjectView RangeView = View.AsObjectView(); + uint64_t Offset = RangeView["offset"sv].AsUInt64(); + uint64_t Length = RangeView["length"sv].AsUInt64(); + + const std::pair<uint64_t, uint64_t>& Range = Ranges[ReceivedRanges.size()]; + + if (Offset != Range.first || Length != Range.second) + { + return {}; + } + ReceivedRanges.push_back(std::make_pair(OffsetInPayloadRanges, Length)); + OffsetInPayloadRanges += Length; + } + + const CbAttachment* DataAttachment = ResponsePackage.FindAttachment(RawHash); + if (DataAttachment) + { + SharedBuffer PayloadRanges = DataAttachment->AsBinary(); + return BuildBlobRanges{.PayloadBuffer = PayloadRanges.AsIoBuffer(), .Ranges = std::move(ReceivedRanges)}; + } + } + return {}; + } + virtual void PutBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes, std::span<const CbObject> MetaDatas) override { ZEN_ASSERT(!IsFlushed); @@ -460,6 +550,192 @@ CreateZenBuildStorageCache(HttpClient& HttpClient, return std::make_unique<ZenBuildStorageCache>(HttpClient, Stats, Namespace, Bucket, TempFolderPath, BackgroundWorkerPool); } +#if ZEN_WITH_TESTS + +class InMemoryBuildStorageCache : public BuildStorageCache +{ +public: + // MaxRangeSupported == 0 : no range requests are accepted, always return full blob + // MaxRangeSupported == 1 : single range is supported, multi range 
returns full blob + // MaxRangeSupported > 1 : multirange is supported up to MaxRangeSupported, more ranges returns empty blob (bad request) + explicit InMemoryBuildStorageCache(uint64_t MaxRangeSupported, + BuildStorageCache::Statistics& Stats, + double LatencySec = 0.0, + double DelayPerKBSec = 0.0) + : m_MaxRangeSupported(MaxRangeSupported) + , m_Stats(Stats) + , m_LatencySec(LatencySec) + , m_DelayPerKBSec(DelayPerKBSec) + { + } + void PutBuildBlob(const Oid&, const IoHash& RawHash, ZenContentType, const CompositeBuffer& Payload) override + { + IoBuffer Buf = Payload.Flatten().AsIoBuffer(); + Buf.MakeOwned(); + const uint64_t SentBytes = Buf.Size(); + uint64_t ReceivedBytes = 0; + SimulateLatency(SentBytes, 0); + auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); }); + Stopwatch ExecutionTimer; + auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer.GetElapsedTimeUs(), ReceivedBytes, SentBytes); }); + { + std::lock_guard Lock(m_Mutex); + m_Entries[RawHash] = std::move(Buf); + } + m_Stats.PutBlobCount.fetch_add(1); + m_Stats.PutBlobByteCount.fetch_add(SentBytes); + } + + IoBuffer GetBuildBlob(const Oid&, const IoHash& RawHash, uint64_t RangeOffset = 0, uint64_t RangeBytes = (uint64_t)-1) override + { + uint64_t SentBytes = 0; + uint64_t ReceivedBytes = 0; + SimulateLatency(SentBytes, 0); + auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); }); + Stopwatch ExecutionTimer; + auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer.GetElapsedTimeUs(), ReceivedBytes, SentBytes); }); + IoBuffer FullPayload; + { + std::lock_guard Lock(m_Mutex); + auto It = m_Entries.find(RawHash); + if (It == m_Entries.end()) + { + return {}; + } + FullPayload = It->second; + } + + if (RangeOffset != 0 || RangeBytes != (uint64_t)-1) + { + if (m_MaxRangeSupported == 0) + { + ReceivedBytes = FullPayload.Size(); + return FullPayload; + } + else + { + ReceivedBytes = (RangeBytes == (uint64_t)-1) ? 
FullPayload.Size() - RangeOffset : RangeBytes; + return IoBuffer(FullPayload, RangeOffset, RangeBytes); + } + } + else + { + ReceivedBytes = FullPayload.Size(); + return FullPayload; + } + } + + BuildBlobRanges GetBuildBlobRanges(const Oid&, const IoHash& RawHash, std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_ASSERT(!Ranges.empty()); + uint64_t SentBytes = 0; + uint64_t ReceivedBytes = 0; + SimulateLatency(SentBytes, 0); + auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); }); + Stopwatch ExecutionTimer; + auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer.GetElapsedTimeUs(), ReceivedBytes, SentBytes); }); + if (m_MaxRangeSupported > 1 && Ranges.size() > m_MaxRangeSupported) + { + return {}; + } + IoBuffer FullPayload; + { + std::lock_guard Lock(m_Mutex); + auto It = m_Entries.find(RawHash); + if (It == m_Entries.end()) + { + return {}; + } + FullPayload = It->second; + } + + if (Ranges.size() > m_MaxRangeSupported) + { + // An empty Ranges signals to the caller: "full buffer given, use it for all requested ranges". 
+ ReceivedBytes = FullPayload.Size(); + return {.PayloadBuffer = FullPayload}; + } + else + { + uint64_t PayloadStart = Ranges.front().first; + uint64_t PayloadSize = Ranges.back().first + Ranges.back().second - PayloadStart; + IoBuffer RangeBuffer = IoBuffer(FullPayload, PayloadStart, PayloadSize); + std::vector<std::pair<uint64_t, uint64_t>> PayloadRanges; + PayloadRanges.reserve(Ranges.size()); + for (const std::pair<uint64_t, uint64_t>& Range : Ranges) + { + PayloadRanges.push_back(std::make_pair(Range.first - PayloadStart, Range.second)); + } + ReceivedBytes = PayloadSize; + return {.PayloadBuffer = RangeBuffer, .Ranges = std::move(PayloadRanges)}; + } + } + + void PutBlobMetadatas(const Oid&, std::span<const IoHash>, std::span<const CbObject>) override {} + + std::vector<CbObject> GetBlobMetadatas(const Oid&, std::span<const IoHash> Hashes) override + { + return std::vector<CbObject>(Hashes.size()); + } + + std::vector<BlobExistsResult> BlobsExists(const Oid&, std::span<const IoHash> Hashes) override + { + std::lock_guard Lock(m_Mutex); + std::vector<BlobExistsResult> Result; + Result.reserve(Hashes.size()); + for (const IoHash& Hash : Hashes) + { + auto It = m_Entries.find(Hash); + Result.push_back({.HasBody = (It != m_Entries.end() && It->second)}); + } + return Result; + } + + void Flush(int32_t, std::function<bool(intptr_t)>&&) override {} + +private: + void AddStatistic(uint64_t ElapsedTimeUs, uint64_t ReceivedBytes, uint64_t SentBytes) + { + m_Stats.TotalBytesWritten += SentBytes; + m_Stats.TotalBytesRead += ReceivedBytes; + m_Stats.TotalExecutionTimeUs += ElapsedTimeUs; + m_Stats.TotalRequestCount++; + SetAtomicMax(m_Stats.PeakSentBytes, SentBytes); + SetAtomicMax(m_Stats.PeakReceivedBytes, ReceivedBytes); + if (ElapsedTimeUs > 0) + { + SetAtomicMax(m_Stats.PeakBytesPerSec, (ReceivedBytes + SentBytes) * 1000000 / ElapsedTimeUs); + } + } + + void SimulateLatency(uint64_t SendBytes, uint64_t ReceiveBytes) + { + double SleepSec = m_LatencySec; + if 
(m_DelayPerKBSec > 0.0) + { + SleepSec += m_DelayPerKBSec * (double(SendBytes + ReceiveBytes) / 1024u); + } + if (SleepSec > 0) + { + Sleep(int(SleepSec * 1000)); + } + } + + uint64_t m_MaxRangeSupported = 0; + BuildStorageCache::Statistics& m_Stats; + const double m_LatencySec = 0.0; + const double m_DelayPerKBSec = 0.0; + std::mutex m_Mutex; + std::unordered_map<IoHash, IoBuffer, IoHash::Hasher> m_Entries; +}; + +std::unique_ptr<BuildStorageCache> +CreateInMemoryBuildStorageCache(uint64_t MaxRangeSupported, BuildStorageCache::Statistics& Stats, double LatencySec, double DelayPerKBSec) +{ + return std::make_unique<InMemoryBuildStorageCache>(MaxRangeSupported, Stats, LatencySec, DelayPerKBSec); +} +#endif // ZEN_WITH_TESTS + ZenCacheEndpointTestResult TestZenCacheEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpVerbose) { @@ -474,15 +750,28 @@ TestZenCacheEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const boo HttpClient::Response TestResponse = TestHttpClient.Get("/status/builds"); if (TestResponse.IsSuccess()) { - LatencyTestResult LatencyResult = MeasureLatency(TestHttpClient, "/health"); + uint64_t MaxRangeCountPerRequest = 1; + CbObject StatusResponse = TestResponse.AsObject(); + if (StatusResponse["ok"].AsBool()) + { + MaxRangeCountPerRequest = StatusResponse["capabilities"].AsObjectView()["maxrangecountperrequest"].AsUInt64(1); + + LatencyTestResult LatencyResult = MeasureLatency(TestHttpClient, "/health"); + + if (!LatencyResult.Success) + { + return {.Success = false, .FailureReason = LatencyResult.FailureReason}; + } - if (!LatencyResult.Success) + return {.Success = true, .LatencySeconds = LatencyResult.LatencySeconds, .MaxRangeCountPerRequest = MaxRangeCountPerRequest}; + } + else { - return {.Success = false, .FailureReason = LatencyResult.FailureReason}; + return {.Success = false, + .FailureReason = fmt::format("ZenCache endpoint {}/status/builds did not respond with \"ok\"", BaseUrl)}; } - return {.Success = 
true, .LatencySeconds = LatencyResult.LatencySeconds}; } return {.Success = false, .FailureReason = TestResponse.ErrorMessage("")}; -}; +} } // namespace zen diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp deleted file mode 100644 index 08a896f37..000000000 --- a/src/zenremotestore/builds/buildstorageoperations.cpp +++ /dev/null @@ -1,7943 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include <zenremotestore/builds/buildstorageoperations.h> - -#include <zenremotestore/builds/buildcontent.h> -#include <zenremotestore/builds/buildmanifest.h> -#include <zenremotestore/builds/buildsavedstate.h> -#include <zenremotestore/builds/buildstorage.h> -#include <zenremotestore/builds/buildstoragecache.h> -#include <zenremotestore/builds/buildstorageutil.h> -#include <zenremotestore/chunking/chunkblock.h> -#include <zenremotestore/chunking/chunkingcache.h> -#include <zenremotestore/chunking/chunkingcontroller.h> -#include <zenremotestore/filesystemutils.h> -#include <zenremotestore/operationlogoutput.h> - -#include <zencore/basicfile.h> -#include <zencore/compactbinary.h> -#include <zencore/compactbinaryfile.h> -#include <zencore/compactbinaryutil.h> -#include <zencore/compactbinaryvalue.h> -#include <zencore/filesystem.h> -#include <zencore/fmtutils.h> -#include <zencore/parallelwork.h> -#include <zencore/scopeguard.h> -#include <zencore/string.h> -#include <zencore/timer.h> -#include <zencore/trace.h> -#include <zenutil/wildcard.h> - -#include <numeric> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_map.h> -#include <tsl/robin_set.h> -ZEN_THIRD_PARTY_INCLUDES_END - -#if ZEN_WITH_TESTS -# include <zencore/testing.h> -# include <zencore/testutils.h> -# include <zenremotestore/builds/filebuildstorage.h> -#endif // ZEN_WITH_TESTS - -namespace zen { - -using namespace std::literals; - -namespace { - std::filesystem::path ZenTempCacheFolderPath(const std::filesystem::path& 
ZenFolderPath) - { - return ZenTempFolderPath(ZenFolderPath) / "cache"; // Decompressed and verified data - chunks & sequences - } - std::filesystem::path ZenTempBlockFolderPath(const std::filesystem::path& ZenFolderPath) - { - return ZenTempFolderPath(ZenFolderPath) / "blocks"; // Temp storage for whole and partial blocks - } - std::filesystem::path ZenTempDownloadFolderPath(const std::filesystem::path& ZenFolderPath) - { - return ZenTempFolderPath(ZenFolderPath) / "download"; // Temp storage for decompressed and validated chunks - } - - uint64_t GetBytesPerSecond(uint64_t ElapsedWallTimeUS, uint64_t Count) - { - if (ElapsedWallTimeUS == 0) - { - return 0; - } - return Count * 1000000 / ElapsedWallTimeUS; - } - - std::filesystem::path GetTempChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash) - { - return CacheFolderPath / (RawHash.ToHexString() + ".tmp"); - } - - std::filesystem::path GetFinalChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash) - { - return CacheFolderPath / RawHash.ToHexString(); - } - - bool CleanDirectory(OperationLogOutput& OperationLogOutput, - WorkerThreadPool& IOWorkerPool, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - bool IsQuiet, - const std::filesystem::path& Path, - std::span<const std::string> ExcludeDirectories) - { - ZEN_TRACE_CPU("CleanDirectory"); - Stopwatch Timer; - - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(OperationLogOutput.CreateProgressBar("Clean Folder")); - OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr); - - CleanDirectoryResult Result = CleanDirectory( - IOWorkerPool, - AbortFlag, - PauseFlag, - Path, - ExcludeDirectories, - [&](const std::string_view Details, uint64_t TotalCount, uint64_t RemainingCount, bool IsPaused, bool IsAborted) { - Progress.UpdateState({.Task = "Cleaning folder ", - .Details = std::string(Details), - .TotalCount = TotalCount, - .RemainingCount = RemainingCount, 
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }, - OperationLogOutput.GetProgressUpdateDelayMS()); - - Progress.Finish(); - - if (AbortFlag) - { - return false; - } - - uint64_t ElapsedTimeMs = Timer.GetElapsedTimeMs(); - - if (!Result.FailedRemovePaths.empty()) - { - ExtendableStringBuilder<512> SB; - for (size_t FailedPathIndex = 0; FailedPathIndex < Result.FailedRemovePaths.size(); FailedPathIndex++) - { - SB << fmt::format("\n '{}': ({}) {}", - Result.FailedRemovePaths[FailedPathIndex].first, - Result.FailedRemovePaths[FailedPathIndex].second.value(), - Result.FailedRemovePaths[FailedPathIndex].second.message()); - } - ZEN_OPERATION_LOG_WARN(OperationLogOutput, "Clean failed to remove files from '{}': {}", Path, SB.ToView()); - } - - if (ElapsedTimeMs >= 200 && !IsQuiet) - { - ZEN_OPERATION_LOG_INFO(OperationLogOutput, - "Wiped folder '{}' {} ({}) in {}", - Path, - Result.FoundCount, - NiceBytes(Result.DeletedByteCount), - NiceTimeSpanMs(ElapsedTimeMs)); - } - - return Result.FailedRemovePaths.empty(); - } - - bool IsExtensionHashCompressable(const tsl::robin_set<uint32_t>& NonCompressableExtensionHashes, const uint32_t PathHash) - { - return !NonCompressableExtensionHashes.contains(PathHash); - } - - bool IsChunkCompressable(const tsl::robin_set<uint32_t>& NonCompressableExtensionHashes, - const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - uint32_t ChunkIndex) - { - ZEN_UNUSED(Content); - const uint32_t ChunkLocationCount = Lookup.ChunkSequenceLocationCounts[ChunkIndex]; - if (ChunkLocationCount == 0) - { - return false; - } - const size_t ChunkLocationOffset = Lookup.ChunkSequenceLocationOffset[ChunkIndex]; - const uint32_t SequenceIndex = Lookup.ChunkSequenceLocations[ChunkLocationOffset].SequenceIndex; - const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; - const uint32_t ExtensionHash = Lookup.PathExtensionHash[PathIndex]; - - const bool 
IsCompressable = IsExtensionHashCompressable(NonCompressableExtensionHashes, ExtensionHash); - return IsCompressable; - } - - template<typename T> - std::string FormatArray(std::span<const T> Items, std::string_view Prefix) - { - ExtendableStringBuilder<512> SB; - for (const T& Item : Items) - { - SB.Append(fmt::format("{}{}", Prefix, Item)); - } - return SB.ToString(); - } - - void DownloadLargeBlob(BuildStorageBase& Storage, - const std::filesystem::path& DownloadFolder, - const Oid& BuildId, - const IoHash& ChunkHash, - const std::uint64_t PreferredMultipartChunkSize, - ParallelWork& Work, - WorkerThreadPool& NetworkPool, - std::atomic<uint64_t>& DownloadedChunkByteCount, - std::atomic<uint64_t>& MultipartAttachmentCount, - std::function<void(IoBuffer&& Payload)>&& OnDownloadComplete) - { - ZEN_TRACE_CPU("DownloadLargeBlob"); - - struct WorkloadData - { - TemporaryFile TempFile; - }; - std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); - - std::error_code Ec; - Workload->TempFile.CreateTemporary(DownloadFolder, Ec); - if (Ec) - { - throw std::runtime_error( - fmt::format("Failed opening temporary file '{}', reason: ({}) {}", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); - } - std::vector<std::function<void()>> WorkItems = Storage.GetLargeBuildBlob( - BuildId, - ChunkHash, - PreferredMultipartChunkSize, - [&Work, Workload, &DownloadedChunkByteCount](uint64_t Offset, const IoBuffer& Chunk) { - DownloadedChunkByteCount += Chunk.GetSize(); - - if (!Work.IsAborted()) - { - ZEN_TRACE_CPU("Async_DownloadLargeBlob_OnReceive"); - Workload->TempFile.Write(Chunk.GetView(), Offset); - } - }, - [&Work, Workload, &DownloadedChunkByteCount, OnDownloadComplete = std::move(OnDownloadComplete)]() { - if (!Work.IsAborted()) - { - ZEN_TRACE_CPU("Async_DownloadLargeBlob_OnComplete"); - - uint64_t PayloadSize = Workload->TempFile.FileSize(); - void* FileHandle = Workload->TempFile.Detach(); - ZEN_ASSERT(FileHandle != nullptr); - IoBuffer 
Payload(IoBuffer::File, FileHandle, 0, PayloadSize, true); - Payload.SetDeleteOnClose(true); - OnDownloadComplete(std::move(Payload)); - } - }); - if (!WorkItems.empty()) - { - MultipartAttachmentCount++; - } - for (auto& WorkItem : WorkItems) - { - Work.ScheduleWork(NetworkPool, [WorkItem = std::move(WorkItem)](std::atomic<bool>& AbortFlag) { - if (!AbortFlag) - { - ZEN_TRACE_CPU("Async_DownloadLargeBlob_Work"); - - WorkItem(); - } - }); - } - } - - CompositeBuffer ValidateBlob(std::atomic<bool>& AbortFlag, - IoBuffer&& Payload, - const IoHash& BlobHash, - uint64_t& OutCompressedSize, - uint64_t& OutDecompressedSize) - { - ZEN_TRACE_CPU("ValidateBlob"); - - if (Payload.GetContentType() != ZenContentType::kCompressedBinary) - { - throw std::runtime_error(fmt::format("Blob {} ({} bytes) has unexpected content type '{}'", - BlobHash, - Payload.GetSize(), - ToString(Payload.GetContentType()))); - } - IoHash RawHash; - uint64_t RawSize; - CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize); - if (!Compressed) - { - throw std::runtime_error(fmt::format("Blob {} ({} bytes) compressed header is invalid", BlobHash, Payload.GetSize())); - } - if (RawHash != BlobHash) - { - throw std::runtime_error( - fmt::format("Blob {} ({} bytes) compressed header has a mismatching raw hash {}", BlobHash, Payload.GetSize(), RawHash)); - } - - IoHashStream Hash; - bool CouldDecompress = Compressed.DecompressToStream( - 0, - RawSize, - [&AbortFlag, &Hash](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { - ZEN_UNUSED(SourceOffset, SourceSize, Offset); - if (!AbortFlag) - { - for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) - { - Hash.Append(Segment.GetView()); - } - return true; - } - return false; - }); - - if (AbortFlag) - { - return CompositeBuffer{}; - } - - if (!CouldDecompress) - { - throw std::runtime_error( - fmt::format("Blob {} ({} bytes) failed to decompress - header 
information mismatch", BlobHash, Payload.GetSize())); - } - IoHash ValidateRawHash = Hash.GetHash(); - if (ValidateRawHash != BlobHash) - { - throw std::runtime_error(fmt::format("Blob {} ({} bytes) decompressed hash {} does not match header information", - BlobHash, - Payload.GetSize(), - ValidateRawHash)); - } - OodleCompressor Compressor; - OodleCompressionLevel CompressionLevel; - uint64_t BlockSize; - if (!Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize)) - { - throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to get compression details", BlobHash, Payload.GetSize())); - } - OutCompressedSize = Payload.GetSize(); - OutDecompressedSize = RawSize; - if (CompressionLevel == OodleCompressionLevel::None) - { - // Only decompress to composite if we need it for block verification - CompositeBuffer DecompressedComposite = Compressed.DecompressToComposite(); - if (!DecompressedComposite) - { - throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to decompress to composite", BlobHash, Payload.GetSize())); - } - return DecompressedComposite; - } - return CompositeBuffer{}; - } - -} // namespace - -bool -IsSingleFileChunk(const ChunkedFolderContent& RemoteContent, - const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> Locations) -{ - if (Locations.size() == 1) - { - const uint32_t FirstSequenceIndex = Locations[0]->SequenceIndex; - if (RemoteContent.ChunkedContent.ChunkCounts[FirstSequenceIndex] == 1) - { - ZEN_ASSERT_SLOW(Locations[0]->Offset == 0); - return true; - } - } - return false; -} - -IoBuffer -MakeBufferMemoryBased(const CompositeBuffer& PartialBlockBuffer) -{ - ZEN_TRACE_CPU("MakeBufferMemoryBased"); - IoBuffer BlockMemoryBuffer; - std::span<const SharedBuffer> Segments = PartialBlockBuffer.GetSegments(); - if (Segments.size() == 1) - { - IoBufferFileReference FileRef = {}; - if (PartialBlockBuffer.GetSegments().front().AsIoBuffer().GetFileReference(FileRef)) - { - BlockMemoryBuffer = 
// Materializes a (possibly file-backed, possibly multi-segment) CompositeBuffer
// into a single memory-based IoBuffer.
//
// Three cases:
//   1. One segment backed by a file   -> read that file range into a fresh buffer.
//   2. One segment already in memory  -> return it as-is (no copy).
//   3. Multiple segments              -> allocate one buffer of the total size and
//                                        copy/read each segment into it in order.
//
// NOTE(review): the Attach/Detach pairing (with the scope guard) suggests
// BasicFile does not own FileRef.FileHandle here — confirm Detach leaves the
// handle open for other users.
IoBuffer
MakeBufferMemoryBased(const CompositeBuffer& PartialBlockBuffer)
{
    ZEN_TRACE_CPU("MakeBufferMemoryBased");
    IoBuffer BlockMemoryBuffer;
    std::span<const SharedBuffer> Segments = PartialBlockBuffer.GetSegments();
    if (Segments.size() == 1)
    {
        IoBufferFileReference FileRef = {};
        if (PartialBlockBuffer.GetSegments().front().AsIoBuffer().GetFileReference(FileRef))
        {
            // File-backed single segment: read the referenced file range into memory.
            BlockMemoryBuffer = UniqueBuffer::Alloc(FileRef.FileChunkSize).MoveToShared().AsIoBuffer();
            BasicFile Reader;
            Reader.Attach(FileRef.FileHandle);
            // Guard ensures the borrowed handle is detached even if Read throws.
            auto _ = MakeGuard([&Reader]() { Reader.Detach(); });
            MutableMemoryView ReadMem = BlockMemoryBuffer.GetMutableView();
            Reader.Read(ReadMem.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset);
            return BlockMemoryBuffer;
        }
        else
        {
            // Already a single in-memory segment: hand it back without copying.
            return PartialBlockBuffer.GetSegments().front().AsIoBuffer();
        }
    }
    else
    {
        // Not a homogenous memory buffer, read all to memory

        BlockMemoryBuffer = UniqueBuffer::Alloc(PartialBlockBuffer.GetSize()).MoveToShared().AsIoBuffer();
        MutableMemoryView ReadMem = BlockMemoryBuffer.GetMutableView();
        for (const SharedBuffer& Segment : Segments)
        {
            IoBufferFileReference FileRef = {};
            if (Segment.AsIoBuffer().GetFileReference(FileRef))
            {
                // File-backed segment: read it directly into the destination view,
                // then advance past the bytes just written.
                BasicFile Reader;
                Reader.Attach(FileRef.FileHandle);
                auto _ = MakeGuard([&Reader]() { Reader.Detach(); });
                Reader.Read(ReadMem.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset);
                ReadMem = ReadMem.Mid(FileRef.FileChunkSize);
            }
            else
            {
                // In-memory segment: CopyFrom copies and (presumably) returns the
                // remaining view — TODO confirm CopyFrom's return contract.
                ReadMem = ReadMem.CopyFrom(Segment.AsIoBuffer().GetView());
            }
        }
        return BlockMemoryBuffer;
    }
}
Count; - - FilteredPerSecond = (PerSecond + (LastPerSecond * 7)) / 8; - - LastTimeUS = TimeUS; - } - } - - uint64_t GetCurrent() const // If Stopped - return total count / total time - { - if (LastTimeUS == (uint64_t)-1) - { - return 0; - } - return FilteredPerSecond; - } - - uint64_t GetElapsedTimeUS() const - { - if (StartTimeUS == (uint64_t)-1) - { - return 0; - } - if (EndTimeUS == (uint64_t)-1) - { - return 0; - } - uint64_t TimeDeltaUS = EndTimeUS - StartTimeUS; - return TimeDeltaUS; - } - - bool IsActive() const { return (StartTimeUS != (uint64_t)-1) && (EndTimeUS == (uint64_t)-1); } - -private: - Stopwatch Timer; - std::atomic<uint64_t> StartTimeUS = (uint64_t)-1; - std::atomic<uint64_t> EndTimeUS = (uint64_t)-1; - std::atomic<uint64_t> LastTimeUS = (uint64_t)-1; - uint64_t LastCount = 0; - uint64_t LastPerSecond = 0; - uint64_t FilteredPerSecond = 0; -}; - -std::filesystem::path -ZenStateFilePath(const std::filesystem::path& ZenFolderPath) -{ - return ZenFolderPath / "current_state.cbo"; -} -std::filesystem::path -ZenTempFolderPath(const std::filesystem::path& ZenFolderPath) -{ - return ZenFolderPath / "tmp"; -} - -////////////////////// BuildsOperationUpdateFolder - -BuildsOperationUpdateFolder::BuildsOperationUpdateFolder(OperationLogOutput& OperationLogOutput, - StorageInstance& Storage, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - WorkerThreadPool& IOWorkerPool, - WorkerThreadPool& NetworkPool, - const Oid& BuildId, - const std::filesystem::path& Path, - const ChunkedFolderContent& LocalContent, - const ChunkedContentLookup& LocalLookup, - const ChunkedFolderContent& RemoteContent, - const ChunkedContentLookup& RemoteLookup, - const std::vector<ChunkBlockDescription>& BlockDescriptions, - const std::vector<IoHash>& LooseChunkHashes, - const Options& Options) -: m_LogOutput(OperationLogOutput) -, m_Storage(Storage) -, m_AbortFlag(AbortFlag) -, m_PauseFlag(PauseFlag) -, m_IOWorkerPool(IOWorkerPool) -, m_NetworkPool(NetworkPool) -, 
// Captures everything needed to bring the local folder at 'Path' in sync with
// the remote build described by RemoteContent/RemoteLookup. Pure member
// initialization — no work happens until Execute() is called.
//
// NOTE(review): the arguments appear to be retained by the m_* members
// (declared in the header); if any are stored as references, all of them must
// outlive this operation object — confirm against the class declaration.
// The three derived cache/temp paths are computed up front from
// Options.ZenFolderPath.
BuildsOperationUpdateFolder::BuildsOperationUpdateFolder(OperationLogOutput& OperationLogOutput,
                                                         StorageInstance& Storage,
                                                         std::atomic<bool>& AbortFlag,
                                                         std::atomic<bool>& PauseFlag,
                                                         WorkerThreadPool& IOWorkerPool,
                                                         WorkerThreadPool& NetworkPool,
                                                         const Oid& BuildId,
                                                         const std::filesystem::path& Path,
                                                         const ChunkedFolderContent& LocalContent,
                                                         const ChunkedContentLookup& LocalLookup,
                                                         const ChunkedFolderContent& RemoteContent,
                                                         const ChunkedContentLookup& RemoteLookup,
                                                         const std::vector<ChunkBlockDescription>& BlockDescriptions,
                                                         const std::vector<IoHash>& LooseChunkHashes,
                                                         const Options& Options)
: m_LogOutput(OperationLogOutput)
, m_Storage(Storage)
, m_AbortFlag(AbortFlag)
, m_PauseFlag(PauseFlag)
, m_IOWorkerPool(IOWorkerPool)
, m_NetworkPool(NetworkPool)
, m_BuildId(BuildId)
, m_Path(Path)
, m_LocalContent(LocalContent)
, m_LocalLookup(LocalLookup)
, m_RemoteContent(RemoteContent)
, m_RemoteLookup(RemoteLookup)
, m_BlockDescriptions(BlockDescriptions)
, m_LooseChunkHashes(LooseChunkHashes)
, m_Options(Options)
// Derived working locations under the zen folder.
, m_CacheFolderPath(ZenTempCacheFolderPath(m_Options.ZenFolderPath))
, m_TempDownloadFolderPath(ZenTempDownloadFolderPath(m_Options.ZenFolderPath))
, m_TempBlockFolderPath(ZenTempBlockFolderPath(m_Options.ZenFolderPath))
{
}
- } - - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedBlocksFound; - if (!m_Options.PrimeCacheOnly) - { - ScanTempBlocksFolder(CachedBlocksFound); - } - - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexesLeftToFindToRemoteIndex; - - if (!m_Options.PrimeCacheOnly && m_Options.EnableTargetFolderScavenging) - { - // Pick up all whole files we can use from current local state - ZEN_TRACE_CPU("GetLocalSequences"); - - Stopwatch LocalTimer; - - std::vector<uint32_t> MissingSequenceIndexes = ScanTargetFolder(CachedChunkHashesFound, CachedSequenceHashesFound); - - for (uint32_t RemoteSequenceIndex : MissingSequenceIndexes) - { - // We must write the sequence - const uint32_t ChunkCount = m_RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]; - const IoHash& RemoteSequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = ChunkCount; - SequenceIndexesLeftToFindToRemoteIndex.insert({RemoteSequenceRawHash, RemoteSequenceIndex}); - } - } - else - { - for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < m_RemoteContent.ChunkedContent.SequenceRawHashes.size(); - RemoteSequenceIndex++) - { - const uint32_t ChunkCount = m_RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]; - SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = ChunkCount; - } - } - - std::vector<ChunkedFolderContent> ScavengedContents; - std::vector<ChunkedContentLookup> ScavengedLookups; - std::vector<std::filesystem::path> ScavengedPaths; - - std::vector<ScavengedSequenceCopyOperation> ScavengedSequenceCopyOperations; - uint64_t ScavengedPathsCount = 0; - - if (!m_Options.PrimeCacheOnly && m_Options.EnableOtherDownloadsScavenging) - { - ZEN_TRACE_CPU("GetScavengedSequences"); - - Stopwatch ScavengeTimer; - - if (!SequenceIndexesLeftToFindToRemoteIndex.empty()) - { - std::vector<ScavengeSource> ScavengeSources = FindScavengeSources(); - - const size_t 
ScavengePathCount = ScavengeSources.size(); - - ScavengedContents.resize(ScavengePathCount); - ScavengedLookups.resize(ScavengePathCount); - ScavengedPaths.resize(ScavengePathCount); - - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Scavenging")); - OperationLogOutput::ProgressBar& ScavengeProgressBar(*ProgressBarPtr); - - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - - std::atomic<uint64_t> PathsFound(0); - std::atomic<uint64_t> ChunksFound(0); - std::atomic<uint64_t> PathsScavenged(0); - - for (size_t ScavengeIndex = 0; ScavengeIndex < ScavengePathCount; ScavengeIndex++) - { - Work.ScheduleWork(m_IOWorkerPool, - [this, - &ScavengeSources, - &ScavengedContents, - &ScavengedPaths, - &ScavengedLookups, - &PathsFound, - &ChunksFound, - &PathsScavenged, - ScavengeIndex](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_FindScavengeContent"); - - const ScavengeSource& Source = ScavengeSources[ScavengeIndex]; - ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengeIndex]; - ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengeIndex]; - - if (FindScavengeContent(Source, ScavengedLocalContent, ScavengedLookup)) - { - ScavengedPaths[ScavengeIndex] = Source.Path; - PathsFound += ScavengedLocalContent.Paths.size(); - ChunksFound += ScavengedLocalContent.ChunkedContent.ChunkHashes.size(); - } - else - { - ScavengedPaths[ScavengeIndex].clear(); - } - PathsScavenged++; - } - }); - } - { - ZEN_TRACE_CPU("ScavengeScan_Wait"); - - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(PendingWork); - std::string Details = fmt::format("{}/{} scanned. 
{} paths and {} chunks found for scavenging", - PathsScavenged.load(), - ScavengePathCount, - PathsFound.load(), - ChunksFound.load()); - ScavengeProgressBar.UpdateState( - {.Task = "Scavenging ", - .Details = Details, - .TotalCount = ScavengePathCount, - .RemainingCount = ScavengePathCount - PathsScavenged.load(), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }); - } - - ScavengeProgressBar.Finish(); - if (m_AbortFlag) - { - return; - } - - for (uint32_t ScavengedContentIndex = 0; - ScavengedContentIndex < ScavengedContents.size() && (!SequenceIndexesLeftToFindToRemoteIndex.empty()); - ScavengedContentIndex++) - { - const std::filesystem::path& ScavengePath = ScavengedPaths[ScavengedContentIndex]; - if (!ScavengePath.empty()) - { - const ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengedContentIndex]; - const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex]; - - for (uint32_t ScavengedSequenceIndex = 0; - ScavengedSequenceIndex < ScavengedLocalContent.ChunkedContent.SequenceRawHashes.size(); - ScavengedSequenceIndex++) - { - const IoHash& SequenceRawHash = ScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengedSequenceIndex]; - if (auto It = SequenceIndexesLeftToFindToRemoteIndex.find(SequenceRawHash); - It != SequenceIndexesLeftToFindToRemoteIndex.end()) - { - const uint32_t RemoteSequenceIndex = It->second; - const uint64_t RawSize = - m_RemoteContent.RawSizes[m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]]; - ZEN_ASSERT(RawSize > 0); - - const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[ScavengedSequenceIndex]; - ZEN_ASSERT_SLOW(IsFile((ScavengePath / ScavengedLocalContent.Paths[ScavengedPathIndex]).make_preferred())); - - ScavengedSequenceCopyOperations.push_back({.ScavengedContentIndex = ScavengedContentIndex, - .ScavengedPathIndex = ScavengedPathIndex, - .RemoteSequenceIndex = 
RemoteSequenceIndex, - .RawSize = RawSize}); - - SequenceIndexesLeftToFindToRemoteIndex.erase(SequenceRawHash); - SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = 0; - - m_CacheMappingStats.ScavengedPathsMatchingSequencesCount++; - m_CacheMappingStats.ScavengedPathsMatchingSequencesByteCount += RawSize; - } - } - ScavengedPathsCount++; - } - } - } - m_CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs(); - } - - uint32_t RemainingChunkCount = 0; - for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) - { - uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex); - if (ChunkWriteCount > 0) - { - RemainingChunkCount++; - } - } - - // Pick up all chunks in current local state - tsl::robin_map<IoHash, size_t, IoHash::Hasher> RawHashToCopyChunkDataIndex; - std::vector<CopyChunkData> CopyChunkDatas; - - if (!m_Options.PrimeCacheOnly && m_Options.EnableTargetFolderScavenging) - { - ZEN_TRACE_CPU("GetLocalChunks"); - - Stopwatch LocalTimer; - - ScavengeSourceForChunks(RemainingChunkCount, - RemoteChunkIndexNeedsCopyFromLocalFileFlags, - RawHashToCopyChunkDataIndex, - SequenceIndexChunksLeftToWriteCounters, - m_LocalContent, - m_LocalLookup, - CopyChunkDatas, - uint32_t(-1), - m_CacheMappingStats.LocalChunkMatchingRemoteCount, - m_CacheMappingStats.LocalChunkMatchingRemoteByteCount); - - m_CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs(); - } - - if (!m_Options.PrimeCacheOnly && m_Options.EnableOtherDownloadsScavenging) - { - ZEN_TRACE_CPU("GetScavengeChunks"); - - Stopwatch ScavengeTimer; - - for (uint32_t ScavengedContentIndex = 0; ScavengedContentIndex < ScavengedContents.size() && (RemainingChunkCount > 0); - ScavengedContentIndex++) - { - const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengedContentIndex]; - const ChunkedContentLookup& ScavengedLookup = 
ScavengedLookups[ScavengedContentIndex]; - - ScavengeSourceForChunks(RemainingChunkCount, - RemoteChunkIndexNeedsCopyFromLocalFileFlags, - RawHashToCopyChunkDataIndex, - SequenceIndexChunksLeftToWriteCounters, - ScavengedContent, - ScavengedLookup, - CopyChunkDatas, - ScavengedContentIndex, - m_CacheMappingStats.ScavengedChunkMatchingRemoteCount, - m_CacheMappingStats.ScavengedChunkMatchingRemoteByteCount); - } - m_CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs(); - } - - if (!m_Options.IsQuiet) - { - if (m_CacheMappingStats.CacheSequenceHashesCount > 0 || m_CacheMappingStats.CacheChunkCount > 0 || - m_CacheMappingStats.CacheBlockCount > 0) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Download cache: Found {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks in {}", - m_CacheMappingStats.CacheSequenceHashesCount, - NiceBytes(m_CacheMappingStats.CacheSequenceHashesByteCount), - m_CacheMappingStats.CacheChunkCount, - NiceBytes(m_CacheMappingStats.CacheChunkByteCount), - m_CacheMappingStats.CacheBlockCount, - NiceBytes(m_CacheMappingStats.CacheBlocksByteCount), - NiceTimeSpanMs(m_CacheMappingStats.CacheScanElapsedWallTimeUs / 1000)); - } - - if (m_CacheMappingStats.LocalPathsMatchingSequencesCount > 0 || m_CacheMappingStats.LocalChunkMatchingRemoteCount > 0) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Local state : Found {} ({}) chunk sequences, {} ({}) chunks in {}", - m_CacheMappingStats.LocalPathsMatchingSequencesCount, - NiceBytes(m_CacheMappingStats.LocalPathsMatchingSequencesByteCount), - m_CacheMappingStats.LocalChunkMatchingRemoteCount, - NiceBytes(m_CacheMappingStats.LocalChunkMatchingRemoteByteCount), - NiceTimeSpanMs(m_CacheMappingStats.LocalScanElapsedWallTimeUs / 1000)); - } - if (m_CacheMappingStats.ScavengedPathsMatchingSequencesCount > 0 || m_CacheMappingStats.ScavengedChunkMatchingRemoteCount > 0) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Scavenge of {} paths, found {} ({}) chunk sequences, {} ({}) chunks in {}", 
- ScavengedPathsCount, - m_CacheMappingStats.ScavengedPathsMatchingSequencesCount, - NiceBytes(m_CacheMappingStats.ScavengedPathsMatchingSequencesByteCount), - m_CacheMappingStats.ScavengedChunkMatchingRemoteCount, - NiceBytes(m_CacheMappingStats.ScavengedChunkMatchingRemoteByteCount), - NiceTimeSpanMs(m_CacheMappingStats.ScavengeElapsedWallTimeUs / 1000)); - } - } - - uint64_t BytesToWrite = 0; - - for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) - { - uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex); - if (ChunkWriteCount > 0) - { - BytesToWrite += m_RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] * ChunkWriteCount; - if (!RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) - { - RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex] = true; - } - } - } - - for (const ScavengedSequenceCopyOperation& ScavengeCopyOp : ScavengedSequenceCopyOperations) - { - BytesToWrite += ScavengeCopyOp.RawSize; - } - - uint64_t BytesToValidate = m_Options.ValidateCompletedSequences ? 
BytesToWrite : 0; - - uint64_t TotalRequestCount = 0; - uint64_t TotalPartWriteCount = 0; - std::atomic<uint64_t> WritePartsComplete = 0; - - tsl::robin_map<std::string, uint32_t> RemotePathToRemoteIndex; - RemotePathToRemoteIndex.reserve(m_RemoteContent.Paths.size()); - for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++) - { - RemotePathToRemoteIndex.insert({m_RemoteContent.Paths[RemotePathIndex].generic_string(), RemotePathIndex}); - } - - CheckRequiredDiskSpace(RemotePathToRemoteIndex); - - BlobsExistsResult ExistsResult; - { - ChunkBlockAnalyser BlockAnalyser(m_LogOutput, - m_BlockDescriptions, - ChunkBlockAnalyser::Options{.IsQuiet = m_Options.IsQuiet, - .IsVerbose = m_Options.IsVerbose, - .HostLatencySec = m_Storage.BuildStorageLatencySec, - .HostHighSpeedLatencySec = m_Storage.CacheLatencySec}); - - std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = BlockAnalyser.GetNeeded( - m_RemoteLookup.ChunkHashToChunkIndex, - [&](uint32_t RemoteChunkIndex) -> bool { return RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]; }); - - std::vector<uint32_t> FetchBlockIndexes; - std::vector<uint32_t> CachedChunkBlockIndexes; - - { - ZEN_TRACE_CPU("BlockCacheFileExists"); - for (const ChunkBlockAnalyser::NeededBlock& NeededBlock : NeededBlocks) - { - if (m_Options.PrimeCacheOnly) - { - FetchBlockIndexes.push_back(NeededBlock.BlockIndex); - } - else - { - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex]; - bool UsingCachedBlock = false; - if (auto It = CachedBlocksFound.find(BlockDescription.BlockHash); It != CachedBlocksFound.end()) - { - TotalPartWriteCount++; - - std::filesystem::path BlockPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(); - if (IsFile(BlockPath)) - { - CachedChunkBlockIndexes.push_back(NeededBlock.BlockIndex); - UsingCachedBlock = true; - } - } - if (!UsingCachedBlock) - { - 
FetchBlockIndexes.push_back(NeededBlock.BlockIndex); - } - } - } - } - - std::vector<uint32_t> NeededLooseChunkIndexes; - - { - NeededLooseChunkIndexes.reserve(m_LooseChunkHashes.size()); - for (uint32_t LooseChunkIndex = 0; LooseChunkIndex < m_LooseChunkHashes.size(); LooseChunkIndex++) - { - const IoHash& ChunkHash = m_LooseChunkHashes[LooseChunkIndex]; - auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); - ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end()); - const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second; - - if (RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Skipping chunk {} due to cache reuse", - m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]); - } - continue; - } - - bool NeedsCopy = true; - if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false)) - { - uint64_t WriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex); - if (WriteCount == 0) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Skipping chunk {} due to cache reuse", - m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]); - } - } - else - { - NeededLooseChunkIndexes.push_back(LooseChunkIndex); - } - } - } - } - - if (m_Storage.BuildCacheStorage) - { - ZEN_TRACE_CPU("BlobCacheExistCheck"); - Stopwatch Timer; - - std::vector<IoHash> BlobHashes; - BlobHashes.reserve(NeededLooseChunkIndexes.size() + FetchBlockIndexes.size()); - - for (const uint32_t LooseChunkIndex : NeededLooseChunkIndexes) - { - BlobHashes.push_back(m_LooseChunkHashes[LooseChunkIndex]); - } - - for (uint32_t BlockIndex : FetchBlockIndexes) - { - BlobHashes.push_back(m_BlockDescriptions[BlockIndex].BlockHash); - } - - const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = - m_Storage.BuildCacheStorage->BlobsExists(m_BuildId, 
BlobHashes); - - if (CacheExistsResult.size() == BlobHashes.size()) - { - ExistsResult.ExistingBlobs.reserve(CacheExistsResult.size()); - for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++) - { - if (CacheExistsResult[BlobIndex].HasBody) - { - ExistsResult.ExistingBlobs.insert(BlobHashes[BlobIndex]); - } - } - } - ExistsResult.ElapsedTimeMs = Timer.GetElapsedTimeMs(); - if (!ExistsResult.ExistingBlobs.empty() && !m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Remote cache : Found {} out of {} needed blobs in {}", - ExistsResult.ExistingBlobs.size(), - BlobHashes.size(), - NiceTimeSpanMs(ExistsResult.ElapsedTimeMs)); - } - } - - std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> BlockPartialDownloadModes; - if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::Off) - { - BlockPartialDownloadModes.resize(m_BlockDescriptions.size(), ChunkBlockAnalyser::EPartialBlockDownloadMode::Off); - } - else - { - BlockPartialDownloadModes.reserve(m_BlockDescriptions.size()); - for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++) - { - const bool BlockExistInCache = ExistsResult.ExistingBlobs.contains(m_BlockDescriptions[BlockIndex].BlockHash); - if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::All) - { - BlockPartialDownloadModes.push_back(BlockExistInCache - ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed - : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange); - } - else if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::ZenCacheOnly) - { - BlockPartialDownloadModes.push_back(BlockExistInCache - ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed - : ChunkBlockAnalyser::EPartialBlockDownloadMode::Off); - } - else if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::Mixed) - { - BlockPartialDownloadModes.push_back(BlockExistInCache - ? 
ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed - : ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange); - } - } - } - ZEN_ASSERT(BlockPartialDownloadModes.size() == m_BlockDescriptions.size()); - - ChunkBlockAnalyser::BlockResult PartialBlocks = - BlockAnalyser.CalculatePartialBlockDownloads(NeededBlocks, BlockPartialDownloadModes); - - struct LooseChunkHashWorkData - { - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs; - uint32_t RemoteChunkIndex = (uint32_t)-1; - }; - - TotalRequestCount += NeededLooseChunkIndexes.size(); - TotalPartWriteCount += NeededLooseChunkIndexes.size(); - TotalRequestCount += PartialBlocks.BlockRanges.size(); - TotalPartWriteCount += PartialBlocks.BlockRanges.size(); - TotalRequestCount += PartialBlocks.FullBlockIndexes.size(); - TotalPartWriteCount += PartialBlocks.FullBlockIndexes.size(); - - std::vector<LooseChunkHashWorkData> LooseChunkHashWorks; - for (uint32_t LooseChunkIndex : NeededLooseChunkIndexes) - { - const IoHash& ChunkHash = m_LooseChunkHashes[LooseChunkIndex]; - auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); - ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end()); - const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second; - - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = - GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex); - - ZEN_ASSERT(!ChunkTargetPtrs.empty()); - LooseChunkHashWorks.push_back( - LooseChunkHashWorkData{.ChunkTargetPtrs = ChunkTargetPtrs, .RemoteChunkIndex = RemoteChunkIndex}); - } - - ZEN_TRACE_CPU("WriteChunks"); - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::WriteChunks, (uint32_t)TaskSteps::StepCount); - - Stopwatch WriteTimer; - - FilteredRate FilteredDownloadedBytesPerSecond; - FilteredRate FilteredWrittenBytesPerSecond; - - std::unique_ptr<OperationLogOutput::ProgressBar> WriteProgressBarPtr( - 
m_LogOutput.CreateProgressBar(m_Options.PrimeCacheOnly ? "Downloading" : "Writing")); - OperationLogOutput::ProgressBar& WriteProgressBar(*WriteProgressBarPtr); - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - - TotalPartWriteCount += CopyChunkDatas.size(); - TotalPartWriteCount += ScavengedSequenceCopyOperations.size(); - - BufferedWriteFileCache WriteCache; - - for (uint32_t ScavengeOpIndex = 0; ScavengeOpIndex < ScavengedSequenceCopyOperations.size(); ScavengeOpIndex++) - { - if (m_AbortFlag) - { - break; - } - if (!m_Options.PrimeCacheOnly) - { - Work.ScheduleWork( - m_IOWorkerPool, - [this, - &ScavengedPaths, - &ScavengedSequenceCopyOperations, - &ScavengedContents, - &FilteredWrittenBytesPerSecond, - ScavengeOpIndex, - &WritePartsComplete, - TotalPartWriteCount](std::atomic<bool>&) mutable { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_WriteScavenged"); - - FilteredWrittenBytesPerSecond.Start(); - - const ScavengedSequenceCopyOperation& ScavengeOp = ScavengedSequenceCopyOperations[ScavengeOpIndex]; - const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengeOp.ScavengedContentIndex]; - const std::filesystem::path& ScavengeRootPath = ScavengedPaths[ScavengeOp.ScavengedContentIndex]; - - WriteScavengedSequenceToCache(ScavengeRootPath, ScavengedContent, ScavengeOp); - - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - } - }); - } - } - - for (uint32_t LooseChunkHashWorkIndex = 0; LooseChunkHashWorkIndex < LooseChunkHashWorks.size(); LooseChunkHashWorkIndex++) - { - if (m_AbortFlag) - { - break; - } - - if (m_Options.PrimeCacheOnly) - { - const uint32_t RemoteChunkIndex = LooseChunkHashWorks[LooseChunkHashWorkIndex].RemoteChunkIndex; - if (ExistsResult.ExistingBlobs.contains(m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex])) - { - m_DownloadStats.RequestsCompleteCount++; - continue; - } - } - - Work.ScheduleWork( - 
m_IOWorkerPool, - [this, - &SequenceIndexChunksLeftToWriteCounters, - &Work, - &ExistsResult, - &WritePartsComplete, - &LooseChunkHashWorks, - LooseChunkHashWorkIndex, - TotalRequestCount, - TotalPartWriteCount, - &WriteCache, - &FilteredDownloadedBytesPerSecond, - &FilteredWrittenBytesPerSecond](std::atomic<bool>&) mutable { - ZEN_TRACE_CPU("Async_ReadPreDownloadedChunk"); - if (!m_AbortFlag) - { - LooseChunkHashWorkData& LooseChunkHashWork = LooseChunkHashWorks[LooseChunkHashWorkIndex]; - const uint32_t RemoteChunkIndex = LooseChunkHashWorks[LooseChunkHashWorkIndex].RemoteChunkIndex; - WriteLooseChunk(RemoteChunkIndex, - ExistsResult, - SequenceIndexChunksLeftToWriteCounters, - WritePartsComplete, - std::move(LooseChunkHashWork.ChunkTargetPtrs), - WriteCache, - Work, - TotalRequestCount, - TotalPartWriteCount, - FilteredDownloadedBytesPerSecond, - FilteredWrittenBytesPerSecond); - } - }, - WorkerThreadPool::EMode::EnableBacklog); - } - - std::unique_ptr<CloneQueryInterface> CloneQuery; - if (m_Options.AllowFileClone) - { - CloneQuery = GetCloneQueryInterface(m_CacheFolderPath); - } - - for (size_t CopyDataIndex = 0; CopyDataIndex < CopyChunkDatas.size(); CopyDataIndex++) - { - ZEN_ASSERT(!m_Options.PrimeCacheOnly); - if (m_AbortFlag) - { - break; - } - - Work.ScheduleWork(m_IOWorkerPool, - [this, - &CloneQuery, - &SequenceIndexChunksLeftToWriteCounters, - &WriteCache, - &Work, - &FilteredWrittenBytesPerSecond, - &CopyChunkDatas, - &ScavengedContents, - &ScavengedLookups, - &ScavengedPaths, - &WritePartsComplete, - TotalPartWriteCount, - CopyDataIndex](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_CopyLocal"); - - FilteredWrittenBytesPerSecond.Start(); - const CopyChunkData& CopyData = CopyChunkDatas[CopyDataIndex]; - - std::vector<uint32_t> WrittenSequenceIndexes = WriteLocalChunkToCache(CloneQuery.get(), - CopyData, - ScavengedContents, - ScavengedLookups, - ScavengedPaths, - WriteCache); - WritePartsComplete++; - if (!m_AbortFlag) - { - 
if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - - // Write tracking, updating this must be done without any files open - std::vector<uint32_t> CompletedChunkSequences; - for (uint32_t RemoteSequenceIndex : WrittenSequenceIndexes) - { - if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters)) - { - CompletedChunkSequences.push_back(RemoteSequenceIndex); - } - } - WriteCache.Close(CompletedChunkSequences); - VerifyAndCompleteChunkSequencesAsync(CompletedChunkSequences, Work); - } - } - }); - } - - for (uint32_t BlockIndex : CachedChunkBlockIndexes) - { - ZEN_ASSERT(!m_Options.PrimeCacheOnly); - if (m_AbortFlag) - { - break; - } - - Work.ScheduleWork( - m_IOWorkerPool, - [this, - &RemoteChunkIndexNeedsCopyFromSourceFlags, - &SequenceIndexChunksLeftToWriteCounters, - &WriteCache, - &Work, - &FilteredWrittenBytesPerSecond, - &WritePartsComplete, - TotalPartWriteCount, - BlockIndex](std::atomic<bool>&) mutable { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_WriteCachedBlock"); - - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - FilteredWrittenBytesPerSecond.Start(); - - std::filesystem::path BlockChunkPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(); - IoBuffer BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); - if (!BlockBuffer) - { - throw std::runtime_error( - fmt::format("Can not read block {} at {}", BlockDescription.BlockHash, BlockChunkPath)); - } - - if (!m_AbortFlag) - { - if (!WriteChunksBlockToCache(BlockDescription, - SequenceIndexChunksLeftToWriteCounters, - Work, - CompositeBuffer(std::move(BlockBuffer)), - RemoteChunkIndexNeedsCopyFromSourceFlags, - WriteCache)) - { - std::error_code DummyEc; - RemoveFile(BlockChunkPath, DummyEc); - throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash)); - } - - std::error_code Ec = TryRemoveFile(BlockChunkPath); - if (Ec) - { - 
ZEN_OPERATION_LOG_DEBUG(m_LogOutput, - "Failed removing file '{}', reason: ({}) {}", - BlockChunkPath, - Ec.value(), - Ec.message()); - } - - WritePartsComplete++; - - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - } - } - }); - } - - for (size_t BlockRangeIndex = 0; BlockRangeIndex < PartialBlocks.BlockRanges.size();) - { - ZEN_ASSERT(!m_Options.PrimeCacheOnly); - if (m_AbortFlag) - { - break; - } - - size_t RangeCount = 1; - size_t RangesLeft = PartialBlocks.BlockRanges.size() - BlockRangeIndex; - const ChunkBlockAnalyser::BlockRangeDescriptor& CurrentBlockRange = PartialBlocks.BlockRanges[BlockRangeIndex]; - while (RangeCount < RangesLeft && - CurrentBlockRange.BlockIndex == PartialBlocks.BlockRanges[BlockRangeIndex + RangeCount].BlockIndex) - { - RangeCount++; - } - - Work.ScheduleWork( - m_NetworkPool, - [this, - &RemoteChunkIndexNeedsCopyFromSourceFlags, - &SequenceIndexChunksLeftToWriteCounters, - &ExistsResult, - &WriteCache, - &FilteredDownloadedBytesPerSecond, - TotalRequestCount, - &WritePartsComplete, - TotalPartWriteCount, - &FilteredWrittenBytesPerSecond, - &Work, - &PartialBlocks, - BlockRangeStartIndex = BlockRangeIndex, - RangeCount](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_GetPartialBlockRanges"); - - FilteredDownloadedBytesPerSecond.Start(); - - for (size_t BlockRangeIndex = BlockRangeStartIndex; BlockRangeIndex < BlockRangeStartIndex + RangeCount; - BlockRangeIndex++) - { - ZEN_TRACE_CPU("GetPartialBlock"); - - const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = PartialBlocks.BlockRanges[BlockRangeIndex]; - - DownloadPartialBlock( - BlockRange, - ExistsResult, - [this, - &RemoteChunkIndexNeedsCopyFromSourceFlags, - &SequenceIndexChunksLeftToWriteCounters, - &WritePartsComplete, - &WriteCache, - &Work, - TotalRequestCount, - TotalPartWriteCount, - &FilteredDownloadedBytesPerSecond, - &FilteredWrittenBytesPerSecond, - &BlockRange](IoBuffer&& InMemoryBuffer, 
const std::filesystem::path& OnDiskPath) { - if (m_DownloadStats.RequestsCompleteCount == TotalRequestCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - - if (!m_AbortFlag) - { - Work.ScheduleWork( - m_IOWorkerPool, - [this, - &RemoteChunkIndexNeedsCopyFromSourceFlags, - &SequenceIndexChunksLeftToWriteCounters, - &WritePartsComplete, - &WriteCache, - &Work, - TotalPartWriteCount, - &FilteredWrittenBytesPerSecond, - &BlockRange, - BlockChunkPath = std::filesystem::path(OnDiskPath), - BlockPartialBuffer = std::move(InMemoryBuffer)](std::atomic<bool>&) mutable { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_WritePartialBlock"); - - const uint32_t BlockIndex = BlockRange.BlockIndex; - - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - - if (BlockChunkPath.empty()) - { - ZEN_ASSERT(BlockPartialBuffer); - } - else - { - ZEN_ASSERT(!BlockPartialBuffer); - BlockPartialBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); - if (!BlockPartialBuffer) - { - throw std::runtime_error( - fmt::format("Could not open downloaded block {} from {}", - BlockDescription.BlockHash, - BlockChunkPath)); - } - } - - FilteredWrittenBytesPerSecond.Start(); - - if (!WritePartialBlockChunksToCache( - BlockDescription, - SequenceIndexChunksLeftToWriteCounters, - Work, - CompositeBuffer(std::move(BlockPartialBuffer)), - BlockRange.ChunkBlockIndexStart, - BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount - 1, - RemoteChunkIndexNeedsCopyFromSourceFlags, - WriteCache)) - { - std::error_code DummyEc; - RemoveFile(BlockChunkPath, DummyEc); - throw std::runtime_error( - fmt::format("Partial block {} is malformed", BlockDescription.BlockHash)); - } - - std::error_code Ec = TryRemoveFile(BlockChunkPath); - if (Ec) - { - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, - "Failed removing file '{}', reason: ({}) {}", - BlockChunkPath, - Ec.value(), - Ec.message()); - } - - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - 
FilteredWrittenBytesPerSecond.Stop(); - } - } - }, - OnDiskPath.empty() ? WorkerThreadPool::EMode::DisableBacklog - : WorkerThreadPool::EMode::EnableBacklog); - } - }); - } - } - }); - BlockRangeIndex += RangeCount; - } - - for (uint32_t BlockIndex : PartialBlocks.FullBlockIndexes) - { - if (m_AbortFlag) - { - break; - } - - if (m_Options.PrimeCacheOnly && ExistsResult.ExistingBlobs.contains(m_BlockDescriptions[BlockIndex].BlockHash)) - { - m_DownloadStats.RequestsCompleteCount++; - continue; - } - - Work.ScheduleWork( - m_NetworkPool, - [this, - &WritePartsComplete, - TotalPartWriteCount, - &FilteredWrittenBytesPerSecond, - &ExistsResult, - &Work, - &WriteCache, - &RemoteChunkIndexNeedsCopyFromSourceFlags, - &SequenceIndexChunksLeftToWriteCounters, - &FilteredDownloadedBytesPerSecond, - TotalRequestCount, - BlockIndex](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_GetFullBlock"); - - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - - FilteredDownloadedBytesPerSecond.Start(); - - IoBuffer BlockBuffer; - const bool ExistsInCache = - m_Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash); - if (ExistsInCache) - { - BlockBuffer = m_Storage.BuildCacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash); - } - if (!BlockBuffer) - { - BlockBuffer = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash); - if (BlockBuffer && m_Storage.BuildCacheStorage && m_Options.PopulateCache) - { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - BlockDescription.BlockHash, - ZenContentType::kCompressedBinary, - CompositeBuffer(SharedBuffer(BlockBuffer))); - } - } - if (!BlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); - } - if (!m_AbortFlag) - { - uint64_t BlockSize = BlockBuffer.GetSize(); - m_DownloadStats.DownloadedBlockCount++; - m_DownloadStats.DownloadedBlockByteCount += BlockSize; - 
m_DownloadStats.RequestsCompleteCount++; - if (m_DownloadStats.RequestsCompleteCount == TotalRequestCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - - if (!m_Options.PrimeCacheOnly) - { - std::filesystem::path BlockChunkPath; - - // Check if the dowloaded block is file based and we can move it directly without rewriting it - { - IoBufferFileReference FileRef; - if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && - (FileRef.FileChunkSize == BlockSize)) - { - ZEN_TRACE_CPU("MoveTempFullBlock"); - std::error_code Ec; - std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); - if (!Ec) - { - BlockBuffer.SetDeleteOnClose(false); - BlockBuffer = {}; - BlockChunkPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(); - RenameFile(TempBlobPath, BlockChunkPath, Ec); - if (Ec) - { - BlockChunkPath = std::filesystem::path{}; - - // Re-open the temp file again - BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); - BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true); - BlockBuffer.SetDeleteOnClose(true); - } - } - } - } - - if (BlockChunkPath.empty() && (BlockSize > m_Options.MaximumInMemoryPayloadSize)) - { - ZEN_TRACE_CPU("WriteTempFullBlock"); - // Could not be moved and rather large, lets store it on disk - BlockChunkPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(); - TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer); - BlockBuffer = {}; - } - - if (!m_AbortFlag) - { - Work.ScheduleWork( - m_IOWorkerPool, - [this, - &Work, - &RemoteChunkIndexNeedsCopyFromSourceFlags, - &SequenceIndexChunksLeftToWriteCounters, - BlockIndex, - &WriteCache, - &WritePartsComplete, - TotalPartWriteCount, - &FilteredWrittenBytesPerSecond, - BlockChunkPath, - BlockBuffer = std::move(BlockBuffer)](std::atomic<bool>&) mutable { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_WriteFullBlock"); - - const ChunkBlockDescription& BlockDescription = 
m_BlockDescriptions[BlockIndex]; - - if (BlockChunkPath.empty()) - { - ZEN_ASSERT(BlockBuffer); - } - else - { - ZEN_ASSERT(!BlockBuffer); - BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); - if (!BlockBuffer) - { - throw std::runtime_error( - fmt::format("Could not open dowloaded block {} from {}", - BlockDescription.BlockHash, - BlockChunkPath)); - } - } - - FilteredWrittenBytesPerSecond.Start(); - if (!WriteChunksBlockToCache(BlockDescription, - SequenceIndexChunksLeftToWriteCounters, - Work, - CompositeBuffer(std::move(BlockBuffer)), - RemoteChunkIndexNeedsCopyFromSourceFlags, - WriteCache)) - { - std::error_code DummyEc; - RemoveFile(BlockChunkPath, DummyEc); - throw std::runtime_error( - fmt::format("Block {} is malformed", BlockDescription.BlockHash)); - } - - if (!BlockChunkPath.empty()) - { - std::error_code Ec = TryRemoveFile(BlockChunkPath); - if (Ec) - { - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, - "Failed removing file '{}', reason: ({}) {}", - BlockChunkPath, - Ec.value(), - Ec.message()); - } - } - - WritePartsComplete++; - - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - } - }, - BlockChunkPath.empty() ? WorkerThreadPool::EMode::DisableBacklog - : WorkerThreadPool::EMode::EnableBacklog); - } - } - } - } - }); - } - - { - ZEN_TRACE_CPU("WriteChunks_Wait"); - - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(PendingWork); - uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + - m_DownloadStats.DownloadedBlockByteCount.load() + - +m_DownloadStats.DownloadedPartialBlockByteCount.load(); - FilteredWrittenBytesPerSecond.Update(m_DiskStats.WriteByteCount.load()); - FilteredDownloadedBytesPerSecond.Update(DownloadedBytes); - std::string DownloadRateString = - (m_DownloadStats.RequestsCompleteCount == TotalRequestCount) - ? 
"" - : fmt::format(" {}bits/s", NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8)); - std::string CloneDetails; - if (m_DiskStats.CloneCount.load() > 0) - { - CloneDetails = fmt::format(" ({} cloned)", NiceBytes(m_DiskStats.CloneByteCount.load())); - } - std::string WriteDetails = m_Options.PrimeCacheOnly ? "" - : fmt::format(" {}/{} ({}B/s) written{}", - NiceBytes(m_WrittenChunkByteCount.load()), - NiceBytes(BytesToWrite), - NiceNum(FilteredWrittenBytesPerSecond.GetCurrent()), - CloneDetails); - - std::string Details = fmt::format("{}/{} ({}{}) downloaded.{}", - m_DownloadStats.RequestsCompleteCount.load(), - TotalRequestCount, - NiceBytes(DownloadedBytes), - DownloadRateString, - WriteDetails); - - std::string Task; - if (m_Options.PrimeCacheOnly) - { - Task = "Downloading "; - } - else if ((m_WrittenChunkByteCount < BytesToWrite) || (BytesToValidate == 0)) - { - Task = "Writing chunks "; - } - else - { - Task = "Verifying chunks "; - } - - WriteProgressBar.UpdateState( - {.Task = Task, - .Details = Details, - .TotalCount = m_Options.PrimeCacheOnly ? TotalRequestCount : (BytesToWrite + BytesToValidate), - .RemainingCount = m_Options.PrimeCacheOnly ? 
(TotalRequestCount - m_DownloadStats.RequestsCompleteCount.load()) - : ((BytesToWrite + BytesToValidate) - - (m_WrittenChunkByteCount.load() + m_ValidatedChunkByteCount.load())), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }); - } - - CloneQuery.reset(); - - FilteredWrittenBytesPerSecond.Stop(); - FilteredDownloadedBytesPerSecond.Stop(); - - WriteProgressBar.Finish(); - if (m_AbortFlag) - { - return; - } - - if (!m_Options.PrimeCacheOnly) - { - uint32_t RawSequencesMissingWriteCount = 0; - for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceIndexChunksLeftToWriteCounters.size(); SequenceIndex++) - { - const auto& SequenceIndexChunksLeftToWriteCounter = SequenceIndexChunksLeftToWriteCounters[SequenceIndex]; - if (SequenceIndexChunksLeftToWriteCounter.load() != 0) - { - RawSequencesMissingWriteCount++; - const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; - const std::filesystem::path& IncompletePath = m_RemoteContent.Paths[PathIndex]; - ZEN_ASSERT(!IncompletePath.empty()); - const uint32_t ExpectedSequenceCount = m_RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]; - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "{}: Max count {}, Current count {}", - IncompletePath, - ExpectedSequenceCount, - SequenceIndexChunksLeftToWriteCounter.load()); - } - ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounter.load() <= ExpectedSequenceCount); - } - } - ZEN_ASSERT(RawSequencesMissingWriteCount == 0); - ZEN_ASSERT(m_WrittenChunkByteCount == BytesToWrite); - ZEN_ASSERT(m_ValidatedChunkByteCount == BytesToValidate); - } - - const uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + - m_DownloadStats.DownloadedBlockByteCount.load() + - m_DownloadStats.DownloadedPartialBlockByteCount.load(); - if (!m_Options.IsQuiet) - { - std::string CloneDetails; - if (m_DiskStats.CloneCount.load() > 0) - { - CloneDetails = fmt::format(" ({} cloned)", 
NiceBytes(m_DiskStats.CloneByteCount.load())); - } - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Downloaded {} ({}bits/s) in {}. Wrote {} ({}B/s){} in {}. Completed in {}", - NiceBytes(DownloadedBytes), - NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), DownloadedBytes * 8)), - NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000), - NiceBytes(m_WrittenChunkByteCount.load()), - NiceNum(GetBytesPerSecond(FilteredWrittenBytesPerSecond.GetElapsedTimeUS(), m_DiskStats.WriteByteCount.load())), - CloneDetails, - NiceTimeSpanMs(FilteredWrittenBytesPerSecond.GetElapsedTimeUS() / 1000), - NiceTimeSpanMs(WriteTimer.GetElapsedTimeMs())); - } - - m_WriteChunkStats.WriteChunksElapsedWallTimeUs = WriteTimer.GetElapsedTimeUs(); - m_WriteChunkStats.DownloadTimeUs = FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(); - m_WriteChunkStats.WriteTimeUs = FilteredWrittenBytesPerSecond.GetElapsedTimeUS(); - } - - if (m_Options.PrimeCacheOnly) - { - return; - } - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::PrepareTarget, (uint32_t)TaskSteps::StepCount); - - tsl::robin_map<uint32_t, uint32_t> RemotePathIndexToLocalPathIndex; - RemotePathIndexToLocalPathIndex.reserve(m_RemoteContent.Paths.size()); - - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceHashToLocalPathIndex; - std::vector<uint32_t> RemoveLocalPathIndexes; - - if (m_AbortFlag) - { - return; - } - - { - ZEN_TRACE_CPU("PrepareTarget"); - - tsl::robin_set<IoHash, IoHash::Hasher> CachedRemoteSequences; - - std::vector<uint32_t> FilesToCache; - - uint64_t MatchCount = 0; - uint64_t PathMismatchCount = 0; - uint64_t HashMismatchCount = 0; - std::atomic<uint64_t> CachedCount = 0; - std::atomic<uint64_t> CachedByteCount = 0; - uint64_t SkippedCount = 0; - uint64_t DeleteCount = 0; - for (uint32_t LocalPathIndex = 0; LocalPathIndex < m_LocalContent.Paths.size(); LocalPathIndex++) - { - if (m_AbortFlag) - { - break; - } - const IoHash& RawHash = 
m_LocalContent.RawHashes[LocalPathIndex]; - const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex]; - - ZEN_ASSERT_SLOW(IsFile((m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred())); - - if (m_Options.EnableTargetFolderScavenging) - { - if (!m_Options.WipeTargetFolder) - { - // Check if it is already in the correct place - if (auto RemotePathIt = RemotePathToRemoteIndex.find(LocalPath.generic_string()); - RemotePathIt != RemotePathToRemoteIndex.end()) - { - const uint32_t RemotePathIndex = RemotePathIt->second; - if (m_RemoteContent.RawHashes[RemotePathIndex] == RawHash) - { - // It is already in it's correct place - RemotePathIndexToLocalPathIndex[RemotePathIndex] = LocalPathIndex; - SequenceHashToLocalPathIndex.insert({RawHash, LocalPathIndex}); - MatchCount++; - continue; - } - else - { - HashMismatchCount++; - } - } - else - { - PathMismatchCount++; - } - } - - // Do we need it? - if (m_RemoteLookup.RawHashToSequenceIndex.contains(RawHash)) - { - if (!CachedRemoteSequences.contains(RawHash)) - { - // We need it, make sure we move it to the cache - FilesToCache.push_back(LocalPathIndex); - CachedRemoteSequences.insert(RawHash); - continue; - } - else - { - SkippedCount++; - } - } - } - - if (!m_Options.WipeTargetFolder) - { - // Explicitly delete the unneeded local file - RemoveLocalPathIndexes.push_back(LocalPathIndex); - DeleteCount++; - } - } - - if (m_AbortFlag) - { - return; - } - - { - ZEN_TRACE_CPU("CopyToCache"); - - Stopwatch Timer; - - std::unique_ptr<OperationLogOutput::ProgressBar> CacheLocalProgressBarPtr( - m_LogOutput.CreateProgressBar("Cache Local Data")); - OperationLogOutput::ProgressBar& CacheLocalProgressBar(*CacheLocalProgressBarPtr); - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - - for (uint32_t LocalPathIndex : FilesToCache) - { - if (m_AbortFlag) - { - break; - } - Work.ScheduleWork(m_IOWorkerPool, [this, &CachedCount, &CachedByteCount, 
LocalPathIndex](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_CopyToCache"); - - const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex]; - const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex]; - const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RawHash); - ZEN_ASSERT_SLOW(!IsFileWithRetry(CacheFilePath)); - const std::filesystem::path LocalFilePath = (m_Path / LocalPath).make_preferred(); - - std::error_code Ec = RenameFileWithRetry(LocalFilePath, CacheFilePath); - if (Ec) - { - ZEN_OPERATION_LOG_WARN(m_LogOutput, - "Failed to move file from '{}' to '{}', reason: ({}) {}, retrying...", - LocalFilePath, - CacheFilePath, - Ec.value(), - Ec.message()); - Ec = RenameFileWithRetry(LocalFilePath, CacheFilePath); - if (Ec) - { - throw std::system_error(std::error_code(Ec.value(), std::system_category()), - fmt::format("Failed to file from '{}' to '{}', reason: ({}) {}", - LocalFilePath, - CacheFilePath, - Ec.value(), - Ec.message())); - } - } - - CachedCount++; - CachedByteCount += m_LocalContent.RawSizes[LocalPathIndex]; - } - }); - } - - { - ZEN_TRACE_CPU("CopyToCache_Wait"); - - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(PendingWork); - const uint64_t WorkTotal = FilesToCache.size(); - const uint64_t WorkComplete = CachedCount.load(); - std::string Details = fmt::format("{}/{} ({}) files", WorkComplete, WorkTotal, NiceBytes(CachedByteCount)); - CacheLocalProgressBar.UpdateState( - {.Task = "Caching local ", - .Details = Details, - .TotalCount = gsl::narrow<uint64_t>(WorkTotal), - .RemainingCount = gsl::narrow<uint64_t>(WorkTotal - WorkComplete), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }); - } - - CacheLocalProgressBar.Finish(); - if (m_AbortFlag) - { - return; - } - - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, - "Local state 
prep: Match: {}, PathMismatch: {}, HashMismatch: {}, Cached: {} ({}), Skipped: {}, " - "Delete: {}", - MatchCount, - PathMismatchCount, - HashMismatchCount, - CachedCount.load(), - NiceBytes(CachedByteCount.load()), - SkippedCount, - DeleteCount); - } - } - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::FinalizeTarget, (uint32_t)TaskSteps::StepCount); - - if (m_Options.WipeTargetFolder) - { - ZEN_TRACE_CPU("WipeTarget"); - Stopwatch Timer; - - // Clean target folder - if (!CleanDirectory(m_LogOutput, m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.IsQuiet, m_Path, m_Options.ExcludeFolders)) - { - ZEN_OPERATION_LOG_WARN(m_LogOutput, "Some files in {} could not be removed", m_Path); - } - m_RebuildFolderStateStats.CleanFolderElapsedWallTimeUs = Timer.GetElapsedTimeUs(); - } - - if (m_AbortFlag) - { - return; - } - - { - ZEN_TRACE_CPU("FinalizeTree"); - - Stopwatch Timer; - - std::unique_ptr<OperationLogOutput::ProgressBar> RebuildProgressBarPtr(m_LogOutput.CreateProgressBar("Rebuild State")); - OperationLogOutput::ProgressBar& RebuildProgressBar(*RebuildProgressBarPtr); - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - - OutLocalFolderState.Paths.resize(m_RemoteContent.Paths.size()); - OutLocalFolderState.RawSizes.resize(m_RemoteContent.Paths.size()); - OutLocalFolderState.Attributes.resize(m_RemoteContent.Paths.size()); - OutLocalFolderState.ModificationTicks.resize(m_RemoteContent.Paths.size()); - - std::atomic<uint64_t> DeletedCount = 0; - - for (uint32_t LocalPathIndex : RemoveLocalPathIndexes) - { - if (m_AbortFlag) - { - break; - } - Work.ScheduleWork(m_IOWorkerPool, [this, &DeletedCount, LocalPathIndex](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_RemoveFile"); - - const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred(); - SetFileReadOnlyWithRetry(LocalFilePath, false); - RemoveFileWithRetry(LocalFilePath); - DeletedCount++; - } - 
}); - } - - std::atomic<uint64_t> TargetsComplete = 0; - - struct FinalizeTarget - { - IoHash RawHash; - uint32_t RemotePathIndex; - }; - - std::vector<FinalizeTarget> Targets; - Targets.reserve(m_RemoteContent.Paths.size()); - for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++) - { - Targets.push_back( - FinalizeTarget{.RawHash = m_RemoteContent.RawHashes[RemotePathIndex], .RemotePathIndex = RemotePathIndex}); - } - std::sort(Targets.begin(), Targets.end(), [](const FinalizeTarget& Lhs, const FinalizeTarget& Rhs) { - if (Lhs.RawHash < Rhs.RawHash) - { - return true; - } - else if (Lhs.RawHash > Rhs.RawHash) - { - return false; - } - return Lhs.RemotePathIndex < Rhs.RemotePathIndex; - }); - - size_t TargetOffset = 0; - while (TargetOffset < Targets.size()) - { - if (m_AbortFlag) - { - break; - } - - size_t TargetCount = 1; - while ((TargetOffset + TargetCount) < Targets.size() && - (Targets[TargetOffset + TargetCount].RawHash == Targets[TargetOffset].RawHash)) - { - TargetCount++; - } - - Work.ScheduleWork( - m_IOWorkerPool, - [this, - &SequenceHashToLocalPathIndex, - &Targets, - &RemotePathIndexToLocalPathIndex, - &OutLocalFolderState, - BaseTargetOffset = TargetOffset, - TargetCount, - &TargetsComplete](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_FinalizeChunkSequence"); - - size_t TargetOffset = BaseTargetOffset; - const IoHash& RawHash = Targets[TargetOffset].RawHash; - - if (RawHash == IoHash::Zero) - { - ZEN_TRACE_CPU("CreateEmptyFiles"); - while (TargetOffset < (BaseTargetOffset + TargetCount)) - { - const uint32_t RemotePathIndex = Targets[TargetOffset].RemotePathIndex; - ZEN_ASSERT(Targets[TargetOffset].RawHash == RawHash); - const std::filesystem::path& TargetPath = m_RemoteContent.Paths[RemotePathIndex]; - std::filesystem::path TargetFilePath = (m_Path / TargetPath).make_preferred(); - if (!RemotePathIndexToLocalPathIndex[RemotePathIndex]) - { - if 
(IsFileWithRetry(TargetFilePath)) - { - SetFileReadOnlyWithRetry(TargetFilePath, false); - } - else - { - CreateDirectories(TargetFilePath.parent_path()); - } - BasicFile OutputFile; - OutputFile.Open(TargetFilePath, BasicFile::Mode::kTruncate); - } - OutLocalFolderState.Paths[RemotePathIndex] = TargetPath; - OutLocalFolderState.RawSizes[RemotePathIndex] = m_RemoteContent.RawSizes[RemotePathIndex]; - - OutLocalFolderState.Attributes[RemotePathIndex] = - m_RemoteContent.Attributes.empty() - ? GetNativeFileAttributes(TargetFilePath) - : SetNativeFileAttributes(TargetFilePath, - m_RemoteContent.Platform, - m_RemoteContent.Attributes[RemotePathIndex]); - OutLocalFolderState.ModificationTicks[RemotePathIndex] = GetModificationTickFromPath(TargetFilePath); - - TargetOffset++; - TargetsComplete++; - } - } - else - { - ZEN_TRACE_CPU("FinalizeFile"); - ZEN_ASSERT(m_RemoteLookup.RawHashToSequenceIndex.contains(RawHash)); - const uint32_t FirstRemotePathIndex = Targets[TargetOffset].RemotePathIndex; - const std::filesystem::path& FirstTargetPath = m_RemoteContent.Paths[FirstRemotePathIndex]; - std::filesystem::path FirstTargetFilePath = (m_Path / FirstTargetPath).make_preferred(); - - if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(FirstRemotePathIndex); - InPlaceIt != RemotePathIndexToLocalPathIndex.end()) - { - ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath)); - } - else - { - if (IsFileWithRetry(FirstTargetFilePath)) - { - SetFileReadOnlyWithRetry(FirstTargetFilePath, false); - } - else - { - CreateDirectories(FirstTargetFilePath.parent_path()); - } - - if (auto InplaceIt = SequenceHashToLocalPathIndex.find(RawHash); - InplaceIt != SequenceHashToLocalPathIndex.end()) - { - ZEN_TRACE_CPU("Copy"); - const uint32_t LocalPathIndex = InplaceIt->second; - const std::filesystem::path& SourcePath = m_LocalContent.Paths[LocalPathIndex]; - std::filesystem::path SourceFilePath = (m_Path / SourcePath).make_preferred(); - ZEN_ASSERT_SLOW(IsFileWithRetry(SourceFilePath)); 
- - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, - "Copying from '{}' -> '{}'", - SourceFilePath, - FirstTargetFilePath); - const uint64_t RawSize = m_LocalContent.RawSizes[LocalPathIndex]; - FastCopyFile(m_Options.AllowFileClone, - m_Options.UseSparseFiles, - SourceFilePath, - FirstTargetFilePath, - RawSize, - m_DiskStats.WriteCount, - m_DiskStats.WriteByteCount, - m_DiskStats.CloneCount, - m_DiskStats.CloneByteCount); - - m_RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++; - } - else - { - ZEN_TRACE_CPU("Rename"); - const std::filesystem::path CacheFilePath = - GetFinalChunkedSequenceFileName(m_CacheFolderPath, RawHash); - ZEN_ASSERT_SLOW(IsFileWithRetry(CacheFilePath)); - - std::error_code Ec = RenameFileWithRetry(CacheFilePath, FirstTargetFilePath); - if (Ec) - { - ZEN_OPERATION_LOG_WARN(m_LogOutput, - "Failed to move file from '{}' to '{}', reason: ({}) {}, retrying...", - CacheFilePath, - FirstTargetFilePath, - Ec.value(), - Ec.message()); - Ec = RenameFileWithRetry(CacheFilePath, FirstTargetFilePath); - if (Ec) - { - throw std::system_error( - std::error_code(Ec.value(), std::system_category()), - fmt::format("Failed to move file from '{}' to '{}', reason: ({}) {}", - CacheFilePath, - FirstTargetFilePath, - Ec.value(), - Ec.message())); - } - } - - m_RebuildFolderStateStats.FinalizeTreeFilesMovedCount++; - } - } - - OutLocalFolderState.Paths[FirstRemotePathIndex] = FirstTargetPath; - OutLocalFolderState.RawSizes[FirstRemotePathIndex] = m_RemoteContent.RawSizes[FirstRemotePathIndex]; - - OutLocalFolderState.Attributes[FirstRemotePathIndex] = - m_RemoteContent.Attributes.empty() - ? 
GetNativeFileAttributes(FirstTargetFilePath) - : SetNativeFileAttributes(FirstTargetFilePath, - m_RemoteContent.Platform, - m_RemoteContent.Attributes[FirstRemotePathIndex]); - OutLocalFolderState.ModificationTicks[FirstRemotePathIndex] = - GetModificationTickFromPath(FirstTargetFilePath); - - TargetOffset++; - TargetsComplete++; - - while (TargetOffset < (BaseTargetOffset + TargetCount)) - { - const uint32_t RemotePathIndex = Targets[TargetOffset].RemotePathIndex; - ZEN_ASSERT(Targets[TargetOffset].RawHash == RawHash); - const std::filesystem::path& TargetPath = m_RemoteContent.Paths[RemotePathIndex]; - std::filesystem::path TargetFilePath = (m_Path / TargetPath).make_preferred(); - - if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(RemotePathIndex); - InPlaceIt != RemotePathIndexToLocalPathIndex.end()) - { - ZEN_ASSERT_SLOW(IsFileWithRetry(TargetFilePath)); - } - else - { - ZEN_TRACE_CPU("Copy"); - if (IsFileWithRetry(TargetFilePath)) - { - SetFileReadOnlyWithRetry(TargetFilePath, false); - } - else - { - CreateDirectories(TargetFilePath.parent_path()); - } - - ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath)); - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, - "Copying from '{}' -> '{}'", - FirstTargetFilePath, - TargetFilePath); - const uint64_t RawSize = m_RemoteContent.RawSizes[RemotePathIndex]; - FastCopyFile(m_Options.AllowFileClone, - m_Options.UseSparseFiles, - FirstTargetFilePath, - TargetFilePath, - RawSize, - m_DiskStats.WriteCount, - m_DiskStats.WriteByteCount, - m_DiskStats.CloneCount, - m_DiskStats.CloneByteCount); - - m_RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++; - } - - OutLocalFolderState.Paths[RemotePathIndex] = TargetPath; - OutLocalFolderState.RawSizes[RemotePathIndex] = m_RemoteContent.RawSizes[RemotePathIndex]; - - OutLocalFolderState.Attributes[RemotePathIndex] = - m_RemoteContent.Attributes.empty() - ? 
GetNativeFileAttributes(TargetFilePath) - : SetNativeFileAttributes(TargetFilePath, - m_RemoteContent.Platform, - m_RemoteContent.Attributes[RemotePathIndex]); - OutLocalFolderState.ModificationTicks[RemotePathIndex] = GetModificationTickFromPath(TargetFilePath); - - TargetOffset++; - TargetsComplete++; - } - } - } - }); - - TargetOffset += TargetCount; - } - - { - ZEN_TRACE_CPU("FinalizeTree_Wait"); - - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(PendingWork); - const uint64_t WorkTotal = Targets.size() + RemoveLocalPathIndexes.size(); - const uint64_t WorkComplete = TargetsComplete.load() + DeletedCount.load(); - std::string Details = fmt::format("{}/{} files", WorkComplete, WorkTotal); - RebuildProgressBar.UpdateState({.Task = "Rebuilding state ", - .Details = Details, - .TotalCount = gsl::narrow<uint64_t>(WorkTotal), - .RemainingCount = gsl::narrow<uint64_t>(WorkTotal - WorkComplete), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }); - } - - m_RebuildFolderStateStats.FinalizeTreeElapsedWallTimeUs = Timer.GetElapsedTimeUs(); - RebuildProgressBar.Finish(); - } - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount); - } - catch (const std::exception&) - { - m_AbortFlag = true; - throw; - } -} - -void -BuildsOperationUpdateFolder::ScanCacheFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedChunkHashesFound, - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedSequenceHashesFound) -{ - ZEN_TRACE_CPU("ScanCacheFolder"); - - Stopwatch CacheTimer; - - DirectoryContent CacheDirContent; - GetDirectoryContent(m_CacheFolderPath, DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, CacheDirContent); - for (size_t Index = 0; Index < CacheDirContent.Files.size(); Index++) - { - if (m_Options.EnableTargetFolderScavenging) - { - IoHash FileHash; - 
if (IoHash::TryParse(CacheDirContent.Files[Index].filename().string(), FileHash)) - { - if (auto ChunkIt = m_RemoteLookup.ChunkHashToChunkIndex.find(FileHash); - ChunkIt != m_RemoteLookup.ChunkHashToChunkIndex.end()) - { - const uint32_t ChunkIndex = ChunkIt->second; - const uint64_t ChunkSize = m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; - if (ChunkSize == CacheDirContent.FileSizes[Index]) - { - OutCachedChunkHashesFound.insert({FileHash, ChunkIndex}); - m_CacheMappingStats.CacheChunkCount++; - m_CacheMappingStats.CacheChunkByteCount += ChunkSize; - continue; - } - } - else if (auto SequenceIt = m_RemoteLookup.RawHashToSequenceIndex.find(FileHash); - SequenceIt != m_RemoteLookup.RawHashToSequenceIndex.end()) - { - const uint32_t SequenceIndex = SequenceIt->second; - const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; - const uint64_t SequenceSize = m_RemoteContent.RawSizes[PathIndex]; - if (SequenceSize == CacheDirContent.FileSizes[Index]) - { - OutCachedSequenceHashesFound.insert({FileHash, SequenceIndex}); - m_CacheMappingStats.CacheSequenceHashesCount++; - m_CacheMappingStats.CacheSequenceHashesByteCount += SequenceSize; - - const std::filesystem::path CacheFilePath = - GetFinalChunkedSequenceFileName(m_CacheFolderPath, - m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); - ZEN_ASSERT_SLOW(IsFile(CacheFilePath)); - - continue; - } - } - } - } - std::error_code Ec = TryRemoveFile(CacheDirContent.Files[Index]); - if (Ec) - { - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, - "Failed removing file '{}', reason: ({}) {}", - CacheDirContent.Files[Index], - Ec.value(), - Ec.message()); - } - } - m_CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs(); -} - -void -BuildsOperationUpdateFolder::ScanTempBlocksFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedBlocksFound) -{ - ZEN_TRACE_CPU("ScanTempBlocksFolder"); - - Stopwatch CacheTimer; - - tsl::robin_map<IoHash, uint32_t, 
IoHash::Hasher> AllBlockSizes; - AllBlockSizes.reserve(m_BlockDescriptions.size()); - for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++) - { - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - AllBlockSizes.insert({BlockDescription.BlockHash, BlockIndex}); - } - - DirectoryContent BlockDirContent; - GetDirectoryContent(m_TempBlockFolderPath, - DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, - BlockDirContent); - OutCachedBlocksFound.reserve(BlockDirContent.Files.size()); - for (size_t Index = 0; Index < BlockDirContent.Files.size(); Index++) - { - if (m_Options.EnableTargetFolderScavenging) - { - IoHash FileHash; - if (IoHash::TryParse(BlockDirContent.Files[Index].filename().string(), FileHash)) - { - if (auto BlockIt = AllBlockSizes.find(FileHash); BlockIt != AllBlockSizes.end()) - { - const uint32_t BlockIndex = BlockIt->second; - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - uint64_t BlockSize = CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize; - for (uint64_t ChunkSize : BlockDescription.ChunkCompressedLengths) - { - BlockSize += ChunkSize; - } - - if (BlockSize == BlockDirContent.FileSizes[Index]) - { - OutCachedBlocksFound.insert({FileHash, BlockIndex}); - m_CacheMappingStats.CacheBlockCount++; - m_CacheMappingStats.CacheBlocksByteCount += BlockSize; - continue; - } - } - } - } - std::error_code Ec = TryRemoveFile(BlockDirContent.Files[Index]); - if (Ec) - { - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, - "Failed removing file '{}', reason: ({}) {}", - BlockDirContent.Files[Index], - Ec.value(), - Ec.message()); - } - } - - m_CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs(); -} - -std::vector<BuildsOperationUpdateFolder::ScavengeSource> -BuildsOperationUpdateFolder::FindScavengeSources() -{ - ZEN_TRACE_CPU("FindScavengeSources"); - - const bool TargetPathExists = 
IsDir(m_Path); - - std::vector<std::filesystem::path> StatePaths = GetDownloadedStatePaths(m_Options.SystemRootDir); - - std::vector<ScavengeSource> Result; - for (const std::filesystem::path& EntryPath : StatePaths) - { - if (IsFile(EntryPath)) - { - bool DeleteEntry = false; - - try - { - BuildsDownloadInfo Info = ReadDownloadedInfoFile(EntryPath); - const bool LocalPathExists = !Info.LocalPath.empty() && IsDir(Info.LocalPath); - const bool LocalStateFileExists = IsFile(Info.StateFilePath); - if (LocalPathExists && LocalStateFileExists) - { - if (TargetPathExists && std::filesystem::equivalent(Info.LocalPath, m_Path)) - { - DeleteEntry = true; - } - else - { - Result.push_back({.StateFilePath = std::move(Info.StateFilePath), .Path = std::move(Info.LocalPath)}); - } - } - else - { - DeleteEntry = true; - } - } - catch (const std::exception& Ex) - { - ZEN_OPERATION_LOG_WARN(m_LogOutput, "{}", Ex.what()); - DeleteEntry = true; - } - - if (DeleteEntry) - { - std::error_code DummyEc; - std::filesystem::remove(EntryPath, DummyEc); - } - } - } - return Result; -} - -std::vector<uint32_t> -BuildsOperationUpdateFolder::ScanTargetFolder(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedChunkHashesFound, - const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound) -{ - ZEN_TRACE_CPU("ScanTargetFolder"); - - Stopwatch LocalTimer; - - std::vector<uint32_t> MissingSequenceIndexes; - - for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < m_RemoteContent.ChunkedContent.SequenceRawHashes.size(); - RemoteSequenceIndex++) - { - const IoHash& RemoteSequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(m_RemoteLookup, RemoteSequenceIndex); - const uint64_t RemoteRawSize = m_RemoteContent.RawSizes[RemotePathIndex]; - if (auto CacheSequenceIt = CachedSequenceHashesFound.find(RemoteSequenceRawHash); - CacheSequenceIt != 
CachedSequenceHashesFound.end()) - { - const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash); - ZEN_ASSERT_SLOW(IsFile(CacheFilePath)); - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Found sequence {} at {} ({})", - RemoteSequenceRawHash, - CacheFilePath, - NiceBytes(RemoteRawSize)); - } - } - else if (auto CacheChunkIt = CachedChunkHashesFound.find(RemoteSequenceRawHash); CacheChunkIt != CachedChunkHashesFound.end()) - { - const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash); - ZEN_ASSERT_SLOW(IsFile(CacheFilePath)); - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Found chunk {} at {} ({})", - RemoteSequenceRawHash, - CacheFilePath, - NiceBytes(RemoteRawSize)); - } - } - else if (auto It = m_LocalLookup.RawHashToSequenceIndex.find(RemoteSequenceRawHash); - It != m_LocalLookup.RawHashToSequenceIndex.end()) - { - const uint32_t LocalSequenceIndex = It->second; - const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(m_LocalLookup, LocalSequenceIndex); - const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred(); - ZEN_ASSERT_SLOW(IsFile(LocalFilePath)); - m_CacheMappingStats.LocalPathsMatchingSequencesCount++; - m_CacheMappingStats.LocalPathsMatchingSequencesByteCount += RemoteRawSize; - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Found sequence {} at {} ({})", - RemoteSequenceRawHash, - LocalFilePath, - NiceBytes(RemoteRawSize)); - } - } - else - { - MissingSequenceIndexes.push_back(RemoteSequenceIndex); - } - } - - m_CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs(); - return MissingSequenceIndexes; -} - -bool -BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source, - ChunkedFolderContent& OutScavengedLocalContent, - ChunkedContentLookup& 
OutScavengedLookup) -{ - ZEN_TRACE_CPU("FindScavengeContent"); - - FolderContent LocalFolderState; - try - { - BuildSaveState SavedState = ReadBuildSaveStateFile(Source.StateFilePath); - OutScavengedLocalContent = std::move(SavedState.State.ChunkedContent); - LocalFolderState = std::move(SavedState.FolderState); - } - catch (const std::exception& Ex) - { - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Skipping invalid build state at '{}', reason: {}", Source.StateFilePath, Ex.what()); - return false; - } - - tsl::robin_set<uint32_t> PathIndexesToScavenge; - PathIndexesToScavenge.reserve(OutScavengedLocalContent.Paths.size()); - std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(OutScavengedLocalContent.ChunkedContent.ChunkCounts); - - { - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToPathIndex; - - RawHashToPathIndex.reserve(OutScavengedLocalContent.Paths.size()); - for (uint32_t ScavengedPathIndex = 0; ScavengedPathIndex < OutScavengedLocalContent.RawHashes.size(); ScavengedPathIndex++) - { - if (!RawHashToPathIndex.contains(OutScavengedLocalContent.RawHashes[ScavengedPathIndex])) - { - RawHashToPathIndex.insert_or_assign(OutScavengedLocalContent.RawHashes[ScavengedPathIndex], ScavengedPathIndex); - } - } - - for (uint32_t ScavengeSequenceIndex = 0; ScavengeSequenceIndex < OutScavengedLocalContent.ChunkedContent.SequenceRawHashes.size(); - ScavengeSequenceIndex++) - { - const IoHash& SequenceHash = OutScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengeSequenceIndex]; - if (auto It = RawHashToPathIndex.find(SequenceHash); It != RawHashToPathIndex.end()) - { - uint32_t PathIndex = It->second; - if (!PathIndexesToScavenge.contains(PathIndex)) - { - if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash)) - { - PathIndexesToScavenge.insert(PathIndex); - } - else - { - uint32_t ChunkOrderIndexStart = ChunkOrderOffsets[ScavengeSequenceIndex]; - const uint32_t ChunkCount = 
OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex]; - for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < ChunkCount; ChunkOrderIndex++) - { - const uint32_t ChunkIndex = - OutScavengedLocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndexStart + ChunkOrderIndex]; - const IoHash& ChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ChunkIndex]; - if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ChunkHash)) - { - PathIndexesToScavenge.insert(PathIndex); - break; - } - } - } - } - } - else - { - ZEN_OPERATION_LOG_WARN(m_LogOutput, - "Scavenged state file at '{}' for '{}' is invalid, skipping scavenging for sequence {}", - Source.StateFilePath, - Source.Path, - SequenceHash); - } - } - } - - if (PathIndexesToScavenge.empty()) - { - OutScavengedLocalContent = {}; - return false; - } - - std::vector<std::filesystem::path> PathsToScavenge; - PathsToScavenge.reserve(PathIndexesToScavenge.size()); - for (uint32_t ScavengedStatePathIndex : PathIndexesToScavenge) - { - PathsToScavenge.push_back(OutScavengedLocalContent.Paths[ScavengedStatePathIndex]); - } - - FolderContent ValidFolderContent = - GetValidFolderContent(m_IOWorkerPool, m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}, 0, m_AbortFlag, m_PauseFlag); - - if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent)) - { - std::vector<std::filesystem::path> DeletedPaths; - FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths); - - // If the files are modified since the state was saved we ignore the files since we don't - // want to incur the cost of scanning/hashing scavenged files - DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end()); - if (!DeletedPaths.empty()) - { - OutScavengedLocalContent = - DeletePathsFromChunkedContent(OutScavengedLocalContent, - BuildHashLookup(OutScavengedLocalContent.ChunkedContent.SequenceRawHashes), - ChunkOrderOffsets, - DeletedPaths); - } - } 
- - if (OutScavengedLocalContent.Paths.empty()) - { - OutScavengedLocalContent = {}; - return false; - } - - OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent); - - return true; -} - -void -BuildsOperationUpdateFolder::ScavengeSourceForChunks(uint32_t& InOutRemainingChunkCount, - std::vector<bool>& InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags, - tsl::robin_map<IoHash, size_t, IoHash::Hasher>& InOutRawHashToCopyChunkDataIndex, - const std::vector<std::atomic<uint32_t>>& SequenceIndexChunksLeftToWriteCounters, - const ChunkedFolderContent& ScavengedContent, - const ChunkedContentLookup& ScavengedLookup, - std::vector<CopyChunkData>& InOutCopyChunkDatas, - uint32_t ScavengedContentIndex, - uint64_t& InOutChunkMatchingRemoteCount, - uint64_t& InOutChunkMatchingRemoteByteCount) -{ - for (uint32_t RemoteChunkIndex = 0; - RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size() && (InOutRemainingChunkCount > 0); - RemoteChunkIndex++) - { - if (!InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) - { - const IoHash& RemoteChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; - if (auto It = ScavengedLookup.ChunkHashToChunkIndex.find(RemoteChunkHash); It != ScavengedLookup.ChunkHashToChunkIndex.end()) - { - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = - GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex); - - if (!ChunkTargetPtrs.empty()) - { - const uint32_t ScavengedChunkIndex = It->second; - const uint64_t ScavengedChunkRawSize = ScavengedContent.ChunkedContent.ChunkRawSizes[ScavengedChunkIndex]; - const size_t ChunkSequenceLocationOffset = ScavengedLookup.ChunkSequenceLocationOffset[ScavengedChunkIndex]; - const ChunkedContentLookup::ChunkSequenceLocation& ScavengeLocation = - ScavengedLookup.ChunkSequenceLocations[ChunkSequenceLocationOffset]; - const IoHash& ScavengedSequenceRawHash = - 
ScavengedContent.ChunkedContent.SequenceRawHashes[ScavengeLocation.SequenceIndex]; - - CopyChunkData::ChunkTarget Target = {.TargetChunkLocationCount = gsl::narrow<uint32_t>(ChunkTargetPtrs.size()), - .RemoteChunkIndex = RemoteChunkIndex, - .CacheFileOffset = ScavengeLocation.Offset}; - if (auto CopySourceIt = InOutRawHashToCopyChunkDataIndex.find(ScavengedSequenceRawHash); - CopySourceIt != InOutRawHashToCopyChunkDataIndex.end()) - { - CopyChunkData& Data = InOutCopyChunkDatas[CopySourceIt->second]; - if (Data.TargetChunkLocationPtrs.size() > 1024) - { - InOutRawHashToCopyChunkDataIndex.insert_or_assign(ScavengedSequenceRawHash, InOutCopyChunkDatas.size()); - InOutCopyChunkDatas.push_back(CopyChunkData{.ScavengeSourceIndex = ScavengedContentIndex, - .SourceSequenceIndex = ScavengeLocation.SequenceIndex, - .TargetChunkLocationPtrs = ChunkTargetPtrs, - .ChunkTargets = std::vector<CopyChunkData::ChunkTarget>{Target}}); - } - else - { - Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(), - ChunkTargetPtrs.begin(), - ChunkTargetPtrs.end()); - Data.ChunkTargets.push_back(Target); - } - } - else - { - InOutRawHashToCopyChunkDataIndex.insert_or_assign(ScavengedSequenceRawHash, InOutCopyChunkDatas.size()); - InOutCopyChunkDatas.push_back(CopyChunkData{.ScavengeSourceIndex = ScavengedContentIndex, - .SourceSequenceIndex = ScavengeLocation.SequenceIndex, - .TargetChunkLocationPtrs = ChunkTargetPtrs, - .ChunkTargets = std::vector<CopyChunkData::ChunkTarget>{Target}}); - } - InOutChunkMatchingRemoteCount++; - InOutChunkMatchingRemoteByteCount += ScavengedChunkRawSize; - InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true; - InOutRemainingChunkCount--; - } - } - } - } -} - -std::filesystem::path -BuildsOperationUpdateFolder::FindDownloadedChunk(const IoHash& ChunkHash) -{ - ZEN_TRACE_CPU("FindDownloadedChunk"); - - std::filesystem::path CompressedChunkPath = m_TempDownloadFolderPath / ChunkHash.ToHexString(); - if 
(IsFile(CompressedChunkPath)) - { - IoBuffer ExistingCompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); - if (ExistingCompressedPart) - { - IoHash RawHash; - uint64_t RawSize; - if (CompressedBuffer::ValidateCompressedHeader(ExistingCompressedPart, - RawHash, - RawSize, - /*OutOptionalTotalCompressedSize*/ nullptr)) - { - return CompressedChunkPath; - } - else - { - std::error_code DummyEc; - RemoveFile(CompressedChunkPath, DummyEc); - } - } - } - return {}; -} - -std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> -BuildsOperationUpdateFolder::GetRemainingChunkTargets(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - uint32_t ChunkIndex) -{ - ZEN_TRACE_CPU("GetRemainingChunkTargets"); - - std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(m_RemoteLookup, ChunkIndex); - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs; - if (!ChunkSources.empty()) - { - ChunkTargetPtrs.reserve(ChunkSources.size()); - for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources) - { - if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0) - { - ChunkTargetPtrs.push_back(&Source); - } - } - } - return ChunkTargetPtrs; -}; - -uint64_t -BuildsOperationUpdateFolder::GetChunkWriteCount(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - uint32_t ChunkIndex) -{ - ZEN_TRACE_CPU("GetChunkWriteCount"); - - uint64_t WriteCount = 0; - std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(m_RemoteLookup, ChunkIndex); - for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources) - { - if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0) - { - WriteCount++; - } - } - return WriteCount; -}; - -void -BuildsOperationUpdateFolder::CheckRequiredDiskSpace(const tsl::robin_map<std::string, uint32_t>& 
RemotePathToRemoteIndex) -{ - tsl::robin_set<uint32_t> ExistingRemotePaths; - - if (m_Options.EnableTargetFolderScavenging) - { - for (uint32_t LocalPathIndex = 0; LocalPathIndex < m_LocalContent.Paths.size(); LocalPathIndex++) - { - const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex]; - const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex]; - - if (auto RemotePathIt = RemotePathToRemoteIndex.find(LocalPath.generic_string()); RemotePathIt != RemotePathToRemoteIndex.end()) - { - const uint32_t RemotePathIndex = RemotePathIt->second; - if (m_RemoteContent.RawHashes[RemotePathIndex] == RawHash) - { - ExistingRemotePaths.insert(RemotePathIndex); - } - } - } - } - - uint64_t RequiredSpace = 0; - for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++) - { - if (!ExistingRemotePaths.contains(RemotePathIndex)) - { - RequiredSpace += m_RemoteContent.RawSizes[RemotePathIndex]; - } - } - - std::error_code Ec; - DiskSpace Space = DiskSpaceInfo(m_Path, Ec); - if (Ec) - { - throw std::runtime_error(fmt::format("Get free disk space for target path '{}' FAILED, reason: {}", m_Path, Ec.message())); - } - if (Space.Free < (RequiredSpace + 16u * 1024u * 1024u)) - { - throw std::runtime_error( - fmt::format("Not enough free space for target path '{}', {} of free space is needed", m_Path, RequiredSpace)); - } -} - -void -BuildsOperationUpdateFolder::WriteScavengedSequenceToCache(const std::filesystem::path& ScavengeRootPath, - const ChunkedFolderContent& ScavengedContent, - const ScavengedSequenceCopyOperation& ScavengeOp) -{ - ZEN_TRACE_CPU("WriteScavengedSequenceToCache"); - - const std::filesystem::path ScavengedPath = ScavengedContent.Paths[ScavengeOp.ScavengedPathIndex]; - const std::filesystem::path ScavengedFilePath = (ScavengeRootPath / ScavengedPath).make_preferred(); - ZEN_ASSERT_SLOW(FileSizeFromPath(ScavengedFilePath) == ScavengeOp.RawSize); - - const IoHash& RemoteSequenceRawHash = 
m_RemoteContent.ChunkedContent.SequenceRawHashes[ScavengeOp.RemoteSequenceIndex]; - const std::filesystem::path TempFilePath = GetTempChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash); - - const uint64_t RawSize = ScavengedContent.RawSizes[ScavengeOp.ScavengedPathIndex]; - FastCopyFile(m_Options.AllowFileClone, - m_Options.UseSparseFiles, - ScavengedFilePath, - TempFilePath, - RawSize, - m_DiskStats.WriteCount, - m_DiskStats.WriteByteCount, - m_DiskStats.CloneCount, - m_DiskStats.CloneByteCount); - - const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash); - RenameFile(TempFilePath, CacheFilePath); - - m_WrittenChunkByteCount += RawSize; - if (m_Options.ValidateCompletedSequences) - { - m_ValidatedChunkByteCount += RawSize; - } -} - -void -BuildsOperationUpdateFolder::WriteLooseChunk(const uint32_t RemoteChunkIndex, - const BlobsExistsResult& ExistsResult, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - std::atomic<uint64_t>& WritePartsComplete, - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs, - BufferedWriteFileCache& WriteCache, - ParallelWork& Work, - uint64_t TotalRequestCount, - uint64_t TotalPartWriteCount, - FilteredRate& FilteredDownloadedBytesPerSecond, - FilteredRate& FilteredWrittenBytesPerSecond) -{ - std::filesystem::path ExistingCompressedChunkPath; - if (!m_Options.PrimeCacheOnly) - { - const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; - ExistingCompressedChunkPath = FindDownloadedChunk(ChunkHash); - if (!ExistingCompressedChunkPath.empty()) - { - m_DownloadStats.RequestsCompleteCount++; - if (m_DownloadStats.RequestsCompleteCount == TotalRequestCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - } - } - if (!m_AbortFlag) - { - if (!ExistingCompressedChunkPath.empty()) - { - Work.ScheduleWork( - m_IOWorkerPool, - [this, - SequenceIndexChunksLeftToWriteCounters, - 
&WriteCache, - &Work, - &WritePartsComplete, - TotalPartWriteCount, - &FilteredWrittenBytesPerSecond, - RemoteChunkIndex, - ChunkTargetPtrs = std::move(ChunkTargetPtrs), - CompressedChunkPath = std::move(ExistingCompressedChunkPath)](std::atomic<bool>& AbortFlag) mutable { - if (!AbortFlag) - { - ZEN_TRACE_CPU("Async_WritePreDownloadedChunk"); - - FilteredWrittenBytesPerSecond.Start(); - - const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; - - IoBuffer CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); - if (!CompressedPart) - { - throw std::runtime_error( - fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath)); - } - - bool NeedHashVerify = - WriteCompressedChunkToCache(ChunkHash, ChunkTargetPtrs, WriteCache, std::move(CompressedPart)); - WritePartsComplete++; - - if (!AbortFlag) - { - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - - std::error_code Ec = TryRemoveFile(CompressedChunkPath); - if (Ec) - { - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, - "Failed removing file '{}', reason: ({}) {}", - CompressedChunkPath, - Ec.value(), - Ec.message()); - } - - std::vector<uint32_t> CompletedSequences = - CompleteChunkTargets(ChunkTargetPtrs, SequenceIndexChunksLeftToWriteCounters); - WriteCache.Close(CompletedSequences); - if (NeedHashVerify) - { - VerifyAndCompleteChunkSequencesAsync(CompletedSequences, Work); - } - else - { - FinalizeChunkSequences(CompletedSequences); - } - } - } - }); - } - else - { - Work.ScheduleWork(m_NetworkPool, - [this, - &ExistsResult, - SequenceIndexChunksLeftToWriteCounters, - &WriteCache, - &Work, - &WritePartsComplete, - TotalPartWriteCount, - TotalRequestCount, - &FilteredDownloadedBytesPerSecond, - &FilteredWrittenBytesPerSecond, - RemoteChunkIndex, - ChunkTargetPtrs = std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>( - std::move(ChunkTargetPtrs))](std::atomic<bool>&) mutable { 
- if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_DownloadChunk"); - - FilteredDownloadedBytesPerSecond.Start(); - DownloadBuildBlob(RemoteChunkIndex, - ExistsResult, - Work, - [this, - &ExistsResult, - SequenceIndexChunksLeftToWriteCounters, - &WriteCache, - &Work, - &WritePartsComplete, - TotalPartWriteCount, - TotalRequestCount, - RemoteChunkIndex, - &FilteredDownloadedBytesPerSecond, - &FilteredWrittenBytesPerSecond, - ChunkTargetPtrs = std::move(ChunkTargetPtrs)](IoBuffer&& Payload) mutable { - if (m_DownloadStats.RequestsCompleteCount == TotalRequestCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - IoBufferFileReference FileRef; - bool EnableBacklog = Payload.GetFileReference(FileRef); - AsyncWriteDownloadedChunk(m_Options.ZenFolderPath, - RemoteChunkIndex, - std::move(ChunkTargetPtrs), - WriteCache, - Work, - std::move(Payload), - SequenceIndexChunksLeftToWriteCounters, - WritePartsComplete, - TotalPartWriteCount, - FilteredWrittenBytesPerSecond, - EnableBacklog); - }); - } - }); - } - } -} - -void -BuildsOperationUpdateFolder::DownloadBuildBlob(uint32_t RemoteChunkIndex, - const BlobsExistsResult& ExistsResult, - ParallelWork& Work, - std::function<void(IoBuffer&& Payload)>&& OnDownloaded) -{ - const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; - // FilteredDownloadedBytesPerSecond.Start(); - IoBuffer BuildBlob; - const bool ExistsInCache = m_Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(ChunkHash); - if (ExistsInCache) - { - BuildBlob = m_Storage.BuildCacheStorage->GetBuildBlob(m_BuildId, ChunkHash); - } - if (BuildBlob) - { - uint64_t BlobSize = BuildBlob.GetSize(); - m_DownloadStats.DownloadedChunkCount++; - m_DownloadStats.DownloadedChunkByteCount += BlobSize; - m_DownloadStats.RequestsCompleteCount++; - OnDownloaded(std::move(BuildBlob)); - } - else - { - if (m_RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= m_Options.LargeAttachmentSize) - { - DownloadLargeBlob( - 
*m_Storage.BuildStorage, - m_TempDownloadFolderPath, - m_BuildId, - ChunkHash, - m_Options.PreferredMultipartChunkSize, - Work, - m_NetworkPool, - m_DownloadStats.DownloadedChunkByteCount, - m_DownloadStats.MultipartAttachmentCount, - [this, &Work, ChunkHash, RemoteChunkIndex, OnDownloaded = std::move(OnDownloaded)](IoBuffer&& Payload) mutable { - m_DownloadStats.DownloadedChunkCount++; - m_DownloadStats.RequestsCompleteCount++; - - if (Payload && m_Storage.BuildCacheStorage && m_Options.PopulateCache) - { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - ChunkHash, - ZenContentType::kCompressedBinary, - CompositeBuffer(SharedBuffer(Payload))); - } - - OnDownloaded(std::move(Payload)); - }); - } - else - { - BuildBlob = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, ChunkHash); - if (BuildBlob && m_Storage.BuildCacheStorage && m_Options.PopulateCache) - { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - ChunkHash, - ZenContentType::kCompressedBinary, - CompositeBuffer(SharedBuffer(BuildBlob))); - } - if (!BuildBlob) - { - throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash)); - } - if (!m_Options.PrimeCacheOnly) - { - if (!m_AbortFlag) - { - uint64_t BlobSize = BuildBlob.GetSize(); - m_DownloadStats.DownloadedChunkCount++; - m_DownloadStats.DownloadedChunkByteCount += BlobSize; - m_DownloadStats.RequestsCompleteCount++; - - OnDownloaded(std::move(BuildBlob)); - } - } - } - } -} - -void -BuildsOperationUpdateFolder::DownloadPartialBlock( - const ChunkBlockAnalyser::BlockRangeDescriptor BlockRange, - const BlobsExistsResult& ExistsResult, - std::function<void(IoBuffer&& InMemoryBuffer, const std::filesystem::path& OnDiskPath)>&& OnDownloaded) -{ - const uint32_t BlockIndex = BlockRange.BlockIndex; - - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - - IoBuffer BlockBuffer; - if (m_Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash)) - { - BlockBuffer = - 
m_Storage.BuildCacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); - } - if (!BlockBuffer) - { - BlockBuffer = - m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); - } - if (!BlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} is missing when fetching range {} -> {}", - BlockDescription.BlockHash, - BlockRange.RangeStart, - BlockRange.RangeStart + BlockRange.RangeLength)); - } - if (!m_AbortFlag) - { - uint64_t BlockSize = BlockBuffer.GetSize(); - m_DownloadStats.DownloadedBlockCount++; - m_DownloadStats.DownloadedBlockByteCount += BlockSize; - m_DownloadStats.RequestsCompleteCount++; - - std::filesystem::path BlockChunkPath; - - // Check if the dowloaded block is file based and we can move it directly without rewriting it - { - IoBufferFileReference FileRef; - if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && (FileRef.FileChunkSize == BlockSize)) - { - ZEN_TRACE_CPU("MoveTempPartialBlock"); - - std::error_code Ec; - std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); - if (!Ec) - { - BlockBuffer.SetDeleteOnClose(false); - BlockBuffer = {}; - BlockChunkPath = m_TempBlockFolderPath / - fmt::format("{}_{:x}_{:x}", BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); - RenameFile(TempBlobPath, BlockChunkPath, Ec); - if (Ec) - { - BlockChunkPath = std::filesystem::path{}; - - // Re-open the temp file again - BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); - BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true); - BlockBuffer.SetDeleteOnClose(true); - } - } - } - } - - if (BlockChunkPath.empty() && (BlockSize > m_Options.MaximumInMemoryPayloadSize)) - { - ZEN_TRACE_CPU("WriteTempPartialBlock"); - // Could not be moved and rather large, lets store it on disk - BlockChunkPath = m_TempBlockFolderPath / - 
fmt::format("{}_{:x}_{:x}", BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); - TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer); - BlockBuffer = {}; - } - if (!m_AbortFlag) - { - OnDownloaded(std::move(BlockBuffer), std::move(BlockChunkPath)); - } - } -} - -std::vector<uint32_t> -BuildsOperationUpdateFolder::WriteLocalChunkToCache(CloneQueryInterface* CloneQuery, - const CopyChunkData& CopyData, - const std::vector<ChunkedFolderContent>& ScavengedContents, - const std::vector<ChunkedContentLookup>& ScavengedLookups, - const std::vector<std::filesystem::path>& ScavengedPaths, - BufferedWriteFileCache& WriteCache) -{ - ZEN_TRACE_CPU("WriteLocalChunkToCache"); - - std::filesystem::path SourceFilePath; - - if (CopyData.ScavengeSourceIndex == (uint32_t)-1) - { - const uint32_t LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex]; - SourceFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred(); - } - else - { - const ChunkedFolderContent& ScavengedContent = ScavengedContents[CopyData.ScavengeSourceIndex]; - const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[CopyData.ScavengeSourceIndex]; - const std::filesystem::path ScavengedPath = ScavengedPaths[CopyData.ScavengeSourceIndex]; - const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex]; - SourceFilePath = (ScavengedPath / ScavengedContent.Paths[ScavengedPathIndex]).make_preferred(); - } - ZEN_ASSERT_SLOW(IsFile(SourceFilePath)); - ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty()); - - uint64_t CacheLocalFileBytesRead = 0; - - size_t TargetStart = 0; - const std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> AllTargets(CopyData.TargetChunkLocationPtrs); - - struct WriteOp - { - const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; - uint64_t CacheFileOffset = (uint64_t)-1; - uint32_t ChunkIndex = (uint32_t)-1; - }; - - 
std::vector<WriteOp> WriteOps; - - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Sort"); - WriteOps.reserve(AllTargets.size()); - for (const CopyChunkData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) - { - std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> TargetRange = - AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); - for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) - { - WriteOps.push_back( - WriteOp{.Target = Target, .CacheFileOffset = ChunkTarget.CacheFileOffset, .ChunkIndex = ChunkTarget.RemoteChunkIndex}); - } - TargetStart += ChunkTarget.TargetChunkLocationCount; - } - - std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { - if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) - { - return true; - } - else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) - { - return false; - } - if (Lhs.Target->Offset < Rhs.Target->Offset) - { - return true; - } - return false; - }); - } - - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Write"); - - tsl::robin_set<uint32_t> ChunkIndexesWritten; - - BufferedOpenFile SourceFile(SourceFilePath, - m_DiskStats.OpenReadCount, - m_DiskStats.CurrentOpenFileCount, - m_DiskStats.ReadCount, - m_DiskStats.ReadByteCount); - - bool CanCloneSource = CloneQuery && CloneQuery->CanClone(SourceFile.Handle()); - - BufferedWriteFileCache::Local LocalWriter(WriteCache); - - for (size_t WriteOpIndex = 0; WriteOpIndex < WriteOps.size();) - { - if (m_AbortFlag) - { - break; - } - const WriteOp& Op = WriteOps[WriteOpIndex]; - - const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; - const uint32_t RemotePathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; - const uint64_t TargetSize = m_RemoteContent.RawSizes[RemotePathIndex]; - const uint64_t ChunkSize = m_RemoteContent.ChunkedContent.ChunkRawSizes[Op.ChunkIndex]; - - uint64_t ReadLength = ChunkSize; - size_t WriteCount = 1; - uint64_t OpSourceEnd = 
Op.CacheFileOffset + ChunkSize; - uint64_t OpTargetEnd = Op.Target->Offset + ChunkSize; - while ((WriteOpIndex + WriteCount) < WriteOps.size()) - { - const WriteOp& NextOp = WriteOps[WriteOpIndex + WriteCount]; - if (NextOp.Target->SequenceIndex != Op.Target->SequenceIndex) - { - break; - } - if (NextOp.Target->Offset != OpTargetEnd) - { - break; - } - if (NextOp.CacheFileOffset != OpSourceEnd) - { - break; - } - const uint64_t NextChunkLength = m_RemoteContent.ChunkedContent.ChunkRawSizes[NextOp.ChunkIndex]; - if (ReadLength + NextChunkLength > BufferedOpenFile::BlockSize) - { - break; - } - ReadLength += NextChunkLength; - OpSourceEnd += NextChunkLength; - OpTargetEnd += NextChunkLength; - WriteCount++; - } - - { - bool DidClone = false; - - if (CanCloneSource) - { - uint64_t PreBytes = 0; - uint64_t PostBytes = 0; - uint64_t ClonableBytes = - CloneQuery->GetClonableRange(Op.CacheFileOffset, Op.Target->Offset, ReadLength, PreBytes, PostBytes); - if (ClonableBytes > 0) - { - // We need to open the file... 
- BufferedWriteFileCache::Local::Writer* Writer = LocalWriter.GetWriter(RemoteSequenceIndex); - if (!Writer) - { - Writer = LocalWriter.PutWriter(RemoteSequenceIndex, std::make_unique<BufferedWriteFileCache::Local::Writer>()); - - Writer->File = std::make_unique<BasicFile>(); - - const std::filesystem::path FileName = - GetTempChunkedSequenceFileName(m_CacheFolderPath, - m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]); - Writer->File->Open(FileName, BasicFile::Mode::kWrite); - if (m_Options.UseSparseFiles) - { - PrepareFileForScatteredWrite(Writer->File->Handle(), TargetSize); - } - } - DidClone = CloneQuery->TryClone(SourceFile.Handle(), - Writer->File->Handle(), - Op.CacheFileOffset + PreBytes, - Op.Target->Offset + PreBytes, - ClonableBytes, - TargetSize); - if (DidClone) - { - m_DiskStats.WriteCount++; - m_DiskStats.WriteByteCount += ClonableBytes; - - m_DiskStats.CloneCount++; - m_DiskStats.CloneByteCount += ClonableBytes; - - m_WrittenChunkByteCount += ClonableBytes; - - if (PreBytes > 0) - { - CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, PreBytes); - const uint64_t FileOffset = Op.Target->Offset; - - WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex); - } - if (PostBytes > 0) - { - CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset + ReadLength - PostBytes, PostBytes); - const uint64_t FileOffset = Op.Target->Offset + ReadLength - PostBytes; - - WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex); - } - } - } - } - - if (!DidClone) - { - CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ReadLength); - - const uint64_t FileOffset = Op.Target->Offset; - - WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex); - } - } - - CacheLocalFileBytesRead += ReadLength; // TODO: This should be the sum of unique chunk sizes? 
- - WriteOpIndex += WriteCount; - } - } - - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), SourceFilePath); - } - - std::vector<uint32_t> Result; - Result.reserve(WriteOps.size()); - - for (const WriteOp& Op : WriteOps) - { - Result.push_back(Op.Target->SequenceIndex); - } - return Result; -} - -bool -BuildsOperationUpdateFolder::WriteCompressedChunkToCache( - const IoHash& ChunkHash, - const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, - BufferedWriteFileCache& WriteCache, - IoBuffer&& CompressedPart) -{ - ZEN_TRACE_CPU("WriteCompressedChunkToCache"); - - auto ChunkHashToChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); - ZEN_ASSERT(ChunkHashToChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end()); - if (IsSingleFileChunk(m_RemoteContent, ChunkTargetPtrs)) - { - const std::uint32_t SequenceIndex = ChunkTargetPtrs.front()->SequenceIndex; - const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]; - StreamDecompress(SequenceRawHash, CompositeBuffer(std::move(CompressedPart))); - return false; - } - else - { - IoHash RawHash; - uint64_t RawSize; - CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompositeBuffer(std::move(CompressedPart)), RawHash, RawSize); - if (!Compressed) - { - throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", ChunkHash)); - } - if (RawHash != ChunkHash) - { - throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, ChunkHash)); - } - - BufferedWriteFileCache::Local LocalWriter(WriteCache); - - IoHashStream Hash; - bool CouldDecompress = Compressed.DecompressToStream( - 0, - (uint64_t)-1, - [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { - ZEN_UNUSED(SourceOffset); - ZEN_TRACE_CPU("Async_StreamDecompress_Write"); - 
m_DiskStats.ReadByteCount += SourceSize; - if (!m_AbortFlag) - { - for (const ChunkedContentLookup::ChunkSequenceLocation* TargetPtr : ChunkTargetPtrs) - { - const auto& Target = *TargetPtr; - const uint64_t FileOffset = Target.Offset + Offset; - const uint32_t SequenceIndex = Target.SequenceIndex; - const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; - - WriteSequenceChunkToCache(LocalWriter, RangeBuffer, SequenceIndex, FileOffset, PathIndex); - } - - return true; - } - return false; - }); - - if (m_AbortFlag) - { - return false; - } - - if (!CouldDecompress) - { - throw std::runtime_error(fmt::format("Failed to decompress large chunk {}", ChunkHash)); - } - - return true; - } -} - -void -BuildsOperationUpdateFolder::StreamDecompress(const IoHash& SequenceRawHash, CompositeBuffer&& CompressedPart) -{ - ZEN_TRACE_CPU("StreamDecompress"); - const std::filesystem::path TempChunkSequenceFileName = GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash); - TemporaryFile DecompressedTemp; - std::error_code Ec; - DecompressedTemp.CreateTemporary(TempChunkSequenceFileName.parent_path(), Ec); - if (Ec) - { - throw std::runtime_error(fmt::format("Failed creating temporary file for decompressing large blob {}, reason: ({}) {}", - SequenceRawHash, - Ec.value(), - Ec.message())); - } - IoHash RawHash; - uint64_t RawSize; - CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompressedPart, RawHash, RawSize); - if (!Compressed) - { - throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", SequenceRawHash)); - } - if (RawHash != SequenceRawHash) - { - throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, SequenceRawHash)); - } - PrepareFileForScatteredWrite(DecompressedTemp.Handle(), RawSize); - - IoHashStream Hash; - bool CouldDecompress = - Compressed.DecompressToStream(0, - (uint64_t)-1, - [&](uint64_t SourceOffset, uint64_t SourceSize, 
uint64_t Offset, const CompositeBuffer& RangeBuffer) { - ZEN_UNUSED(SourceOffset); - ZEN_TRACE_CPU("StreamDecompress_Write"); - m_DiskStats.ReadCount++; - m_DiskStats.ReadByteCount += SourceSize; - if (!m_AbortFlag) - { - for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) - { - if (m_Options.ValidateCompletedSequences) - { - Hash.Append(Segment.GetView()); - m_ValidatedChunkByteCount += Segment.GetSize(); - } - DecompressedTemp.Write(Segment, Offset); - Offset += Segment.GetSize(); - m_DiskStats.WriteByteCount += Segment.GetSize(); - m_DiskStats.WriteCount++; - m_WrittenChunkByteCount += Segment.GetSize(); - } - return true; - } - return false; - }); - - if (m_AbortFlag) - { - return; - } - - if (!CouldDecompress) - { - throw std::runtime_error(fmt::format("Failed to decompress large blob {}", SequenceRawHash)); - } - if (m_Options.ValidateCompletedSequences) - { - const IoHash VerifyHash = Hash.GetHash(); - if (VerifyHash != SequenceRawHash) - { - throw std::runtime_error( - fmt::format("Decompressed blob payload hash {} does not match expected hash {}", VerifyHash, SequenceRawHash)); - } - } - DecompressedTemp.MoveTemporaryIntoPlace(TempChunkSequenceFileName, Ec); - if (Ec) - { - throw std::runtime_error(fmt::format("Failed moving temporary file for decompressing large blob {}, reason: ({}) {}", - SequenceRawHash, - Ec.value(), - Ec.message())); - } - // WriteChunkStats.ChunkCountWritten++; -} - -void -BuildsOperationUpdateFolder::WriteSequenceChunkToCache(BufferedWriteFileCache::Local& LocalWriter, - const CompositeBuffer& Chunk, - const uint32_t SequenceIndex, - const uint64_t FileOffset, - const uint32_t PathIndex) -{ - ZEN_TRACE_CPU("WriteSequenceChunkToCache"); - - const uint64_t SequenceSize = m_RemoteContent.RawSizes[PathIndex]; - - auto OpenFile = [&](BasicFile& File) { - const std::filesystem::path FileName = - GetTempChunkedSequenceFileName(m_CacheFolderPath, m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); - 
File.Open(FileName, BasicFile::Mode::kWrite); - if (m_Options.UseSparseFiles) - { - PrepareFileForScatteredWrite(File.Handle(), SequenceSize); - } - }; - - const uint64_t ChunkSize = Chunk.GetSize(); - ZEN_ASSERT(FileOffset + ChunkSize <= SequenceSize); - if (ChunkSize == SequenceSize) - { - BasicFile SingleChunkFile; - OpenFile(SingleChunkFile); - - m_DiskStats.CurrentOpenFileCount++; - auto _ = MakeGuard([this]() { m_DiskStats.CurrentOpenFileCount--; }); - SingleChunkFile.Write(Chunk, FileOffset); - } - else - { - const uint64_t MaxWriterBufferSize = 256u * 1025u; - - BufferedWriteFileCache::Local::Writer* Writer = LocalWriter.GetWriter(SequenceIndex); - if (Writer) - { - if ((!Writer->Writer) && (ChunkSize < MaxWriterBufferSize)) - { - Writer->Writer = std::make_unique<BasicFileWriter>(*Writer->File, Min(SequenceSize, MaxWriterBufferSize)); - } - Writer->Write(Chunk, FileOffset); - } - else - { - Writer = LocalWriter.PutWriter(SequenceIndex, std::make_unique<BufferedWriteFileCache::Local::Writer>()); - - Writer->File = std::make_unique<BasicFile>(); - OpenFile(*Writer->File); - if (ChunkSize < MaxWriterBufferSize) - { - Writer->Writer = std::make_unique<BasicFileWriter>(*Writer->File, Min(SequenceSize, MaxWriterBufferSize)); - } - Writer->Write(Chunk, FileOffset); - } - } - m_DiskStats.WriteCount++; - m_DiskStats.WriteByteCount += ChunkSize; - m_WrittenChunkByteCount += ChunkSize; -} - -bool -BuildsOperationUpdateFolder::GetBlockWriteOps(const IoHash& BlockRawHash, - std::span<const IoHash> ChunkRawHashes, - std::span<const uint32_t> ChunkCompressedLengths, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, - const MemoryView BlockView, - uint32_t FirstIncludedBlockChunkIndex, - uint32_t LastIncludedBlockChunkIndex, - BlockWriteOps& OutOps) -{ - ZEN_TRACE_CPU("GetBlockWriteOps"); - - uint32_t OffsetInBlock = 0; - for (uint32_t ChunkBlockIndex = 
FirstIncludedBlockChunkIndex; ChunkBlockIndex <= LastIncludedBlockChunkIndex; ChunkBlockIndex++) - { - const uint32_t ChunkCompressedSize = ChunkCompressedLengths[ChunkBlockIndex]; - const IoHash& ChunkHash = ChunkRawHashes[ChunkBlockIndex]; - if (auto It = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != m_RemoteLookup.ChunkHashToChunkIndex.end()) - { - const uint32_t ChunkIndex = It->second; - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = - GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, ChunkIndex); - - if (!ChunkTargetPtrs.empty()) - { - bool NeedsWrite = true; - if (RemoteChunkIndexNeedsCopyFromSourceFlags[ChunkIndex].compare_exchange_strong(NeedsWrite, false)) - { - MemoryView ChunkMemoryView = BlockView.Mid(OffsetInBlock, ChunkCompressedSize); - IoHash VerifyChunkHash; - uint64_t VerifyChunkSize; - CompressedBuffer CompressedChunk = - CompressedBuffer::FromCompressed(SharedBuffer::MakeView(ChunkMemoryView), VerifyChunkHash, VerifyChunkSize); - if (!CompressedChunk) - { - throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} is not a valid compressed buffer", - ChunkHash, - OffsetInBlock, - ChunkCompressedSize, - BlockRawHash)); - } - if (VerifyChunkHash != ChunkHash) - { - throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} has a mismatching content hash {}", - ChunkHash, - OffsetInBlock, - ChunkCompressedSize, - BlockRawHash, - VerifyChunkHash)); - } - if (VerifyChunkSize != m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]) - { - throw std::runtime_error( - fmt::format("Chunk {} at {}, size {} in block {} has a mismatching raw size {}, expected {}", - ChunkHash, - OffsetInBlock, - ChunkCompressedSize, - BlockRawHash, - VerifyChunkSize, - m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex])); - } - - OodleCompressor ChunkCompressor; - OodleCompressionLevel ChunkCompressionLevel; - uint64_t ChunkBlockSize; - - bool 
GetCompressParametersSuccess = - CompressedChunk.TryGetCompressParameters(ChunkCompressor, ChunkCompressionLevel, ChunkBlockSize); - ZEN_ASSERT(GetCompressParametersSuccess); - - IoBuffer Decompressed; - if (ChunkCompressionLevel == OodleCompressionLevel::None) - { - MemoryView ChunkDecompressedMemoryView = ChunkMemoryView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder()); - Decompressed = - IoBuffer(IoBuffer::Wrap, ChunkDecompressedMemoryView.GetData(), ChunkDecompressedMemoryView.GetSize()); - } - else - { - Decompressed = CompressedChunk.Decompress().AsIoBuffer(); - } - - if (Decompressed.GetSize() != m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]) - { - throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} decompressed to size {}, expected {}", - ChunkHash, - OffsetInBlock, - ChunkCompressedSize, - BlockRawHash, - Decompressed.GetSize(), - m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex])); - } - - ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed)); - for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs) - { - OutOps.WriteOps.push_back( - BlockWriteOps::WriteOpData{.Target = Target, .ChunkBufferIndex = OutOps.ChunkBuffers.size()}); - } - OutOps.ChunkBuffers.emplace_back(std::move(Decompressed)); - } - } - } - - OffsetInBlock += ChunkCompressedSize; - } - { - ZEN_TRACE_CPU("Sort"); - std::sort(OutOps.WriteOps.begin(), - OutOps.WriteOps.end(), - [](const BlockWriteOps::WriteOpData& Lhs, const BlockWriteOps::WriteOpData& Rhs) { - if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) - { - return true; - } - if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) - { - return false; - } - return Lhs.Target->Offset < Rhs.Target->Offset; - }); - } - return true; -} - -void -BuildsOperationUpdateFolder::WriteBlockChunkOpsToCache(std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - const BlockWriteOps& Ops, - BufferedWriteFileCache& WriteCache, - ParallelWork& 
Work) -{ - ZEN_TRACE_CPU("WriteBlockChunkOpsToCache"); - - { - BufferedWriteFileCache::Local LocalWriter(WriteCache); - for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps) - { - if (Work.IsAborted()) - { - break; - } - const CompositeBuffer& Chunk = Ops.ChunkBuffers[WriteOp.ChunkBufferIndex]; - const uint32_t SequenceIndex = WriteOp.Target->SequenceIndex; - ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() <= - m_RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]); - ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() > 0); - const uint64_t FileOffset = WriteOp.Target->Offset; - const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; - - WriteSequenceChunkToCache(LocalWriter, Chunk, SequenceIndex, FileOffset, PathIndex); - } - } - if (!Work.IsAborted()) - { - // Write tracking, updating this must be done without any files open (BufferedWriteFileCache::Local) - std::vector<uint32_t> CompletedChunkSequences; - for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps) - { - const uint32_t RemoteSequenceIndex = WriteOp.Target->SequenceIndex; - if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters)) - { - CompletedChunkSequences.push_back(RemoteSequenceIndex); - } - } - WriteCache.Close(CompletedChunkSequences); - VerifyAndCompleteChunkSequencesAsync(CompletedChunkSequences, Work); - } -} - -bool -BuildsOperationUpdateFolder::WriteChunksBlockToCache(const ChunkBlockDescription& BlockDescription, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - ParallelWork& Work, - CompositeBuffer&& BlockBuffer, - std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, - BufferedWriteFileCache& WriteCache) -{ - ZEN_TRACE_CPU("WriteChunksBlockToCache"); - - IoBuffer BlockMemoryBuffer = MakeBufferMemoryBased(BlockBuffer); - const MemoryView BlockView = BlockMemoryBuffer.GetView(); - - BlockWriteOps Ops; - if 
((BlockDescription.HeaderSize == 0) || BlockDescription.ChunkCompressedLengths.empty()) - { - ZEN_TRACE_CPU("WriteChunksBlockToCache_Legacy"); - - uint64_t HeaderSize; - const std::vector<uint32_t> ChunkCompressedLengths = - ReadChunkBlockHeader(BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder()), HeaderSize); - - if (GetBlockWriteOps(BlockDescription.BlockHash, - BlockDescription.ChunkRawHashes, - ChunkCompressedLengths, - SequenceIndexChunksLeftToWriteCounters, - RemoteChunkIndexNeedsCopyFromSourceFlags, - BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + HeaderSize), - 0, - gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1), - Ops)) - { - WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work); - return true; - } - return false; - } - - if (GetBlockWriteOps(BlockDescription.BlockHash, - BlockDescription.ChunkRawHashes, - BlockDescription.ChunkCompressedLengths, - SequenceIndexChunksLeftToWriteCounters, - RemoteChunkIndexNeedsCopyFromSourceFlags, - BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize), - 0, - gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1), - Ops)) - { - WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work); - return true; - } - return false; -} - -bool -BuildsOperationUpdateFolder::WritePartialBlockChunksToCache(const ChunkBlockDescription& BlockDescription, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - ParallelWork& Work, - CompositeBuffer&& PartialBlockBuffer, - uint32_t FirstIncludedBlockChunkIndex, - uint32_t LastIncludedBlockChunkIndex, - std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, - BufferedWriteFileCache& WriteCache) -{ - ZEN_TRACE_CPU("WritePartialBlockChunksToCache"); - - IoBuffer BlockMemoryBuffer = MakeBufferMemoryBased(PartialBlockBuffer); - const MemoryView BlockView = BlockMemoryBuffer.GetView(); - - 
BlockWriteOps Ops; - if (GetBlockWriteOps(BlockDescription.BlockHash, - BlockDescription.ChunkRawHashes, - BlockDescription.ChunkCompressedLengths, - SequenceIndexChunksLeftToWriteCounters, - RemoteChunkIndexNeedsCopyFromSourceFlags, - BlockView, - FirstIncludedBlockChunkIndex, - LastIncludedBlockChunkIndex, - Ops)) - { - WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work); - return true; - } - else - { - return false; - } -} - -void -BuildsOperationUpdateFolder::AsyncWriteDownloadedChunk(const std::filesystem::path& ZenFolderPath, - uint32_t RemoteChunkIndex, - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs, - BufferedWriteFileCache& WriteCache, - ParallelWork& Work, - IoBuffer&& Payload, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - std::atomic<uint64_t>& WritePartsComplete, - const uint64_t TotalPartWriteCount, - FilteredRate& FilteredWrittenBytesPerSecond, - bool EnableBacklog) -{ - ZEN_TRACE_CPU("AsyncWriteDownloadedChunk"); - - const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; - - const uint64_t Size = Payload.GetSize(); - - std::filesystem::path CompressedChunkPath; - - // Check if the dowloaded chunk is file based and we can move it directly without rewriting it - { - IoBufferFileReference FileRef; - if (Payload.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && (FileRef.FileChunkSize == Size)) - { - ZEN_TRACE_CPU("MoveTempChunk"); - std::error_code Ec; - std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); - if (!Ec) - { - Payload.SetDeleteOnClose(false); - Payload = {}; - CompressedChunkPath = m_TempDownloadFolderPath / ChunkHash.ToHexString(); - RenameFile(TempBlobPath, CompressedChunkPath, Ec); - if (Ec) - { - CompressedChunkPath = std::filesystem::path{}; - - // Re-open the temp file again - BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); - Payload = 
IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, Size, true); - Payload.SetDeleteOnClose(true); - } - } - } - } - - if (CompressedChunkPath.empty() && (Size > m_Options.MaximumInMemoryPayloadSize)) - { - ZEN_TRACE_CPU("WriteTempChunk"); - // Could not be moved and rather large, lets store it on disk - CompressedChunkPath = m_TempDownloadFolderPath / ChunkHash.ToHexString(); - TemporaryFile::SafeWriteFile(CompressedChunkPath, Payload); - Payload = {}; - } - - Work.ScheduleWork( - m_IOWorkerPool, - [&ZenFolderPath, - this, - SequenceIndexChunksLeftToWriteCounters, - &Work, - CompressedChunkPath, - RemoteChunkIndex, - TotalPartWriteCount, - &WriteCache, - &WritePartsComplete, - &FilteredWrittenBytesPerSecond, - ChunkTargetPtrs = std::move(ChunkTargetPtrs), - CompressedPart = IoBuffer(std::move(Payload))](std::atomic<bool>&) mutable { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_WriteChunk"); - - FilteredWrittenBytesPerSecond.Start(); - - const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; - if (CompressedChunkPath.empty()) - { - ZEN_ASSERT(CompressedPart); - } - else - { - ZEN_ASSERT(!CompressedPart); - CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); - if (!CompressedPart) - { - throw std::runtime_error( - fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath)); - } - } - - bool NeedHashVerify = WriteCompressedChunkToCache(ChunkHash, ChunkTargetPtrs, WriteCache, std::move(CompressedPart)); - if (!m_AbortFlag) - { - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - - if (!CompressedChunkPath.empty()) - { - std::error_code Ec = TryRemoveFile(CompressedChunkPath); - if (Ec) - { - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, - "Failed removing file '{}', reason: ({}) {}", - CompressedChunkPath, - Ec.value(), - Ec.message()); - } - } - - std::vector<uint32_t> CompletedSequences = - 
CompleteChunkTargets(ChunkTargetPtrs, SequenceIndexChunksLeftToWriteCounters); - WriteCache.Close(CompletedSequences); - if (NeedHashVerify) - { - VerifyAndCompleteChunkSequencesAsync(CompletedSequences, Work); - } - else - { - FinalizeChunkSequences(CompletedSequences); - } - } - } - }, - EnableBacklog ? WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog); -} - -void -BuildsOperationUpdateFolder::VerifyAndCompleteChunkSequencesAsync(std::span<const uint32_t> RemoteSequenceIndexes, ParallelWork& Work) -{ - if (RemoteSequenceIndexes.empty()) - { - return; - } - ZEN_TRACE_CPU("VerifyAndCompleteChunkSequence"); - if (m_Options.ValidateCompletedSequences) - { - for (uint32_t RemoteSequenceIndexOffset = 1; RemoteSequenceIndexOffset < RemoteSequenceIndexes.size(); RemoteSequenceIndexOffset++) - { - const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[RemoteSequenceIndexOffset]; - Work.ScheduleWork(m_IOWorkerPool, [this, RemoteSequenceIndex](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("Async_VerifyAndFinalizeSequence"); - - VerifySequence(RemoteSequenceIndex); - if (!m_AbortFlag) - { - const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - FinalizeChunkSequence(SequenceRawHash); - } - } - }); - } - const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[0]; - - VerifySequence(RemoteSequenceIndex); - const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - FinalizeChunkSequence(SequenceRawHash); - } - else - { - for (uint32_t RemoteSequenceIndexOffset = 0; RemoteSequenceIndexOffset < RemoteSequenceIndexes.size(); RemoteSequenceIndexOffset++) - { - const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[RemoteSequenceIndexOffset]; - const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - FinalizeChunkSequence(SequenceRawHash); - } - } -} - -bool 
-BuildsOperationUpdateFolder::CompleteSequenceChunk(uint32_t RemoteSequenceIndex, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters) -{ - uint32_t PreviousValue = SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1); - ZEN_ASSERT(PreviousValue >= 1); - ZEN_ASSERT(PreviousValue != (uint32_t)-1); - return PreviousValue == 1; -} - -std::vector<uint32_t> -BuildsOperationUpdateFolder::CompleteChunkTargets(const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters) -{ - ZEN_TRACE_CPU("CompleteChunkTargets"); - - std::vector<uint32_t> CompletedSequenceIndexes; - for (const ChunkedContentLookup::ChunkSequenceLocation* Location : ChunkTargetPtrs) - { - const uint32_t RemoteSequenceIndex = Location->SequenceIndex; - if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters)) - { - CompletedSequenceIndexes.push_back(RemoteSequenceIndex); - } - } - return CompletedSequenceIndexes; -} - -void -BuildsOperationUpdateFolder::FinalizeChunkSequence(const IoHash& SequenceRawHash) -{ - ZEN_TRACE_CPU("FinalizeChunkSequence"); - - ZEN_ASSERT_SLOW(!IsFile(GetFinalChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash))); - std::error_code Ec; - RenameFile(GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash), - GetFinalChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash), - Ec); - if (Ec) - { - throw std::system_error(Ec); - } -} - -void -BuildsOperationUpdateFolder::FinalizeChunkSequences(std::span<const uint32_t> RemoteSequenceIndexes) -{ - ZEN_TRACE_CPU("FinalizeChunkSequences"); - - for (uint32_t SequenceIndex : RemoteSequenceIndexes) - { - FinalizeChunkSequence(m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); - } -} - -void -BuildsOperationUpdateFolder::VerifySequence(uint32_t RemoteSequenceIndex) -{ - ZEN_TRACE_CPU("VerifySequence"); - - 
ZEN_ASSERT(m_Options.ValidateCompletedSequences); - - const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - { - ZEN_TRACE_CPU("HashSequence"); - const std::uint32_t RemotePathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; - const uint64_t ExpectedSize = m_RemoteContent.RawSizes[RemotePathIndex]; - IoBuffer VerifyBuffer = IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash)); - const uint64_t VerifySize = VerifyBuffer.GetSize(); - if (VerifySize != ExpectedSize) - { - throw std::runtime_error(fmt::format("Written chunk sequence {} size {} does not match expected size {}", - SequenceRawHash, - VerifySize, - ExpectedSize)); - } - - const IoHash VerifyChunkHash = IoHash::HashBuffer(std::move(VerifyBuffer), &m_ValidatedChunkByteCount); - if (VerifyChunkHash != SequenceRawHash) - { - throw std::runtime_error( - fmt::format("Written chunk sequence {} hash does not match expected hash {}", VerifyChunkHash, SequenceRawHash)); - } - } -} - -////////////////////// BuildsOperationUploadFolder - -BuildsOperationUploadFolder::BuildsOperationUploadFolder(OperationLogOutput& OperationLogOutput, - StorageInstance& Storage, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - WorkerThreadPool& IOWorkerPool, - WorkerThreadPool& NetworkPool, - const Oid& BuildId, - const std::filesystem::path& Path, - bool CreateBuild, - const CbObject& MetaData, - const Options& Options) -: m_LogOutput(OperationLogOutput) -, m_Storage(Storage) -, m_AbortFlag(AbortFlag) -, m_PauseFlag(PauseFlag) -, m_IOWorkerPool(IOWorkerPool) -, m_NetworkPool(NetworkPool) -, m_BuildId(BuildId) -, m_Path(Path) -, m_CreateBuild(CreateBuild) -, m_MetaData(MetaData) -, m_Options(Options) -{ - m_NonCompressableExtensionHashes.reserve(Options.NonCompressableExtensions.size()); - for (const std::string& Extension : Options.NonCompressableExtensions) - { - 
m_NonCompressableExtensionHashes.insert(HashStringAsLowerDjb2(Extension)); - } -} - -BuildsOperationUploadFolder::PrepareBuildResult -BuildsOperationUploadFolder::PrepareBuild() -{ - ZEN_TRACE_CPU("PrepareBuild"); - - PrepareBuildResult Result; - Result.PreferredMultipartChunkSize = m_Options.PreferredMultipartChunkSize; - Stopwatch Timer; - if (m_CreateBuild) - { - ZEN_TRACE_CPU("CreateBuild"); - - Stopwatch PutBuildTimer; - CbObject PutBuildResult = m_Storage.BuildStorage->PutBuild(m_BuildId, m_MetaData); - Result.PrepareBuildTimeMs = PutBuildTimer.GetElapsedTimeMs(); - if (auto ChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(); ChunkSize != 0) - { - Result.PreferredMultipartChunkSize = ChunkSize; - } - Result.PayloadSize = m_MetaData.GetSize(); - } - else - { - ZEN_TRACE_CPU("PutBuild"); - Stopwatch GetBuildTimer; - CbObject Build = m_Storage.BuildStorage->GetBuild(m_BuildId); - Result.PrepareBuildTimeMs = GetBuildTimer.GetElapsedTimeMs(); - Result.PayloadSize = Build.GetSize(); - if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) - { - Result.PreferredMultipartChunkSize = ChunkSize; - } - else if (m_Options.AllowMultiparts) - { - ZEN_OPERATION_LOG_WARN(m_LogOutput, - "PreferredMultipartChunkSize is unknown. 
Defaulting to '{}'", - NiceBytes(Result.PreferredMultipartChunkSize)); - } - } - - if (!m_Options.IgnoreExistingBlocks) - { - ZEN_TRACE_CPU("FindBlocks"); - Stopwatch KnownBlocksTimer; - CbObject BlockDescriptionList = m_Storage.BuildStorage->FindBlocks(m_BuildId, m_Options.FindBlockMaxCount); - if (BlockDescriptionList) - { - Result.KnownBlocks = ParseChunkBlockDescriptionList(BlockDescriptionList); - } - Result.FindBlocksTimeMs = KnownBlocksTimer.GetElapsedTimeMs(); - } - Result.ElapsedTimeMs = Timer.GetElapsedTimeMs(); - return Result; -} - -std::vector<BuildsOperationUploadFolder::UploadPart> -BuildsOperationUploadFolder::ReadFolder() -{ - std::vector<UploadPart> UploadParts; - std::filesystem::path ExcludeManifestPath = m_Path / m_Options.ZenExcludeManifestName; - tsl::robin_set<std::string> ExcludeAssetPaths; - if (IsFile(ExcludeManifestPath)) - { - std::filesystem::path AbsoluteExcludeManifestPath = - MakeSafeAbsolutePath(ExcludeManifestPath.is_absolute() ? ExcludeManifestPath : m_Path / ExcludeManifestPath); - BuildManifest Manifest = ParseBuildManifest(AbsoluteExcludeManifestPath); - const std::vector<std::filesystem::path>& AssetPaths = Manifest.Parts.front().Files; - ExcludeAssetPaths.reserve(AssetPaths.size()); - for (const std::filesystem::path& AssetPath : AssetPaths) - { - ExcludeAssetPaths.insert(AssetPath.generic_string()); - } - } - - UploadParts.resize(1); - - UploadPart& Part = UploadParts.front(); - GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats; - - Part.Content = GetFolderContent( - Part.LocalFolderScanStats, - m_Path, - [this](const std::string_view& RelativePath) { return IsAcceptedFolder(RelativePath); }, - [this, &ExcludeAssetPaths](const std::string_view& RelativePath, uint64_t Size, uint32_t Attributes) -> bool { - ZEN_UNUSED(Size, Attributes); - if (!IsAcceptedFile(RelativePath)) - { - return false; - } - if (ExcludeAssetPaths.contains(std::filesystem::path(RelativePath).generic_string())) - { - return 
false; - } - return true; - }, - m_IOWorkerPool, - m_LogOutput.GetProgressUpdateDelayMS(), - [&](bool, std::ptrdiff_t) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Found {} files in '{}'...", LocalFolderScanStats.AcceptedFileCount.load(), m_Path); - }, - m_AbortFlag); - Part.TotalRawSize = std::accumulate(Part.Content.RawSizes.begin(), Part.Content.RawSizes.end(), std::uint64_t(0)); - - return UploadParts; -} - -std::vector<BuildsOperationUploadFolder::UploadPart> -BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& ManifestPath) -{ - std::vector<UploadPart> UploadParts; - Stopwatch ManifestParseTimer; - std::filesystem::path AbsoluteManifestPath = MakeSafeAbsolutePath(ManifestPath.is_absolute() ? ManifestPath : m_Path / ManifestPath); - BuildManifest Manifest = ParseBuildManifest(AbsoluteManifestPath); - if (Manifest.Parts.empty()) - { - throw std::runtime_error(fmt::format("Manifest file at '{}' is invalid", ManifestPath)); - } - - UploadParts.resize(Manifest.Parts.size()); - for (size_t PartIndex = 0; PartIndex < Manifest.Parts.size(); PartIndex++) - { - BuildManifest::Part& PartManifest = Manifest.Parts[PartIndex]; - if (ManifestPath.is_relative()) - { - PartManifest.Files.push_back(ManifestPath); - } - - UploadPart& Part = UploadParts[PartIndex]; - FolderContent& Content = Part.Content; - - GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats; - - const std::vector<std::filesystem::path>& AssetPaths = PartManifest.Files; - Content = GetValidFolderContent( - m_IOWorkerPool, - LocalFolderScanStats, - m_Path, - AssetPaths, - [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); }, - 1000, - m_AbortFlag, - m_PauseFlag); - - if (Content.Paths.size() != AssetPaths.size()) - { - const tsl::robin_set<std::filesystem::path> FoundPaths(Content.Paths.begin(), Content.Paths.end()); - ExtendableStringBuilder<1024> SB; - for (const std::filesystem::path& AssetPath : AssetPaths) - { - if 
(!FoundPaths.contains(AssetPath)) - { - SB << "\n " << AssetPath.generic_string(); - } - } - throw std::runtime_error( - fmt::format("Manifest file at '{}' references files that does not exist{}", ManifestPath, SB.ToView())); - } - - Part.PartId = PartManifest.PartId; - Part.PartName = PartManifest.PartName; - Part.TotalRawSize = std::accumulate(Part.Content.RawSizes.begin(), Part.Content.RawSizes.end(), std::uint64_t(0)); - } - - return UploadParts; -} - -std::vector<std::pair<Oid, std::string>> -BuildsOperationUploadFolder::Execute(const Oid& BuildPartId, - const std::string_view BuildPartName, - const std::filesystem::path& ManifestPath, - ChunkingController& ChunkController, - ChunkingCache& ChunkCache) -{ - ZEN_TRACE_CPU("BuildsOperationUploadFolder::Execute"); - try - { - Stopwatch ReadPartsTimer; - std::vector<UploadPart> UploadParts = ManifestPath.empty() ? ReadFolder() : ReadManifestParts(ManifestPath); - - for (UploadPart& Part : UploadParts) - { - if (Part.PartId == Oid::Zero) - { - if (UploadParts.size() != 1) - { - throw std::runtime_error(fmt::format("Multi part upload manifest '{}' must contains build part id", ManifestPath)); - } - - if (BuildPartId == Oid::Zero) - { - Part.PartId = Oid::NewOid(); - } - else - { - Part.PartId = BuildPartId; - } - } - if (Part.PartName.empty()) - { - if (UploadParts.size() != 1) - { - throw std::runtime_error(fmt::format("Multi part upload manifest '{}' must contains build part name", ManifestPath)); - } - if (BuildPartName.empty()) - { - throw std::runtime_error("Build part name must be set"); - } - Part.PartName = std::string(BuildPartName); - } - } - - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Reading {} parts took {}", - UploadParts.size(), - NiceTimeSpanMs(ReadPartsTimer.GetElapsedTimeMs())); - } - - const uint32_t PartsUploadStepCount = gsl::narrow<uint32_t>(uint32_t(PartTaskSteps::StepCount) * UploadParts.size()); - - const uint32_t PrepareBuildStep = 0; - const uint32_t 
UploadPartsStep = 1; - const uint32_t FinalizeBuildStep = UploadPartsStep + PartsUploadStepCount; - const uint32_t CleanupStep = FinalizeBuildStep + 1; - const uint32_t StepCount = CleanupStep + 1; - - auto EndProgress = MakeGuard([&]() { m_LogOutput.SetLogOperationProgress(StepCount, StepCount); }); - - Stopwatch ProcessTimer; - - CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir); - CreateDirectories(m_Options.TempDir); - auto _ = MakeGuard([&]() { CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir); }); - - m_LogOutput.SetLogOperationProgress(PrepareBuildStep, StepCount); - - m_PrepBuildResultFuture = m_NetworkPool.EnqueueTask(std::packaged_task<PrepareBuildResult()>{[this] { return PrepareBuild(); }}, - WorkerThreadPool::EMode::EnableBacklog); - - for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++) - { - const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount)); - - const UploadPart& Part = UploadParts[PartIndex]; - UploadBuildPart(ChunkController, ChunkCache, PartIndex, Part, PartStepOffset, StepCount); - if (m_AbortFlag) - { - return {}; - } - } - - m_LogOutput.SetLogOperationProgress(FinalizeBuildStep, StepCount); - - if (m_CreateBuild && !m_AbortFlag) - { - Stopwatch FinalizeBuildTimer; - m_Storage.BuildStorage->FinalizeBuild(m_BuildId); - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "FinalizeBuild took {}", NiceTimeSpanMs(FinalizeBuildTimer.GetElapsedTimeMs())); - } - } - - m_LogOutput.SetLogOperationProgress(CleanupStep, StepCount); - - std::vector<std::pair<Oid, std::string>> Result; - Result.reserve(UploadParts.size()); - for (UploadPart& Part : UploadParts) - { - Result.push_back(std::make_pair(Part.PartId, Part.PartName)); - } - return Result; - } - catch (const std::exception&) - { - m_AbortFlag = true; - throw; - } -} - -bool -BuildsOperationUploadFolder::IsAcceptedFolder(const std::string_view& 
RelativePath) const -{ - for (const std::string& ExcludeFolder : m_Options.ExcludeFolders) - { - if (RelativePath.starts_with(ExcludeFolder)) - { - if (RelativePath.length() == ExcludeFolder.length()) - { - return false; - } - else if (RelativePath[ExcludeFolder.length()] == '/') - { - return false; - } - } - } - return true; -} - -bool -BuildsOperationUploadFolder::IsAcceptedFile(const std::string_view& RelativePath) const -{ - if (RelativePath == m_Options.ZenExcludeManifestName) - { - return false; - } - for (const std::string& ExcludeExtension : m_Options.ExcludeExtensions) - { - if (RelativePath.ends_with(ExcludeExtension)) - { - return false; - } - } - return true; -} - -void -BuildsOperationUploadFolder::ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - std::vector<uint32_t>& ChunkIndexes, - std::vector<std::vector<uint32_t>>& OutBlocks) -{ - ZEN_TRACE_CPU("ArrangeChunksIntoBlocks"); - std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&Content, &Lookup](uint32_t Lhs, uint32_t Rhs) { - const ChunkedContentLookup::ChunkSequenceLocation& LhsLocation = GetChunkSequenceLocations(Lookup, Lhs)[0]; - const ChunkedContentLookup::ChunkSequenceLocation& RhsLocation = GetChunkSequenceLocations(Lookup, Rhs)[0]; - if (LhsLocation.SequenceIndex < RhsLocation.SequenceIndex) - { - return true; - } - else if (LhsLocation.SequenceIndex > RhsLocation.SequenceIndex) - { - return false; - } - return LhsLocation.Offset < RhsLocation.Offset; - }); - - uint64_t MaxBlockSizeLowThreshold = m_Options.BlockParameters.MaxBlockSize - (m_Options.BlockParameters.MaxBlockSize / 16); - - uint64_t BlockSize = 0; - - uint32_t ChunkIndexStart = 0; - for (uint32_t ChunkIndexOffset = 0; ChunkIndexOffset < ChunkIndexes.size();) - { - const uint32_t ChunkIndex = ChunkIndexes[ChunkIndexOffset]; - const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; - - if (((BlockSize + ChunkSize) > m_Options.BlockParameters.MaxBlockSize) 
|| - (ChunkIndexOffset - ChunkIndexStart) > m_Options.BlockParameters.MaxChunksPerBlock) - { - // Within the span of MaxBlockSizeLowThreshold and MaxBlockSize, see if there is a break - // between source paths for chunks. Break the block at the last such break if any. - ZEN_ASSERT(ChunkIndexOffset > ChunkIndexStart); - - const uint32_t ChunkSequenceIndex = Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[ChunkIndex]].SequenceIndex; - - uint64_t ScanBlockSize = BlockSize; - - uint32_t ScanChunkIndexOffset = ChunkIndexOffset - 1; - while (ScanChunkIndexOffset > (ChunkIndexStart + 2)) - { - const uint32_t TestChunkIndex = ChunkIndexes[ScanChunkIndexOffset]; - const uint64_t TestChunkSize = Content.ChunkedContent.ChunkRawSizes[TestChunkIndex]; - if ((ScanBlockSize - TestChunkSize) < MaxBlockSizeLowThreshold) - { - break; - } - - const uint32_t TestSequenceIndex = - Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[TestChunkIndex]].SequenceIndex; - if (ChunkSequenceIndex != TestSequenceIndex) - { - ChunkIndexOffset = ScanChunkIndexOffset + 1; - break; - } - - ScanBlockSize -= TestChunkSize; - ScanChunkIndexOffset--; - } - - std::vector<uint32_t> ChunksInBlock; - ChunksInBlock.reserve(ChunkIndexOffset - ChunkIndexStart); - for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexOffset; AddIndexOffset++) - { - const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset]; - ChunksInBlock.push_back(AddChunkIndex); - } - OutBlocks.emplace_back(std::move(ChunksInBlock)); - BlockSize = 0; - ChunkIndexStart = ChunkIndexOffset; - } - else - { - ChunkIndexOffset++; - BlockSize += ChunkSize; - } - } - if (ChunkIndexStart < ChunkIndexes.size()) - { - std::vector<uint32_t> ChunksInBlock; - ChunksInBlock.reserve(ChunkIndexes.size() - ChunkIndexStart); - for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexes.size(); AddIndexOffset++) - { - const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset]; - 
ChunksInBlock.push_back(AddChunkIndex); - } - OutBlocks.emplace_back(std::move(ChunksInBlock)); - } -} - -void -BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - const std::vector<std::vector<uint32_t>>& NewBlockChunks, - GeneratedBlocks& OutBlocks, - GenerateBlocksStatistics& GenerateBlocksStats, - UploadStatistics& UploadStats) -{ - ZEN_TRACE_CPU("GenerateBuildBlocks"); - const std::size_t NewBlockCount = NewBlockChunks.size(); - if (NewBlockCount > 0) - { - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Generate Blocks")); - OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr); - - OutBlocks.BlockDescriptions.resize(NewBlockCount); - OutBlocks.BlockSizes.resize(NewBlockCount); - OutBlocks.BlockMetaDatas.resize(NewBlockCount); - OutBlocks.BlockHeaders.resize(NewBlockCount); - OutBlocks.MetaDataHasBeenUploaded.resize(NewBlockCount, 0); - OutBlocks.BlockHashToBlockIndex.reserve(NewBlockCount); - - RwLock Lock; - - WorkerThreadPool& GenerateBlobsPool = m_IOWorkerPool; - WorkerThreadPool& UploadBlocksPool = m_NetworkPool; - - FilteredRate FilteredGeneratedBytesPerSecond; - FilteredRate FilteredUploadedBytesPerSecond; - - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - - std::atomic<uint64_t> QueuedPendingBlocksForUpload = 0; - - for (size_t BlockIndex = 0; BlockIndex < NewBlockCount; BlockIndex++) - { - if (Work.IsAborted()) - { - break; - } - const std::vector<uint32_t>& ChunksInBlock = NewBlockChunks[BlockIndex]; - Work.ScheduleWork( - GenerateBlobsPool, - [this, - &Content, - &Lookup, - &Work, - &UploadBlocksPool, - NewBlockCount, - ChunksInBlock, - &Lock, - &OutBlocks, - &GenerateBlocksStats, - &UploadStats, - &FilteredGeneratedBytesPerSecond, - &QueuedPendingBlocksForUpload, - &FilteredUploadedBytesPerSecond, - BlockIndex](std::atomic<bool>&) { - if (!m_AbortFlag) - { - 
ZEN_TRACE_CPU("GenerateBuildBlocks_Generate"); - - FilteredGeneratedBytesPerSecond.Start(); - - Stopwatch GenerateTimer; - CompressedBuffer CompressedBlock = - GenerateBlock(Content, Lookup, ChunksInBlock, OutBlocks.BlockDescriptions[BlockIndex]); - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Generated block {} ({}) containing {} chunks in {}", - OutBlocks.BlockDescriptions[BlockIndex].BlockHash, - NiceBytes(CompressedBlock.GetCompressedSize()), - OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size(), - NiceTimeSpanMs(GenerateTimer.GetElapsedTimeMs())); - } - - OutBlocks.BlockSizes[BlockIndex] = CompressedBlock.GetCompressedSize(); - { - CbObjectWriter Writer; - Writer.AddString("createdBy", "zen"); - OutBlocks.BlockMetaDatas[BlockIndex] = Writer.Save(); - } - GenerateBlocksStats.GeneratedBlockByteCount += OutBlocks.BlockSizes[BlockIndex]; - GenerateBlocksStats.GeneratedBlockCount++; - - Lock.WithExclusiveLock([&]() { - OutBlocks.BlockHashToBlockIndex.insert_or_assign(OutBlocks.BlockDescriptions[BlockIndex].BlockHash, BlockIndex); - }); - - { - std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments(); - ZEN_ASSERT(Segments.size() >= 2); - OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); - } - - if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) - { - FilteredGeneratedBytesPerSecond.Stop(); - } - - if (QueuedPendingBlocksForUpload.load() > 16) - { - std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments(); - ZEN_ASSERT(Segments.size() >= 2); - OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); - } - else - { - if (!m_AbortFlag) - { - QueuedPendingBlocksForUpload++; - - Work.ScheduleWork( - UploadBlocksPool, - [this, - NewBlockCount, - &GenerateBlocksStats, - &UploadStats, - &FilteredUploadedBytesPerSecond, - &QueuedPendingBlocksForUpload, - &OutBlocks, - BlockIndex, - Payload = 
std::move(CompressedBlock)](std::atomic<bool>&) mutable { - auto _ = MakeGuard([&QueuedPendingBlocksForUpload] { QueuedPendingBlocksForUpload--; }); - if (!m_AbortFlag) - { - if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) - { - ZEN_TRACE_CPU("GenerateBuildBlocks_Save"); - - FilteredUploadedBytesPerSecond.Stop(); - std::span<const SharedBuffer> Segments = Payload.GetCompressed().GetSegments(); - ZEN_ASSERT(Segments.size() >= 2); - OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); - } - else - { - ZEN_TRACE_CPU("GenerateBuildBlocks_Upload"); - - FilteredUploadedBytesPerSecond.Start(); - - const CbObject BlockMetaData = - BuildChunkBlockDescription(OutBlocks.BlockDescriptions[BlockIndex], - OutBlocks.BlockMetaDatas[BlockIndex]); - - const IoHash& BlockHash = OutBlocks.BlockDescriptions[BlockIndex].BlockHash; - const uint64_t CompressedBlockSize = Payload.GetCompressedSize(); - - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) - { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - BlockHash, - ZenContentType::kCompressedBinary, - Payload.GetCompressed()); - } - - m_Storage.BuildStorage->PutBuildBlob(m_BuildId, - BlockHash, - ZenContentType::kCompressedBinary, - std::move(Payload).GetCompressed()); - UploadStats.BlocksBytes += CompressedBlockSize; - - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Uploaded block {} ({}) containing {} chunks", - BlockHash, - NiceBytes(CompressedBlockSize), - OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); - } - - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) - { - m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId, - std::vector<IoHash>({BlockHash}), - std::vector<CbObject>({BlockMetaData})); - } - - bool MetadataSucceeded = - m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData); - if (MetadataSucceeded) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Uploaded block 
{} metadata ({})", - BlockHash, - NiceBytes(BlockMetaData.GetSize())); - } - - OutBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; - UploadStats.BlocksBytes += BlockMetaData.GetSize(); - } - - UploadStats.BlockCount++; - if (UploadStats.BlockCount == NewBlockCount) - { - FilteredUploadedBytesPerSecond.Stop(); - } - } - } - }); - } - } - } - }); - } - - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(PendingWork); - - FilteredGeneratedBytesPerSecond.Update(GenerateBlocksStats.GeneratedBlockByteCount.load()); - FilteredUploadedBytesPerSecond.Update(UploadStats.BlocksBytes.load()); - - std::string Details = fmt::format("Generated {}/{} ({}, {}B/s). Uploaded {}/{} ({}, {}bits/s)", - GenerateBlocksStats.GeneratedBlockCount.load(), - NewBlockCount, - NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), - NiceNum(FilteredGeneratedBytesPerSecond.GetCurrent()), - UploadStats.BlockCount.load(), - NewBlockCount, - NiceBytes(UploadStats.BlocksBytes.load()), - NiceNum(FilteredUploadedBytesPerSecond.GetCurrent() * 8)); - - Progress.UpdateState({.Task = "Generating blocks", - .Details = Details, - .TotalCount = gsl::narrow<uint64_t>(NewBlockCount), - .RemainingCount = gsl::narrow<uint64_t>(NewBlockCount - GenerateBlocksStats.GeneratedBlockCount.load()), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }); - - ZEN_ASSERT(m_AbortFlag || QueuedPendingBlocksForUpload.load() == 0); - - Progress.Finish(); - - GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTimeUS(); - UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); - } -} - -std::vector<uint32_t> -BuildsOperationUploadFolder::CalculateAbsoluteChunkOrders( - const std::span<const IoHash> LocalChunkHashes, - const std::span<const uint32_t> LocalChunkOrder, - const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& 
ChunkHashToLocalChunkIndex, - const std::span<const uint32_t>& LooseChunkIndexes, - const std::span<const ChunkBlockDescription>& BlockDescriptions) -{ - ZEN_TRACE_CPU("CalculateAbsoluteChunkOrders"); - - std::vector<IoHash> TmpAbsoluteChunkHashes; - if (m_Options.DoExtraContentValidation) - { - TmpAbsoluteChunkHashes.reserve(LocalChunkHashes.size()); - } - std::vector<uint32_t> LocalChunkIndexToAbsoluteChunkIndex; - LocalChunkIndexToAbsoluteChunkIndex.resize(LocalChunkHashes.size(), (uint32_t)-1); - std::uint32_t AbsoluteChunkCount = 0; - for (uint32_t ChunkIndex : LooseChunkIndexes) - { - LocalChunkIndexToAbsoluteChunkIndex[ChunkIndex] = AbsoluteChunkCount; - if (m_Options.DoExtraContentValidation) - { - TmpAbsoluteChunkHashes.push_back(LocalChunkHashes[ChunkIndex]); - } - AbsoluteChunkCount++; - } - for (const ChunkBlockDescription& Block : BlockDescriptions) - { - for (const IoHash& ChunkHash : Block.ChunkRawHashes) - { - if (auto It = ChunkHashToLocalChunkIndex.find(ChunkHash); It != ChunkHashToLocalChunkIndex.end()) - { - const uint32_t LocalChunkIndex = It->second; - ZEN_ASSERT_SLOW(LocalChunkHashes[LocalChunkIndex] == ChunkHash); - LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex] = AbsoluteChunkCount; - } - if (m_Options.DoExtraContentValidation) - { - TmpAbsoluteChunkHashes.push_back(ChunkHash); - } - AbsoluteChunkCount++; - } - } - std::vector<uint32_t> AbsoluteChunkOrder; - AbsoluteChunkOrder.reserve(LocalChunkHashes.size()); - for (const uint32_t LocalChunkIndex : LocalChunkOrder) - { - const uint32_t AbsoluteChunkIndex = LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex]; - if (m_Options.DoExtraContentValidation) - { - ZEN_ASSERT(LocalChunkHashes[LocalChunkIndex] == TmpAbsoluteChunkHashes[AbsoluteChunkIndex]); - } - AbsoluteChunkOrder.push_back(AbsoluteChunkIndex); - } - if (m_Options.DoExtraContentValidation) - { - uint32_t OrderIndex = 0; - while (OrderIndex < LocalChunkOrder.size()) - { - const uint32_t LocalChunkIndex = 
LocalChunkOrder[OrderIndex]; - const IoHash& LocalChunkHash = LocalChunkHashes[LocalChunkIndex]; - const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrder[OrderIndex]; - const IoHash& AbsoluteChunkHash = TmpAbsoluteChunkHashes[AbsoluteChunkIndex]; - ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); - OrderIndex++; - } - } - return AbsoluteChunkOrder; -} - -CompositeBuffer -BuildsOperationUploadFolder::FetchChunk(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - const IoHash& ChunkHash, - ReadFileCache& OpenFileCache) -{ - ZEN_TRACE_CPU("FetchChunk"); - auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); - ZEN_ASSERT(It != Lookup.ChunkHashToChunkIndex.end()); - uint32_t ChunkIndex = It->second; - std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex); - ZEN_ASSERT(!ChunkLocations.empty()); - CompositeBuffer Chunk = - OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, ChunkLocations[0].Offset, Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); - if (!Chunk) - { - throw std::runtime_error(fmt::format("Unable to read chunk at {}, size {} from '{}'", - ChunkLocations[0].Offset, - Content.ChunkedContent.ChunkRawSizes[ChunkIndex], - Content.Paths[Lookup.SequenceIndexFirstPathIndex[ChunkLocations[0].SequenceIndex]])); - } - ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == ChunkHash); - return Chunk; -}; - -CompressedBuffer -BuildsOperationUploadFolder::GenerateBlock(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - const std::vector<uint32_t>& ChunksInBlock, - ChunkBlockDescription& OutBlockDescription) -{ - ZEN_TRACE_CPU("GenerateBlock"); - ReadFileCache OpenFileCache(m_DiskStats.OpenReadCount, - m_DiskStats.CurrentOpenFileCount, - m_DiskStats.ReadCount, - m_DiskStats.ReadByteCount, - m_Path, - Content, - Lookup, - 4); - - std::vector<std::pair<IoHash, FetchChunkFunc>> BlockContent; - BlockContent.reserve(ChunksInBlock.size()); - for (uint32_t 
ChunkIndex : ChunksInBlock) - { - BlockContent.emplace_back(std::make_pair( - Content.ChunkedContent.ChunkHashes[ChunkIndex], - [this, &Content, &Lookup, &OpenFileCache, ChunkIndex](const IoHash& ChunkHash) -> std::pair<uint64_t, CompressedBuffer> { - CompositeBuffer Chunk = FetchChunk(Content, Lookup, ChunkHash, OpenFileCache); - ZEN_ASSERT(Chunk); - uint64_t RawSize = Chunk.GetSize(); - - const bool ShouldCompressChunk = RawSize >= m_Options.MinimumSizeForCompressInBlock && - IsChunkCompressable(m_NonCompressableExtensionHashes, Content, Lookup, ChunkIndex); - - const OodleCompressionLevel CompressionLevel = - ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None; - return {RawSize, CompressedBuffer::Compress(Chunk, OodleCompressor::Mermaid, CompressionLevel)}; - })); - } - - return GenerateChunkBlock(std::move(BlockContent), OutBlockDescription); -}; - -CompressedBuffer -BuildsOperationUploadFolder::RebuildBlock(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - CompositeBuffer&& HeaderBuffer, - const std::vector<uint32_t>& ChunksInBlock) -{ - ZEN_TRACE_CPU("RebuildBlock"); - ReadFileCache OpenFileCache(m_DiskStats.OpenReadCount, - m_DiskStats.CurrentOpenFileCount, - m_DiskStats.ReadCount, - m_DiskStats.ReadByteCount, - m_Path, - Content, - Lookup, - 4); - - std::vector<SharedBuffer> ResultBuffers; - ResultBuffers.reserve(HeaderBuffer.GetSegments().size() + ChunksInBlock.size()); - ResultBuffers.insert(ResultBuffers.end(), HeaderBuffer.GetSegments().begin(), HeaderBuffer.GetSegments().end()); - for (uint32_t ChunkIndex : ChunksInBlock) - { - std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex); - ZEN_ASSERT(!ChunkLocations.empty()); - CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, - ChunkLocations[0].Offset, - Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); - ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) 
== Content.ChunkedContent.ChunkHashes[ChunkIndex]); - - const uint64_t RawSize = Chunk.GetSize(); - const bool ShouldCompressChunk = RawSize >= m_Options.MinimumSizeForCompressInBlock && - IsChunkCompressable(m_NonCompressableExtensionHashes, Content, Lookup, ChunkIndex); - - const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None; - - CompositeBuffer CompressedChunk = - CompressedBuffer::Compress(std::move(Chunk), OodleCompressor::Mermaid, CompressionLevel).GetCompressed(); - ResultBuffers.insert(ResultBuffers.end(), CompressedChunk.GetSegments().begin(), CompressedChunk.GetSegments().end()); - } - return CompressedBuffer::FromCompressedNoValidate(CompositeBuffer(std::move(ResultBuffers))); -}; - -void -BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController, - ChunkingCache& ChunkCache, - uint32_t PartIndex, - const UploadPart& Part, - uint32_t PartStepOffset, - uint32_t StepCount) -{ - Stopwatch UploadTimer; - - ChunkingStatistics ChunkingStats; - FindBlocksStatistics FindBlocksStats; - ReuseBlocksStatistics ReuseBlocksStats; - UploadStatistics UploadStats; - GenerateBlocksStatistics GenerateBlocksStats; - - LooseChunksStatistics LooseChunksStats; - ChunkedFolderContent LocalContent; - - m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::ChunkPartContent, StepCount); - - Stopwatch ScanTimer; - { - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Scan Folder")); - OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr); - - FilteredRate FilteredBytesHashed; - FilteredBytesHashed.Start(); - LocalContent = ChunkFolderContent( - ChunkingStats, - m_IOWorkerPool, - m_Path, - Part.Content, - ChunkController, - ChunkCache, - m_LogOutput.GetProgressUpdateDelayMS(), - [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { - FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); - std::string 
Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", - ChunkingStats.FilesProcessed.load(), - Part.Content.Paths.size(), - NiceBytes(ChunkingStats.BytesHashed.load()), - NiceBytes(Part.TotalRawSize), - NiceNum(FilteredBytesHashed.GetCurrent()), - ChunkingStats.UniqueChunksFound.load(), - NiceBytes(ChunkingStats.UniqueBytesFound.load())); - Progress.UpdateState({.Task = "Scanning files ", - .Details = Details, - .TotalCount = Part.TotalRawSize, - .RemainingCount = Part.TotalRawSize - ChunkingStats.BytesHashed.load(), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }, - m_AbortFlag, - m_PauseFlag); - FilteredBytesHashed.Stop(); - Progress.Finish(); - if (m_AbortFlag) - { - return; - } - } - - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec", - Part.Content.Paths.size(), - NiceBytes(Part.TotalRawSize), - ChunkingStats.UniqueChunksFound.load(), - NiceBytes(ChunkingStats.UniqueBytesFound.load()), - m_Path, - NiceTimeSpanMs(ScanTimer.GetElapsedTimeMs()), - NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); - } - - const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); - - std::vector<size_t> ReuseBlockIndexes; - std::vector<uint32_t> NewBlockChunkIndexes; - - if (PartIndex == 0) - { - const PrepareBuildResult PrepBuildResult = m_PrepBuildResultFuture.get(); - - m_FindBlocksStats.FindBlockTimeMS = PrepBuildResult.ElapsedTimeMs; - m_FindBlocksStats.FoundBlockCount = PrepBuildResult.KnownBlocks.size(); - - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Build prepare took {}. {} took {}, payload size {}{}", - NiceTimeSpanMs(PrepBuildResult.ElapsedTimeMs), - m_CreateBuild ? 
"PutBuild" : "GetBuild", - NiceTimeSpanMs(PrepBuildResult.PrepareBuildTimeMs), - NiceBytes(PrepBuildResult.PayloadSize), - m_Options.IgnoreExistingBlocks ? "" - : fmt::format(". Found {} blocks in {}", - PrepBuildResult.KnownBlocks.size(), - NiceTimeSpanMs(PrepBuildResult.FindBlocksTimeMs))); - } - - m_PreferredMultipartChunkSize = PrepBuildResult.PreferredMultipartChunkSize; - - m_LargeAttachmentSize = m_Options.AllowMultiparts ? m_PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; - - m_KnownBlocks = std::move(PrepBuildResult.KnownBlocks); - } - - ZEN_ASSERT(m_PreferredMultipartChunkSize != 0); - ZEN_ASSERT(m_LargeAttachmentSize != 0); - - m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::CalculateDelta, StepCount); - - Stopwatch BlockArrangeTimer; - - std::vector<std::uint32_t> LooseChunkIndexes; - { - bool EnableBlocks = true; - std::vector<std::uint32_t> BlockChunkIndexes; - for (uint32_t ChunkIndex = 0; ChunkIndex < LocalContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) - { - const uint64_t ChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; - if (!EnableBlocks || ChunkRawSize == 0 || ChunkRawSize > m_Options.BlockParameters.MaxChunkEmbedSize) - { - LooseChunkIndexes.push_back(ChunkIndex); - LooseChunksStats.ChunkByteCount += ChunkRawSize; - } - else - { - BlockChunkIndexes.push_back(ChunkIndex); - FindBlocksStats.PotentialChunkByteCount += ChunkRawSize; - } - } - FindBlocksStats.PotentialChunkCount += BlockChunkIndexes.size(); - LooseChunksStats.ChunkCount = LooseChunkIndexes.size(); - - if (m_Options.IgnoreExistingBlocks) - { - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Ignoring any existing blocks in store"); - } - NewBlockChunkIndexes = std::move(BlockChunkIndexes); - } - else - { - ReuseBlockIndexes = FindReuseBlocks(m_LogOutput, - m_Options.BlockReuseMinPercentLimit, - m_Options.IsVerbose, - ReuseBlocksStats, - m_KnownBlocks, - LocalContent.ChunkedContent.ChunkHashes, - 
BlockChunkIndexes, - NewBlockChunkIndexes); - FindBlocksStats.AcceptedBlockCount += ReuseBlockIndexes.size(); - - for (const ChunkBlockDescription& Description : m_KnownBlocks) - { - for (uint32_t ChunkRawLength : Description.ChunkRawLengths) - { - FindBlocksStats.FoundBlockByteCount += ChunkRawLength; - } - FindBlocksStats.FoundBlockChunkCount += Description.ChunkRawHashes.size(); - } - } - } - - std::vector<std::vector<uint32_t>> NewBlockChunks; - ArrangeChunksIntoBlocks(LocalContent, LocalLookup, NewBlockChunkIndexes, NewBlockChunks); - - FindBlocksStats.NewBlocksCount += NewBlockChunks.size(); - for (uint32_t ChunkIndex : NewBlockChunkIndexes) - { - FindBlocksStats.NewBlocksChunkByteCount += LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; - } - FindBlocksStats.NewBlocksChunkCount += NewBlockChunkIndexes.size(); - - const double AcceptedByteCountPercent = FindBlocksStats.PotentialChunkByteCount > 0 - ? (100.0 * ReuseBlocksStats.AcceptedRawByteCount / FindBlocksStats.PotentialChunkByteCount) - : 0.0; - - const double AcceptedReduntantByteCountPercent = - ReuseBlocksStats.AcceptedByteCount > 0 ? 
(100.0 * ReuseBlocksStats.AcceptedReduntantByteCount) / - (ReuseBlocksStats.AcceptedByteCount + ReuseBlocksStats.AcceptedReduntantByteCount) - : 0.0; - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Found {} chunks in {} ({}) blocks eligible for reuse in {}\n" - " Reusing {} ({}) matching chunks in {} blocks ({:.1f}%)\n" - " Accepting {} ({}) redundant chunks ({:.1f}%)\n" - " Rejected {} ({}) chunks in {} blocks\n" - " Arranged {} ({}) chunks in {} new blocks\n" - " Keeping {} ({}) chunks as loose chunks\n" - " Discovery completed in {}", - FindBlocksStats.FoundBlockChunkCount, - FindBlocksStats.FoundBlockCount, - NiceBytes(FindBlocksStats.FoundBlockByteCount), - NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS), - - ReuseBlocksStats.AcceptedChunkCount, - NiceBytes(ReuseBlocksStats.AcceptedRawByteCount), - FindBlocksStats.AcceptedBlockCount, - AcceptedByteCountPercent, - - ReuseBlocksStats.AcceptedReduntantChunkCount, - NiceBytes(ReuseBlocksStats.AcceptedReduntantByteCount), - AcceptedReduntantByteCountPercent, - - ReuseBlocksStats.RejectedChunkCount, - NiceBytes(ReuseBlocksStats.RejectedByteCount), - ReuseBlocksStats.RejectedBlockCount, - - FindBlocksStats.NewBlocksChunkCount, - NiceBytes(FindBlocksStats.NewBlocksChunkByteCount), - FindBlocksStats.NewBlocksCount, - - LooseChunksStats.ChunkCount, - NiceBytes(LooseChunksStats.ChunkByteCount), - - NiceTimeSpanMs(BlockArrangeTimer.GetElapsedTimeMs())); - } - - m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::GenerateBlocks, StepCount); - GeneratedBlocks NewBlocks; - - if (!NewBlockChunks.empty()) - { - Stopwatch GenerateBuildBlocksTimer; - auto __ = MakeGuard([&]() { - uint64_t BlockGenerateTimeUs = GenerateBuildBlocksTimer.GetElapsedTimeUs(); - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Generated {} ({}) and uploaded {} ({}) blocks in {}. Generate speed: {}B/sec. 
Transfer speed {}bits/sec.", - GenerateBlocksStats.GeneratedBlockCount.load(), - NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount), - UploadStats.BlockCount.load(), - NiceBytes(UploadStats.BlocksBytes.load()), - NiceTimeSpanMs(BlockGenerateTimeUs / 1000), - NiceNum(GetBytesPerSecond(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, - GenerateBlocksStats.GeneratedBlockByteCount)), - NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.BlocksBytes * 8))); - } - }); - GenerateBuildBlocks(LocalContent, LocalLookup, NewBlockChunks, NewBlocks, GenerateBlocksStats, UploadStats); - } - - m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::BuildPartManifest, StepCount); - - CbObject PartManifest; - { - CbObjectWriter PartManifestWriter; - Stopwatch ManifestGenerationTimer; - auto __ = MakeGuard([&]() { - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Generated build part manifest in {} ({})", - NiceTimeSpanMs(ManifestGenerationTimer.GetElapsedTimeMs()), - NiceBytes(PartManifestWriter.GetSaveSize())); - } - }); - - PartManifestWriter.BeginObject("chunker"sv); - { - PartManifestWriter.AddString("name"sv, ChunkController.GetName()); - PartManifestWriter.AddObject("parameters"sv, ChunkController.GetParameters()); - } - PartManifestWriter.EndObject(); // chunker - - std::vector<IoHash> AllChunkBlockHashes; - std::vector<ChunkBlockDescription> AllChunkBlockDescriptions; - AllChunkBlockHashes.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); - AllChunkBlockDescriptions.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); - for (size_t ReuseBlockIndex : ReuseBlockIndexes) - { - AllChunkBlockDescriptions.push_back(m_KnownBlocks[ReuseBlockIndex]); - AllChunkBlockHashes.push_back(m_KnownBlocks[ReuseBlockIndex].BlockHash); - } - AllChunkBlockDescriptions.insert(AllChunkBlockDescriptions.end(), - NewBlocks.BlockDescriptions.begin(), - NewBlocks.BlockDescriptions.end()); - for 
(const ChunkBlockDescription& BlockDescription : NewBlocks.BlockDescriptions) - { - AllChunkBlockHashes.push_back(BlockDescription.BlockHash); - } - - std::vector<IoHash> AbsoluteChunkHashes; - if (m_Options.DoExtraContentValidation) - { - tsl::robin_map<IoHash, size_t, IoHash::Hasher> ChunkHashToAbsoluteChunkIndex; - AbsoluteChunkHashes.reserve(LocalContent.ChunkedContent.ChunkHashes.size()); - for (uint32_t ChunkIndex : LooseChunkIndexes) - { - ChunkHashToAbsoluteChunkIndex.insert({LocalContent.ChunkedContent.ChunkHashes[ChunkIndex], AbsoluteChunkHashes.size()}); - AbsoluteChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); - } - for (const ChunkBlockDescription& Block : AllChunkBlockDescriptions) - { - for (const IoHash& ChunkHash : Block.ChunkRawHashes) - { - ChunkHashToAbsoluteChunkIndex.insert({ChunkHash, AbsoluteChunkHashes.size()}); - AbsoluteChunkHashes.push_back(ChunkHash); - } - } - for (const IoHash& ChunkHash : LocalContent.ChunkedContent.ChunkHashes) - { - ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(ChunkHash)] == ChunkHash); - ZEN_ASSERT(LocalContent.ChunkedContent.ChunkHashes[LocalLookup.ChunkHashToChunkIndex.at(ChunkHash)] == ChunkHash); - } - for (const uint32_t ChunkIndex : LocalContent.ChunkedContent.ChunkOrders) - { - ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex])] == - LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); - ZEN_ASSERT(LocalLookup.ChunkHashToChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]) == ChunkIndex); - } - } - std::vector<uint32_t> AbsoluteChunkOrders = CalculateAbsoluteChunkOrders(LocalContent.ChunkedContent.ChunkHashes, - LocalContent.ChunkedContent.ChunkOrders, - LocalLookup.ChunkHashToChunkIndex, - LooseChunkIndexes, - AllChunkBlockDescriptions); - - if (m_Options.DoExtraContentValidation) - { - for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < 
LocalContent.ChunkedContent.ChunkOrders.size(); ChunkOrderIndex++) - { - uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndex]; - uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[ChunkOrderIndex]; - const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; - const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex]; - ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); - } - } - - WriteBuildContentToCompactBinary(PartManifestWriter, - LocalContent.Platform, - LocalContent.Paths, - LocalContent.RawHashes, - LocalContent.RawSizes, - LocalContent.Attributes, - LocalContent.ChunkedContent.SequenceRawHashes, - LocalContent.ChunkedContent.ChunkCounts, - LocalContent.ChunkedContent.ChunkHashes, - LocalContent.ChunkedContent.ChunkRawSizes, - AbsoluteChunkOrders, - LooseChunkIndexes, - AllChunkBlockHashes); - - if (m_Options.DoExtraContentValidation) - { - ChunkedFolderContent VerifyFolderContent; - - std::vector<uint32_t> OutAbsoluteChunkOrders; - std::vector<IoHash> OutLooseChunkHashes; - std::vector<uint64_t> OutLooseChunkRawSizes; - std::vector<IoHash> OutBlockRawHashes; - ReadBuildContentFromCompactBinary(PartManifestWriter.Save(), - VerifyFolderContent.Platform, - VerifyFolderContent.Paths, - VerifyFolderContent.RawHashes, - VerifyFolderContent.RawSizes, - VerifyFolderContent.Attributes, - VerifyFolderContent.ChunkedContent.SequenceRawHashes, - VerifyFolderContent.ChunkedContent.ChunkCounts, - OutAbsoluteChunkOrders, - OutLooseChunkHashes, - OutLooseChunkRawSizes, - OutBlockRawHashes); - ZEN_ASSERT(OutBlockRawHashes == AllChunkBlockHashes); - - for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++) - { - uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; - const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; - - uint32_t VerifyChunkIndex = OutAbsoluteChunkOrders[OrderIndex]; - const IoHash 
VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex]; - - ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); - } - - CalculateLocalChunkOrders(OutAbsoluteChunkOrders, - OutLooseChunkHashes, - OutLooseChunkRawSizes, - AllChunkBlockDescriptions, - VerifyFolderContent.ChunkedContent.ChunkHashes, - VerifyFolderContent.ChunkedContent.ChunkRawSizes, - VerifyFolderContent.ChunkedContent.ChunkOrders, - m_Options.DoExtraContentValidation); - - ZEN_ASSERT(LocalContent.Paths == VerifyFolderContent.Paths); - ZEN_ASSERT(LocalContent.RawHashes == VerifyFolderContent.RawHashes); - ZEN_ASSERT(LocalContent.RawSizes == VerifyFolderContent.RawSizes); - ZEN_ASSERT(LocalContent.Attributes == VerifyFolderContent.Attributes); - ZEN_ASSERT(LocalContent.ChunkedContent.SequenceRawHashes == VerifyFolderContent.ChunkedContent.SequenceRawHashes); - ZEN_ASSERT(LocalContent.ChunkedContent.ChunkCounts == VerifyFolderContent.ChunkedContent.ChunkCounts); - - for (uint32_t OrderIndex = 0; OrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); OrderIndex++) - { - uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; - const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; - uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; - - uint32_t VerifyChunkIndex = VerifyFolderContent.ChunkedContent.ChunkOrders[OrderIndex]; - const IoHash VerifyChunkHash = VerifyFolderContent.ChunkedContent.ChunkHashes[VerifyChunkIndex]; - uint64_t VerifyChunkRawSize = VerifyFolderContent.ChunkedContent.ChunkRawSizes[VerifyChunkIndex]; - - ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); - ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize); - } - } - PartManifest = PartManifestWriter.Save(); - } - - m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::UploadBuildPart, StepCount); - - Stopwatch PutBuildPartResultTimer; - std::pair<IoHash, std::vector<IoHash>> PutBuildPartResult = - 
m_Storage.BuildStorage->PutBuildPart(m_BuildId, Part.PartId, Part.PartName, PartManifest); - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "PutBuildPart took {}, payload size {}. {} attachments are needed.", - NiceTimeSpanMs(PutBuildPartResultTimer.GetElapsedTimeMs()), - NiceBytes(PartManifest.GetSize()), - PutBuildPartResult.second.size()); - } - IoHash PartHash = PutBuildPartResult.first; - - auto UploadAttachments = - [this, &LooseChunksStats, &UploadStats, &LocalContent, &LocalLookup, &NewBlockChunks, &NewBlocks, &LooseChunkIndexes]( - std::span<IoHash> RawHashes, - std::vector<IoHash>& OutUnknownChunks) { - if (!m_AbortFlag) - { - UploadStatistics TempUploadStats; - LooseChunksStatistics TempLooseChunksStats; - - Stopwatch TempUploadTimer; - auto __ = MakeGuard([&]() { - if (!m_Options.IsQuiet) - { - uint64_t TempChunkUploadTimeUs = TempUploadTimer.GetElapsedTimeUs(); - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Uploaded {} ({}) blocks. " - "Compressed {} ({} {}B/s) and uploaded {} ({}) chunks. 
" - "Transferred {} ({}bits/s) in {}", - TempUploadStats.BlockCount.load(), - NiceBytes(TempUploadStats.BlocksBytes), - - TempLooseChunksStats.CompressedChunkCount.load(), - NiceBytes(TempLooseChunksStats.CompressedChunkBytes.load()), - NiceNum(GetBytesPerSecond(TempLooseChunksStats.CompressChunksElapsedWallTimeUS, - TempLooseChunksStats.ChunkByteCount)), - TempUploadStats.ChunkCount.load(), - NiceBytes(TempUploadStats.ChunksBytes), - - NiceBytes(TempUploadStats.BlocksBytes + TempUploadStats.ChunksBytes), - NiceNum(GetBytesPerSecond(TempUploadStats.ElapsedWallTimeUS, TempUploadStats.ChunksBytes * 8)), - NiceTimeSpanMs(TempChunkUploadTimeUs / 1000)); - } - }); - UploadPartBlobs(LocalContent, - LocalLookup, - RawHashes, - NewBlockChunks, - NewBlocks, - LooseChunkIndexes, - m_LargeAttachmentSize, - TempUploadStats, - TempLooseChunksStats, - OutUnknownChunks); - UploadStats += TempUploadStats; - LooseChunksStats += TempLooseChunksStats; - } - }; - - m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::UploadAttachments, StepCount); - - std::vector<IoHash> UnknownChunks; - if (m_Options.IgnoreExistingBlocks) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "PutBuildPart uploading all attachments, needs are: {}", - FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); - } - - std::vector<IoHash> ForceUploadChunkHashes; - ForceUploadChunkHashes.reserve(LooseChunkIndexes.size()); - - for (uint32_t ChunkIndex : LooseChunkIndexes) - { - ForceUploadChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); - } - - for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockHeaders.size(); BlockIndex++) - { - if (NewBlocks.BlockHeaders[BlockIndex]) - { - // Block was not uploaded during generation - ForceUploadChunkHashes.push_back(NewBlocks.BlockDescriptions[BlockIndex].BlockHash); - } - } - UploadAttachments(ForceUploadChunkHashes, UnknownChunks); - } - else if (!PutBuildPartResult.second.empty()) - { - if 
(m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "PutBuildPart needs attachments: {}", - FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); - } - UploadAttachments(PutBuildPartResult.second, UnknownChunks); - } - - auto BuildUnkownChunksResponse = [](const std::vector<IoHash>& UnknownChunks, bool WillRetry) { - return fmt::format( - "The following build blobs was reported as needed for upload but was reported as existing at the start of the " - "operation.{}{}", - WillRetry ? " Treating this as a transient inconsistency issue and will attempt to retry finalization."sv : ""sv, - FormatArray<IoHash>(UnknownChunks, "\n "sv)); - }; - - if (!UnknownChunks.empty()) - { - ZEN_OPERATION_LOG_WARN(m_LogOutput, "{}", BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ true)); - } - - uint32_t FinalizeBuildPartRetryCount = 5; - while (!m_AbortFlag && (FinalizeBuildPartRetryCount--) > 0) - { - Stopwatch FinalizeBuildPartTimer; - std::vector<IoHash> Needs = m_Storage.BuildStorage->FinalizeBuildPart(m_BuildId, Part.PartId, PartHash); - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "FinalizeBuildPart took {}. 
{} attachments are missing.", - NiceTimeSpanMs(FinalizeBuildPartTimer.GetElapsedTimeMs()), - Needs.size()); - } - if (Needs.empty()) - { - break; - } - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "FinalizeBuildPart needs attachments: {}", FormatArray<IoHash>(Needs, "\n "sv)); - } - - std::vector<IoHash> RetryUnknownChunks; - UploadAttachments(Needs, RetryUnknownChunks); - if (RetryUnknownChunks == UnknownChunks) - { - if (FinalizeBuildPartRetryCount > 0) - { - // Back off a bit - Sleep(1000); - } - } - else - { - UnknownChunks = RetryUnknownChunks; - ZEN_OPERATION_LOG_WARN(m_LogOutput, - "{}", - BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ FinalizeBuildPartRetryCount != 0)); - } - } - - if (!UnknownChunks.empty()) - { - throw std::runtime_error(BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ false)); - } - - if (!NewBlocks.BlockDescriptions.empty() && !m_AbortFlag) - { - uint64_t UploadBlockMetadataCount = 0; - Stopwatch UploadBlockMetadataTimer; - - uint32_t FailedMetadataUploadCount = 1; - int32_t MetadataUploadRetryCount = 3; - while ((MetadataUploadRetryCount-- > 0) && (FailedMetadataUploadCount > 0)) - { - FailedMetadataUploadCount = 0; - for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockDescriptions.size(); BlockIndex++) - { - if (m_AbortFlag) - { - break; - } - const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; - if (!NewBlocks.MetaDataHasBeenUploaded[BlockIndex]) - { - const CbObject BlockMetaData = - BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) - { - m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId, - std::vector<IoHash>({BlockHash}), - std::vector<CbObject>({BlockMetaData})); - } - bool MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData); - if (MetadataSucceeded) - { - UploadStats.BlocksBytes += 
BlockMetaData.GetSize(); - NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; - UploadBlockMetadataCount++; - } - else - { - FailedMetadataUploadCount++; - } - } - } - } - if (UploadBlockMetadataCount > 0) - { - uint64_t ElapsedUS = UploadBlockMetadataTimer.GetElapsedTimeUs(); - UploadStats.ElapsedWallTimeUS += ElapsedUS; - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Uploaded metadata for {} blocks in {}", - UploadBlockMetadataCount, - NiceTimeSpanMs(ElapsedUS / 1000)); - } - } - - // The newly generated blocks are now known blocks so the next part upload can use those blocks as well - m_KnownBlocks.insert(m_KnownBlocks.end(), NewBlocks.BlockDescriptions.begin(), NewBlocks.BlockDescriptions.end()); - } - - m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::PutBuildPartStats, StepCount); - - m_Storage.BuildStorage->PutBuildPartStats( - m_BuildId, - Part.PartId, - {{"totalSize", double(Part.LocalFolderScanStats.FoundFileByteCount.load())}, - {"reusedRatio", AcceptedByteCountPercent / 100.0}, - {"reusedBlockCount", double(FindBlocksStats.AcceptedBlockCount)}, - {"reusedBlockByteCount", double(ReuseBlocksStats.AcceptedRawByteCount)}, - {"newBlockCount", double(FindBlocksStats.NewBlocksCount)}, - {"newBlockByteCount", double(FindBlocksStats.NewBlocksChunkByteCount)}, - {"uploadedCount", double(UploadStats.BlockCount.load() + UploadStats.ChunkCount.load())}, - {"uploadedByteCount", double(UploadStats.BlocksBytes.load() + UploadStats.ChunksBytes.load())}, - {"uploadedBytesPerSec", - double(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.ChunksBytes + UploadStats.BlocksBytes))}, - {"elapsedTimeSec", double(UploadTimer.GetElapsedTimeMs() / 1000.0)}}); - - m_LocalFolderScanStats += Part.LocalFolderScanStats; - m_ChunkingStats += ChunkingStats; - m_FindBlocksStats += FindBlocksStats; - m_ReuseBlocksStats += ReuseBlocksStats; - m_UploadStats += UploadStats; - m_GenerateBlocksStats += GenerateBlocksStats; - 
m_LooseChunksStats += LooseChunksStats; -} - -void -BuildsOperationUploadFolder::UploadPartBlobs(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - std::span<IoHash> RawHashes, - const std::vector<std::vector<uint32_t>>& NewBlockChunks, - GeneratedBlocks& NewBlocks, - std::span<const uint32_t> LooseChunkIndexes, - const std::uint64_t LargeAttachmentSize, - UploadStatistics& TempUploadStats, - LooseChunksStatistics& TempLooseChunksStats, - std::vector<IoHash>& OutUnknownChunks) -{ - ZEN_TRACE_CPU("UploadPartBlobs"); - { - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Upload Blobs")); - OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr); - - WorkerThreadPool& ReadChunkPool = m_IOWorkerPool; - WorkerThreadPool& UploadChunkPool = m_NetworkPool; - - FilteredRate FilteredGenerateBlockBytesPerSecond; - FilteredRate FilteredCompressedBytesPerSecond; - FilteredRate FilteredUploadedBytesPerSecond; - - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - - std::atomic<size_t> UploadedBlockSize = 0; - std::atomic<size_t> UploadedBlockCount = 0; - std::atomic<size_t> UploadedRawChunkSize = 0; - std::atomic<size_t> UploadedCompressedChunkSize = 0; - std::atomic<uint32_t> UploadedChunkCount = 0; - - tsl::robin_map<uint32_t, uint32_t> ChunkIndexToLooseChunkOrderIndex; - ChunkIndexToLooseChunkOrderIndex.reserve(LooseChunkIndexes.size()); - for (uint32_t OrderIndex = 0; OrderIndex < LooseChunkIndexes.size(); OrderIndex++) - { - ChunkIndexToLooseChunkOrderIndex.insert_or_assign(LooseChunkIndexes[OrderIndex], OrderIndex); - } - - std::vector<size_t> BlockIndexes; - std::vector<uint32_t> LooseChunkOrderIndexes; - - uint64_t TotalLooseChunksSize = 0; - uint64_t TotalBlocksSize = 0; - for (const IoHash& RawHash : RawHashes) - { - if (auto It = NewBlocks.BlockHashToBlockIndex.find(RawHash); It != NewBlocks.BlockHashToBlockIndex.end()) - { - 
BlockIndexes.push_back(It->second); - TotalBlocksSize += NewBlocks.BlockSizes[It->second]; - } - else if (auto ChunkIndexIt = Lookup.ChunkHashToChunkIndex.find(RawHash); ChunkIndexIt != Lookup.ChunkHashToChunkIndex.end()) - { - const uint32_t ChunkIndex = ChunkIndexIt->second; - if (auto LooseOrderIndexIt = ChunkIndexToLooseChunkOrderIndex.find(ChunkIndex); - LooseOrderIndexIt != ChunkIndexToLooseChunkOrderIndex.end()) - { - LooseChunkOrderIndexes.push_back(LooseOrderIndexIt->second); - TotalLooseChunksSize += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; - } - } - else - { - OutUnknownChunks.push_back(RawHash); - } - } - if (BlockIndexes.empty() && LooseChunkOrderIndexes.empty()) - { - return; - } - - uint64_t TotalRawSize = TotalLooseChunksSize + TotalBlocksSize; - - const size_t UploadBlockCount = BlockIndexes.size(); - const uint32_t UploadChunkCount = gsl::narrow<uint32_t>(LooseChunkOrderIndexes.size()); - - auto AsyncUploadBlock = [this, - &Work, - &NewBlocks, - UploadBlockCount, - &UploadedBlockCount, - UploadChunkCount, - &UploadedChunkCount, - &UploadedBlockSize, - &TempUploadStats, - &FilteredUploadedBytesPerSecond, - &UploadChunkPool](const size_t BlockIndex, - const IoHash BlockHash, - CompositeBuffer&& Payload, - std::atomic<uint64_t>& QueuedPendingInMemoryBlocksForUpload) { - bool IsInMemoryBlock = true; - if (QueuedPendingInMemoryBlocksForUpload.load() > 16) - { - ZEN_TRACE_CPU("AsyncUploadBlock_WriteTempBlock"); - std::filesystem::path TempFilePath = m_Options.TempDir / (BlockHash.ToHexString()); - Payload = CompositeBuffer(WriteToTempFile(std::move(Payload), TempFilePath)); - IsInMemoryBlock = false; - } - else - { - QueuedPendingInMemoryBlocksForUpload++; - } - - Work.ScheduleWork( - UploadChunkPool, - [this, - &QueuedPendingInMemoryBlocksForUpload, - &NewBlocks, - UploadBlockCount, - &UploadedBlockCount, - UploadChunkCount, - &UploadedChunkCount, - &UploadedBlockSize, - &TempUploadStats, - &FilteredUploadedBytesPerSecond, - IsInMemoryBlock, - 
BlockIndex, - BlockHash, - Payload = CompositeBuffer(std::move(Payload))](std::atomic<bool>&) mutable { - auto _ = MakeGuard([IsInMemoryBlock, &QueuedPendingInMemoryBlocksForUpload] { - if (IsInMemoryBlock) - { - QueuedPendingInMemoryBlocksForUpload--; - } - }); - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("AsyncUploadBlock"); - - const uint64_t PayloadSize = Payload.GetSize(); - - FilteredUploadedBytesPerSecond.Start(); - const CbObject BlockMetaData = - BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); - - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) - { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); - } - m_Storage.BuildStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Uploaded block {} ({}) containing {} chunks", - BlockHash, - NiceBytes(PayloadSize), - NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); - } - UploadedBlockSize += PayloadSize; - TempUploadStats.BlocksBytes += PayloadSize; - - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) - { - m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId, - std::vector<IoHash>({BlockHash}), - std::vector<CbObject>({BlockMetaData})); - } - bool MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData); - if (MetadataSucceeded) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Uploaded block {} metadata ({})", - BlockHash, - NiceBytes(BlockMetaData.GetSize())); - } - - NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; - TempUploadStats.BlocksBytes += BlockMetaData.GetSize(); - } - - TempUploadStats.BlockCount++; - - UploadedBlockCount++; - if (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount) - { - FilteredUploadedBytesPerSecond.Stop(); - } - } - }); - }; 
- - auto AsyncUploadLooseChunk = [this, - LargeAttachmentSize, - &Work, - &UploadChunkPool, - &FilteredUploadedBytesPerSecond, - &UploadedBlockCount, - &UploadedChunkCount, - UploadBlockCount, - UploadChunkCount, - &UploadedCompressedChunkSize, - &UploadedRawChunkSize, - &TempUploadStats](const IoHash& RawHash, uint64_t RawSize, CompositeBuffer&& Payload) { - Work.ScheduleWork( - UploadChunkPool, - [this, - &Work, - LargeAttachmentSize, - &FilteredUploadedBytesPerSecond, - &UploadChunkPool, - &UploadedBlockCount, - &UploadedChunkCount, - UploadBlockCount, - UploadChunkCount, - &UploadedCompressedChunkSize, - &UploadedRawChunkSize, - &TempUploadStats, - RawHash, - RawSize, - Payload = CompositeBuffer(std::move(Payload))](std::atomic<bool>&) mutable { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("AsyncUploadLooseChunk"); - - const uint64_t PayloadSize = Payload.GetSize(); - - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) - { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); - } - - if (PayloadSize >= LargeAttachmentSize) - { - ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart"); - TempUploadStats.MultipartAttachmentCount++; - std::vector<std::function<void()>> MultipartWork = m_Storage.BuildStorage->PutLargeBuildBlob( - m_BuildId, - RawHash, - ZenContentType::kCompressedBinary, - PayloadSize, - [Payload = std::move(Payload), &FilteredUploadedBytesPerSecond](uint64_t Offset, - uint64_t Size) mutable -> IoBuffer { - FilteredUploadedBytesPerSecond.Start(); - - IoBuffer PartPayload = Payload.Mid(Offset, Size).Flatten().AsIoBuffer(); - PartPayload.SetContentType(ZenContentType::kBinary); - return PartPayload; - }, - [RawSize, - &TempUploadStats, - &UploadedCompressedChunkSize, - &UploadChunkPool, - &UploadedBlockCount, - UploadBlockCount, - &UploadedChunkCount, - UploadChunkCount, - &FilteredUploadedBytesPerSecond, - &UploadedRawChunkSize](uint64_t SentBytes, bool IsComplete) { - TempUploadStats.ChunksBytes 
+= SentBytes; - UploadedCompressedChunkSize += SentBytes; - if (IsComplete) - { - TempUploadStats.ChunkCount++; - UploadedChunkCount++; - if (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount) - { - FilteredUploadedBytesPerSecond.Stop(); - } - UploadedRawChunkSize += RawSize; - } - }); - for (auto& WorkPart : MultipartWork) - { - Work.ScheduleWork(UploadChunkPool, [Work = std::move(WorkPart)](std::atomic<bool>& AbortFlag) { - ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart_Work"); - if (!AbortFlag) - { - Work(); - } - }); - } - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Uploaded multipart chunk {} ({})", RawHash, NiceBytes(PayloadSize)); - } - } - else - { - ZEN_TRACE_CPU("AsyncUploadLooseChunk_Singlepart"); - m_Storage.BuildStorage->PutBuildBlob(m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Uploaded chunk {} ({})", RawHash, NiceBytes(PayloadSize)); - } - TempUploadStats.ChunksBytes += Payload.GetSize(); - TempUploadStats.ChunkCount++; - UploadedCompressedChunkSize += Payload.GetSize(); - UploadedRawChunkSize += RawSize; - UploadedChunkCount++; - if (UploadedChunkCount == UploadChunkCount) - { - FilteredUploadedBytesPerSecond.Stop(); - } - } - } - }); - }; - - std::vector<size_t> GenerateBlockIndexes; - - std::atomic<uint64_t> GeneratedBlockCount = 0; - std::atomic<uint64_t> GeneratedBlockByteCount = 0; - - std::atomic<uint64_t> QueuedPendingInMemoryBlocksForUpload = 0; - - // Start generation of any non-prebuilt blocks and schedule upload - for (const size_t BlockIndex : BlockIndexes) - { - const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; - if (!m_AbortFlag) - { - Work.ScheduleWork( - ReadChunkPool, - [this, - BlockHash = IoHash(BlockHash), - BlockIndex, - &FilteredGenerateBlockBytesPerSecond, - &Content, - &Lookup, - &NewBlocks, - &NewBlockChunks, - &GenerateBlockIndexes, - 
&GeneratedBlockCount, - &GeneratedBlockByteCount, - &AsyncUploadBlock, - &QueuedPendingInMemoryBlocksForUpload](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("UploadPartBlobs_GenerateBlock"); - - FilteredGenerateBlockBytesPerSecond.Start(); - - Stopwatch GenerateTimer; - CompositeBuffer Payload; - if (NewBlocks.BlockHeaders[BlockIndex]) - { - Payload = - RebuildBlock(Content, Lookup, std::move(NewBlocks.BlockHeaders[BlockIndex]), NewBlockChunks[BlockIndex]) - .GetCompressed(); - } - else - { - ChunkBlockDescription BlockDescription; - CompressedBuffer CompressedBlock = - GenerateBlock(Content, Lookup, NewBlockChunks[BlockIndex], BlockDescription); - if (!CompressedBlock) - { - throw std::runtime_error(fmt::format("Failed generating block {}", BlockHash)); - } - ZEN_ASSERT(BlockDescription.BlockHash == BlockHash); - Payload = std::move(CompressedBlock).GetCompressed(); - } - - GeneratedBlockByteCount += NewBlocks.BlockSizes[BlockIndex]; - GeneratedBlockCount++; - if (GeneratedBlockCount == GenerateBlockIndexes.size()) - { - FilteredGenerateBlockBytesPerSecond.Stop(); - } - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "{} block {} ({}) containing {} chunks in {}", - NewBlocks.BlockHeaders[BlockIndex] ? 
"Regenerated" : "Generated", - NewBlocks.BlockDescriptions[BlockIndex].BlockHash, - NiceBytes(NewBlocks.BlockSizes[BlockIndex]), - NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size(), - NiceTimeSpanMs(GenerateTimer.GetElapsedTimeMs())); - } - if (!m_AbortFlag) - { - AsyncUploadBlock(BlockIndex, BlockHash, std::move(Payload), QueuedPendingInMemoryBlocksForUpload); - } - } - }); - } - } - - // Start compression of any non-precompressed loose chunks and schedule upload - for (const uint32_t LooseChunkOrderIndex : LooseChunkOrderIndexes) - { - const uint32_t ChunkIndex = LooseChunkIndexes[LooseChunkOrderIndex]; - Work.ScheduleWork( - ReadChunkPool, - [this, - &Content, - &Lookup, - &TempLooseChunksStats, - &LooseChunkOrderIndexes, - &FilteredCompressedBytesPerSecond, - &TempUploadStats, - &AsyncUploadLooseChunk, - ChunkIndex](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("UploadPartBlobs_CompressChunk"); - - FilteredCompressedBytesPerSecond.Start(); - Stopwatch CompressTimer; - CompositeBuffer Payload = CompressChunk(Content, Lookup, ChunkIndex, TempLooseChunksStats); - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Compressed chunk {} ({} -> {}) in {}", - Content.ChunkedContent.ChunkHashes[ChunkIndex], - NiceBytes(Content.ChunkedContent.ChunkRawSizes[ChunkIndex]), - NiceBytes(Payload.GetSize()), - NiceTimeSpanMs(CompressTimer.GetElapsedTimeMs())); - } - const uint64_t ChunkRawSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; - TempUploadStats.ReadFromDiskBytes += ChunkRawSize; - if (TempLooseChunksStats.CompressedChunkCount == LooseChunkOrderIndexes.size()) - { - FilteredCompressedBytesPerSecond.Stop(); - } - if (!m_AbortFlag) - { - AsyncUploadLooseChunk(Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkRawSize, std::move(Payload)); - } - } - }); - } - - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(PendingWork); - 
FilteredCompressedBytesPerSecond.Update(TempLooseChunksStats.CompressedChunkRawBytes.load()); - FilteredGenerateBlockBytesPerSecond.Update(GeneratedBlockByteCount.load()); - FilteredUploadedBytesPerSecond.Update(UploadedCompressedChunkSize.load() + UploadedBlockSize.load()); - uint64_t UploadedRawSize = UploadedRawChunkSize.load() + UploadedBlockSize.load(); - uint64_t UploadedCompressedSize = UploadedCompressedChunkSize.load() + UploadedBlockSize.load(); - - std::string Details = fmt::format( - "Compressed {}/{} ({}/{}{}) chunks. " - "Uploaded {}/{} ({}/{}) blobs " - "({}{})", - TempLooseChunksStats.CompressedChunkCount.load(), - LooseChunkOrderIndexes.size(), - NiceBytes(TempLooseChunksStats.CompressedChunkRawBytes), - NiceBytes(TotalLooseChunksSize), - (TempLooseChunksStats.CompressedChunkCount == LooseChunkOrderIndexes.size()) - ? "" - : fmt::format(" {}B/s", NiceNum(FilteredCompressedBytesPerSecond.GetCurrent())), - - UploadedBlockCount.load() + UploadedChunkCount.load(), - UploadBlockCount + UploadChunkCount, - NiceBytes(UploadedRawSize), - NiceBytes(TotalRawSize), - - NiceBytes(UploadedCompressedSize), - (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount) - ? 
"" - : fmt::format(" {}bits/s", NiceNum(FilteredUploadedBytesPerSecond.GetCurrent()))); - - Progress.UpdateState({.Task = "Uploading blobs ", - .Details = Details, - .TotalCount = gsl::narrow<uint64_t>(TotalRawSize), - .RemainingCount = gsl::narrow<uint64_t>(TotalRawSize - UploadedRawSize), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }); - - ZEN_ASSERT(m_AbortFlag || QueuedPendingInMemoryBlocksForUpload.load() == 0); - - Progress.Finish(); - - TempUploadStats.ElapsedWallTimeUS += FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); - TempLooseChunksStats.CompressChunksElapsedWallTimeUS += FilteredCompressedBytesPerSecond.GetElapsedTimeUS(); - } -} - -CompositeBuffer -BuildsOperationUploadFolder::CompressChunk(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - uint32_t ChunkIndex, - LooseChunksStatistics& TempLooseChunksStats) -{ - ZEN_TRACE_CPU("CompressChunk"); - ZEN_ASSERT(!m_Options.TempDir.empty()); - const IoHash& ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; - const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; - - const ChunkedContentLookup::ChunkSequenceLocation& Source = GetChunkSequenceLocations(Lookup, ChunkIndex)[0]; - const std::uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[Source.SequenceIndex]; - IoBuffer RawSource = IoBufferBuilder::MakeFromFile((m_Path / Content.Paths[PathIndex]).make_preferred(), Source.Offset, ChunkSize); - if (!RawSource) - { - throw std::runtime_error(fmt::format("Failed fetching chunk {}", ChunkHash)); - } - if (RawSource.GetSize() != ChunkSize) - { - throw std::runtime_error(fmt::format("Fetched chunk {} has invalid size", ChunkHash)); - } - - const bool ShouldCompressChunk = IsChunkCompressable(m_NonCompressableExtensionHashes, Content, Lookup, ChunkIndex); - const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? 
OodleCompressionLevel::VeryFast : OodleCompressionLevel::None; - - if (ShouldCompressChunk) - { - std::filesystem::path TempFilePath = m_Options.TempDir / ChunkHash.ToHexString(); - - BasicFile CompressedFile; - std::error_code Ec; - CompressedFile.Open(TempFilePath, BasicFile::Mode::kTruncateDelete, Ec); - if (Ec) - { - throw std::runtime_error(fmt::format("Failed creating temporary file for compressing blob {}, reason: ({}) {}", - ChunkHash, - Ec.value(), - Ec.message())); - } - - uint64_t StreamRawBytes = 0; - uint64_t StreamCompressedBytes = 0; - - bool CouldCompress = CompressedBuffer::CompressToStream( - CompositeBuffer(SharedBuffer(RawSource)), - [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { - ZEN_UNUSED(SourceOffset); - TempLooseChunksStats.CompressedChunkRawBytes += SourceSize; - CompressedFile.Write(RangeBuffer, Offset); - TempLooseChunksStats.CompressedChunkBytes += RangeBuffer.GetSize(); - StreamRawBytes += SourceSize; - StreamCompressedBytes += RangeBuffer.GetSize(); - }, - OodleCompressor::Mermaid, - CompressionLevel); - if (CouldCompress) - { - uint64_t CompressedSize = CompressedFile.FileSize(); - void* FileHandle = CompressedFile.Detach(); - IoBuffer TempPayload = IoBuffer(IoBuffer::File, - FileHandle, - 0, - CompressedSize, - /*IsWholeFile*/ true); - ZEN_ASSERT(TempPayload); - TempPayload.SetDeleteOnClose(true); - IoHash RawHash; - uint64_t RawSize; - CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(TempPayload), RawHash, RawSize); - ZEN_ASSERT(Compressed); - ZEN_ASSERT(RawHash == ChunkHash); - ZEN_ASSERT(RawSize == ChunkSize); - - TempLooseChunksStats.CompressedChunkCount++; - - return Compressed.GetCompressed(); - } - else - { - TempLooseChunksStats.CompressedChunkRawBytes -= StreamRawBytes; - TempLooseChunksStats.CompressedChunkBytes -= StreamCompressedBytes; - } - CompressedFile.Close(); - RemoveFile(TempFilePath, Ec); - ZEN_UNUSED(Ec); - } - - CompressedBuffer 
CompressedBlob = - CompressedBuffer::Compress(SharedBuffer(std::move(RawSource)), OodleCompressor::Mermaid, CompressionLevel); - if (!CompressedBlob) - { - throw std::runtime_error(fmt::format("Failed to compress large blob {}", ChunkHash)); - } - ZEN_ASSERT_SLOW(CompressedBlob.DecodeRawHash() == ChunkHash); - ZEN_ASSERT_SLOW(CompressedBlob.DecodeRawSize() == ChunkSize); - - TempLooseChunksStats.CompressedChunkRawBytes += ChunkSize; - TempLooseChunksStats.CompressedChunkBytes += CompressedBlob.GetCompressedSize(); - - // If we use none-compression, the compressed blob references the data and has 64 kb in memory so we don't need to write it to disk - if (ShouldCompressChunk) - { - std::filesystem::path TempFilePath = m_Options.TempDir / (ChunkHash.ToHexString()); - IoBuffer TempPayload = WriteToTempFile(std::move(CompressedBlob).GetCompressed(), TempFilePath); - CompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)); - } - - TempLooseChunksStats.CompressedChunkCount++; - return std::move(CompressedBlob).GetCompressed(); -} - -BuildsOperationValidateBuildPart::BuildsOperationValidateBuildPart(OperationLogOutput& OperationLogOutput, - BuildStorageBase& Storage, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - WorkerThreadPool& IOWorkerPool, - WorkerThreadPool& NetworkPool, - const Oid& BuildId, - const Oid& BuildPartId, - const std::string_view BuildPartName, - const Options& Options) - -: m_LogOutput(OperationLogOutput) -, m_Storage(Storage) -, m_AbortFlag(AbortFlag) -, m_PauseFlag(PauseFlag) -, m_IOWorkerPool(IOWorkerPool) -, m_NetworkPool(NetworkPool) -, m_BuildId(BuildId) -, m_BuildPartId(BuildPartId) -, m_BuildPartName(BuildPartName) -, m_Options(Options) -{ -} - -void -BuildsOperationValidateBuildPart::Execute() -{ - ZEN_TRACE_CPU("ValidateBuildPart"); - try - { - enum class TaskSteps : uint32_t - { - FetchBuild, - FetchBuildPart, - ValidateBlobs, - Cleanup, - StepCount - }; - - auto EndProgress = - MakeGuard([&]() { 
m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); }); - - Stopwatch Timer; - auto _ = MakeGuard([&]() { - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Validated build part {}/{} ('{}') in {}", - m_BuildId, - m_BuildPartId, - m_BuildPartName, - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); - } - }); - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::FetchBuild, (uint32_t)TaskSteps::StepCount); - - CbObject Build = m_Storage.GetBuild(m_BuildId); - if (!m_BuildPartName.empty()) - { - m_BuildPartId = Build["parts"sv].AsObjectView()[m_BuildPartName].AsObjectId(); - if (m_BuildPartId == Oid::Zero) - { - throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", m_BuildId, m_BuildPartName)); - } - } - m_ValidateStats.BuildBlobSize = Build.GetSize(); - uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; - if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) - { - PreferredMultipartChunkSize = ChunkSize; - } - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::FetchBuildPart, (uint32_t)TaskSteps::StepCount); - - CbObject BuildPart = m_Storage.GetBuildPart(m_BuildId, m_BuildPartId); - m_ValidateStats.BuildPartSize = BuildPart.GetSize(); - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Validating build part {}/{} ({})", - m_BuildId, - m_BuildPartId, - NiceBytes(BuildPart.GetSize())); - } - std::vector<IoHash> ChunkAttachments; - if (const CbObjectView ChunkAttachmentsView = BuildPart["chunkAttachments"sv].AsObjectView()) - { - for (CbFieldView LooseFileView : ChunkAttachmentsView["rawHashes"sv]) - { - ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment()); - } - } - m_ValidateStats.ChunkAttachmentCount = ChunkAttachments.size(); - std::vector<IoHash> BlockAttachments; - if (const CbObjectView BlockAttachmentsView = BuildPart["blockAttachments"sv].AsObjectView()) - { - { - for (CbFieldView BlocksView : 
BlockAttachmentsView["rawHashes"sv]) - { - BlockAttachments.push_back(BlocksView.AsBinaryAttachment()); - } - } - } - m_ValidateStats.BlockAttachmentCount = BlockAttachments.size(); - - std::vector<ChunkBlockDescription> VerifyBlockDescriptions = - ParseChunkBlockDescriptionList(m_Storage.GetBlockMetadatas(m_BuildId, BlockAttachments)); - if (VerifyBlockDescriptions.size() != BlockAttachments.size()) - { - throw std::runtime_error(fmt::format("Uploaded blocks metadata could not all be found, {} blocks metadata is missing", - BlockAttachments.size() - VerifyBlockDescriptions.size())); - } - - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - - const std::filesystem::path TempFolder = ".zen-tmp"; - - CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, TempFolder); - CreateDirectories(TempFolder); - auto __ = MakeGuard([this, TempFolder]() { CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, TempFolder); }); - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::ValidateBlobs, (uint32_t)TaskSteps::StepCount); - - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Validate Blobs")); - OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr); - - uint64_t AttachmentsToVerifyCount = ChunkAttachments.size() + BlockAttachments.size(); - FilteredRate FilteredDownloadedBytesPerSecond; - FilteredRate FilteredVerifiedBytesPerSecond; - - std::atomic<uint64_t> MultipartAttachmentCount = 0; - - for (const IoHash& ChunkAttachment : ChunkAttachments) - { - Work.ScheduleWork( - m_NetworkPool, - [this, - &Work, - AttachmentsToVerifyCount, - &TempFolder, - PreferredMultipartChunkSize, - &FilteredDownloadedBytesPerSecond, - &FilteredVerifiedBytesPerSecond, - &ChunkAttachments, - ChunkAttachment = IoHash(ChunkAttachment)](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("ValidateBuildPart_GetChunk"); - - FilteredDownloadedBytesPerSecond.Start(); - 
DownloadLargeBlob( - m_Storage, - TempFolder, - m_BuildId, - ChunkAttachment, - PreferredMultipartChunkSize, - Work, - m_NetworkPool, - m_DownloadStats.DownloadedChunkByteCount, - m_DownloadStats.MultipartAttachmentCount, - [this, - &Work, - AttachmentsToVerifyCount, - &FilteredDownloadedBytesPerSecond, - &FilteredVerifiedBytesPerSecond, - ChunkHash = IoHash(ChunkAttachment)](IoBuffer&& Payload) { - m_DownloadStats.DownloadedChunkCount++; - Payload.SetContentType(ZenContentType::kCompressedBinary); - if (!m_AbortFlag) - { - Work.ScheduleWork( - m_IOWorkerPool, - [this, - AttachmentsToVerifyCount, - &FilteredDownloadedBytesPerSecond, - &FilteredVerifiedBytesPerSecond, - Payload = IoBuffer(std::move(Payload)), - ChunkHash](std::atomic<bool>&) mutable { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("ValidateBuildPart_Validate"); - - if (m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount == - AttachmentsToVerifyCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - - FilteredVerifiedBytesPerSecond.Start(); - - uint64_t CompressedSize; - uint64_t DecompressedSize; - ValidateBlob(m_AbortFlag, std::move(Payload), ChunkHash, CompressedSize, DecompressedSize); - m_ValidateStats.VerifiedAttachmentCount++; - m_ValidateStats.VerifiedByteCount += DecompressedSize; - if (m_ValidateStats.VerifiedAttachmentCount.load() == AttachmentsToVerifyCount) - { - FilteredVerifiedBytesPerSecond.Stop(); - } - } - }); - } - }); - } - }); - } - - for (const IoHash& BlockAttachment : BlockAttachments) - { - Work.ScheduleWork( - m_NetworkPool, - [this, - &Work, - AttachmentsToVerifyCount, - &FilteredDownloadedBytesPerSecond, - &FilteredVerifiedBytesPerSecond, - BlockAttachment = IoHash(BlockAttachment)](std::atomic<bool>&) { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("ValidateBuildPart_GetBlock"); - - FilteredDownloadedBytesPerSecond.Start(); - IoBuffer Payload = m_Storage.GetBuildBlob(m_BuildId, BlockAttachment); - m_DownloadStats.DownloadedBlockCount++; - 
m_DownloadStats.DownloadedBlockByteCount += Payload.GetSize(); - if (m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount == AttachmentsToVerifyCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - if (!Payload) - { - throw std::runtime_error(fmt::format("Block attachment {} could not be found", BlockAttachment)); - } - if (!m_AbortFlag) - { - Work.ScheduleWork( - m_IOWorkerPool, - [this, - &FilteredVerifiedBytesPerSecond, - AttachmentsToVerifyCount, - Payload = std::move(Payload), - BlockAttachment](std::atomic<bool>&) mutable { - if (!m_AbortFlag) - { - ZEN_TRACE_CPU("ValidateBuildPart_ValidateBlock"); - - FilteredVerifiedBytesPerSecond.Start(); - - uint64_t CompressedSize; - uint64_t DecompressedSize; - ValidateChunkBlock(std::move(Payload), BlockAttachment, CompressedSize, DecompressedSize); - m_ValidateStats.VerifiedAttachmentCount++; - m_ValidateStats.VerifiedByteCount += DecompressedSize; - if (m_ValidateStats.VerifiedAttachmentCount.load() == AttachmentsToVerifyCount) - { - FilteredVerifiedBytesPerSecond.Stop(); - } - } - }); - } - } - }); - } - - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(PendingWork); - - const uint64_t DownloadedAttachmentCount = m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount; - const uint64_t DownloadedByteCount = m_DownloadStats.DownloadedChunkByteCount + m_DownloadStats.DownloadedBlockByteCount; - - FilteredDownloadedBytesPerSecond.Update(DownloadedByteCount); - FilteredVerifiedBytesPerSecond.Update(m_ValidateStats.VerifiedByteCount); - - std::string Details = fmt::format("Downloaded {}/{} ({}, {}bits/s). 
Verified {}/{} ({}, {}B/s)", - DownloadedAttachmentCount, - AttachmentsToVerifyCount, - NiceBytes(DownloadedByteCount), - NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8), - m_ValidateStats.VerifiedAttachmentCount.load(), - AttachmentsToVerifyCount, - NiceBytes(m_ValidateStats.VerifiedByteCount.load()), - NiceNum(FilteredVerifiedBytesPerSecond.GetCurrent())); - - Progress.UpdateState( - {.Task = "Validating blobs ", - .Details = Details, - .TotalCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2), - .RemainingCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2 - - (DownloadedAttachmentCount + m_ValidateStats.VerifiedAttachmentCount.load())), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }); - - Progress.Finish(); - m_ValidateStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); - - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount); - } - catch (const std::exception&) - { - m_AbortFlag = true; - throw; - } -} - -BuildsOperationPrimeCache::BuildsOperationPrimeCache(OperationLogOutput& OperationLogOutput, - StorageInstance& Storage, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - WorkerThreadPool& NetworkPool, - const Oid& BuildId, - std::span<const Oid> BuildPartIds, - const Options& Options, - BuildStorageCache::Statistics& StorageCacheStats) -: m_LogOutput(OperationLogOutput) -, m_Storage(Storage) -, m_AbortFlag(AbortFlag) -, m_PauseFlag(PauseFlag) -, m_NetworkPool(NetworkPool) -, m_BuildId(BuildId) -, m_BuildPartIds(BuildPartIds.begin(), BuildPartIds.end()) -, m_Options(Options) -, m_StorageCacheStats(StorageCacheStats) -{ - m_TempPath = m_Options.ZenFolderPath / "tmp"; - CreateDirectories(m_TempPath); -} - -void -BuildsOperationPrimeCache::Execute() -{ - ZEN_TRACE_CPU("BuildsOperationPrimeCache::Execute"); - - Stopwatch PrimeTimer; - - tsl::robin_map<IoHash, uint64_t, IoHash::Hasher> LooseChunkRawSizes; - - 
tsl::robin_set<IoHash, IoHash::Hasher> BuildBlobs; - - for (const Oid& BuildPartId : m_BuildPartIds) - { - CbObject BuildPart = m_Storage.BuildStorage->GetBuildPart(m_BuildId, BuildPartId); - - CbObjectView BlockAttachmentsView = BuildPart["blockAttachments"sv].AsObjectView(); - std::vector<IoHash> BlockAttachments = compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, BlockAttachmentsView); - - CbObjectView ChunkAttachmentsView = BuildPart["chunkAttachments"sv].AsObjectView(); - std::vector<IoHash> ChunkAttachments = compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, ChunkAttachmentsView); - std::vector<uint64_t> ChunkRawSizes = compactbinary_helpers::ReadArray<uint64_t>("chunkRawSizes"sv, ChunkAttachmentsView); - if (ChunkAttachments.size() != ChunkRawSizes.size()) - { - throw std::runtime_error(fmt::format("Mismatch of loose chunk raw size array, expected {}, found {}", - ChunkAttachments.size(), - ChunkRawSizes.size())); - } - - BuildBlobs.reserve(ChunkAttachments.size() + BlockAttachments.size()); - BuildBlobs.insert(BlockAttachments.begin(), BlockAttachments.end()); - BuildBlobs.insert(ChunkAttachments.begin(), ChunkAttachments.end()); - - for (size_t ChunkAttachmentIndex = 0; ChunkAttachmentIndex < ChunkAttachments.size(); ChunkAttachmentIndex++) - { - LooseChunkRawSizes.insert_or_assign(ChunkAttachments[ChunkAttachmentIndex], ChunkRawSizes[ChunkAttachmentIndex]); - } - } - - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Found {} referenced blobs", BuildBlobs.size()); - } - - if (BuildBlobs.empty()) - { - return; - } - - std::vector<IoHash> BlobsToDownload; - BlobsToDownload.reserve(BuildBlobs.size()); - - if (m_Storage.BuildCacheStorage && !BuildBlobs.empty() && !m_Options.ForceUpload) - { - ZEN_TRACE_CPU("BlobCacheExistCheck"); - Stopwatch Timer; - - const std::vector<IoHash> BlobHashes(BuildBlobs.begin(), BuildBlobs.end()); - const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = - 
m_Storage.BuildCacheStorage->BlobsExists(m_BuildId, BlobHashes); - - if (CacheExistsResult.size() == BlobHashes.size()) - { - for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++) - { - if (!CacheExistsResult[BlobIndex].HasBody) - { - BlobsToDownload.push_back(BlobHashes[BlobIndex]); - } - } - size_t FoundCount = BuildBlobs.size() - BlobsToDownload.size(); - - if (FoundCount > 0 && !m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Remote cache : Found {} out of {} needed blobs in {}", - FoundCount, - BuildBlobs.size(), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); - } - } - } - else - { - BlobsToDownload.insert(BlobsToDownload.end(), BuildBlobs.begin(), BuildBlobs.end()); - } - - if (BlobsToDownload.empty()) - { - return; - } - - std::atomic<uint64_t> MultipartAttachmentCount; - std::atomic<size_t> CompletedDownloadCount; - FilteredRate FilteredDownloadedBytesPerSecond; - - { - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Downloading")); - OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr); - - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - - const size_t BlobCount = BlobsToDownload.size(); - - for (size_t BlobIndex = 0; BlobIndex < BlobCount; BlobIndex++) - { - Work.ScheduleWork( - m_NetworkPool, - [this, - &Work, - &BlobsToDownload, - BlobCount, - &LooseChunkRawSizes, - &CompletedDownloadCount, - &FilteredDownloadedBytesPerSecond, - &MultipartAttachmentCount, - BlobIndex](std::atomic<bool>&) { - if (!m_AbortFlag) - { - const IoHash& BlobHash = BlobsToDownload[BlobIndex]; - - bool IsLargeBlob = false; - - if (auto It = LooseChunkRawSizes.find(BlobHash); It != LooseChunkRawSizes.end()) - { - IsLargeBlob = It->second >= m_Options.LargeAttachmentSize; - } - - FilteredDownloadedBytesPerSecond.Start(); - - if (IsLargeBlob) - { - DownloadLargeBlob(*m_Storage.BuildStorage, - m_TempPath, - m_BuildId, - BlobHash, - 
m_Options.PreferredMultipartChunkSize, - Work, - m_NetworkPool, - m_DownloadStats.DownloadedChunkByteCount, - MultipartAttachmentCount, - [this, BlobCount, BlobHash, &FilteredDownloadedBytesPerSecond, &CompletedDownloadCount]( - IoBuffer&& Payload) { - m_DownloadStats.DownloadedChunkCount++; - m_DownloadStats.RequestsCompleteCount++; - - if (!m_AbortFlag) - { - if (Payload && m_Storage.BuildCacheStorage) - { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - BlobHash, - ZenContentType::kCompressedBinary, - CompositeBuffer(SharedBuffer(Payload))); - } - } - CompletedDownloadCount++; - if (CompletedDownloadCount == BlobCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - }); - } - else - { - IoBuffer Payload = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlobHash); - m_DownloadStats.DownloadedBlockCount++; - m_DownloadStats.DownloadedBlockByteCount += Payload.GetSize(); - m_DownloadStats.RequestsCompleteCount++; - - if (!m_AbortFlag) - { - if (Payload && m_Storage.BuildCacheStorage) - { - m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, - BlobHash, - ZenContentType::kCompressedBinary, - CompositeBuffer(SharedBuffer(std::move(Payload)))); - } - } - CompletedDownloadCount++; - if (CompletedDownloadCount == BlobCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - } - } - }); - } - - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(PendingWork); - - uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + m_DownloadStats.DownloadedBlockByteCount.load(); - FilteredDownloadedBytesPerSecond.Update(DownloadedBytes); - - std::string DownloadRateString = (CompletedDownloadCount == BlobCount) - ? "" - : fmt::format(" {}bits/s", NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8)); - std::string UploadDetails = m_Storage.BuildCacheStorage ? 
fmt::format(" {} ({}) uploaded.", - m_StorageCacheStats.PutBlobCount.load(), - NiceBytes(m_StorageCacheStats.PutBlobByteCount.load())) - : ""; - - std::string Details = fmt::format("{}/{} ({}{}) downloaded.{}", - CompletedDownloadCount.load(), - BlobCount, - NiceBytes(DownloadedBytes), - DownloadRateString, - UploadDetails); - Progress.UpdateState({.Task = "Downloading", - .Details = Details, - .TotalCount = BlobCount, - .RemainingCount = BlobCount - CompletedDownloadCount.load(), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); - }); - - FilteredDownloadedBytesPerSecond.Stop(); - - Progress.Finish(); - } - if (m_AbortFlag) - { - return; - } - - if (m_Storage.BuildCacheStorage) - { - m_Storage.BuildCacheStorage->Flush(m_LogOutput.GetProgressUpdateDelayMS(), [this](intptr_t Remaining) -> bool { - ZEN_UNUSED(Remaining); - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Waiting for {} blobs to finish upload to '{}'", Remaining, m_Storage.CacheName); - } - return !m_AbortFlag; - }); - } - - if (!m_Options.IsQuiet) - { - uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + m_DownloadStats.DownloadedBlockByteCount.load(); - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Downloaded {} ({}bits/s) in {}. {} as multipart. 
Completed in {}", - NiceBytes(DownloadedBytes), - NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), DownloadedBytes * 8)), - NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000), - MultipartAttachmentCount.load(), - NiceTimeSpanMs(PrimeTimer.GetElapsedTimeMs())); - } -} - -CompositeBuffer -ValidateBlob(std::atomic<bool>& AbortFlag, - BuildStorageBase& Storage, - const Oid& BuildId, - const IoHash& BlobHash, - uint64_t& OutCompressedSize, - uint64_t& OutDecompressedSize) -{ - ZEN_TRACE_CPU("ValidateBlob"); - IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlobHash); - if (!Payload) - { - throw std::runtime_error(fmt::format("Blob {} could not be found", BlobHash)); - } - return ValidateBlob(AbortFlag, std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); -} - -ChunkBlockDescription -BuildsOperationValidateBuildPart::ValidateChunkBlock(IoBuffer&& Payload, - const IoHash& BlobHash, - uint64_t& OutCompressedSize, - uint64_t& OutDecompressedSize) -{ - CompositeBuffer BlockBuffer = ValidateBlob(m_AbortFlag, std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); - if (!BlockBuffer) - { - throw std::runtime_error(fmt::format("Chunk block blob {} is not compressed using 'None' compression level", BlobHash)); - } - return GetChunkBlockDescription(BlockBuffer.Flatten(), BlobHash); -} - -std::vector<std::pair<Oid, std::string>> -ResolveBuildPartNames(CbObjectView BuildObject, - const Oid& BuildId, - const std::vector<Oid>& BuildPartIds, - std::span<const std::string> BuildPartNames, - std::uint64_t& OutPreferredMultipartChunkSize) -{ - std::vector<std::pair<Oid, std::string>> Result; - { - CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); - if (!PartsObject) - { - throw std::runtime_error("Build object does not have a 'parts' object"); - } - - OutPreferredMultipartChunkSize = BuildObject["chunkSize"sv].AsUInt64(OutPreferredMultipartChunkSize); - - std::vector<std::pair<Oid, 
std::string>> AvailableParts; - - for (CbFieldView PartView : PartsObject) - { - const std::string BuildPartName = std::string(PartView.GetName()); - const Oid BuildPartId = PartView.AsObjectId(); - if (BuildPartId == Oid::Zero) - { - ExtendableStringBuilder<128> SB; - for (CbFieldView ScanPartView : PartsObject) - { - SB.Append(fmt::format("\n {}: {}", ScanPartView.GetName(), ScanPartView.AsObjectId())); - } - throw std::runtime_error(fmt::format("Build object parts does not have a '{}' object id{}", BuildPartName, SB.ToView())); - } - AvailableParts.push_back({BuildPartId, BuildPartName}); - } - - if (BuildPartIds.empty() && BuildPartNames.empty()) - { - Result = AvailableParts; - } - else - { - for (const std::string& BuildPartName : BuildPartNames) - { - if (auto It = std::find_if(AvailableParts.begin(), - AvailableParts.end(), - [&BuildPartName](const auto& Part) { return Part.second == BuildPartName; }); - It != AvailableParts.end()) - { - Result.push_back(*It); - } - else - { - throw std::runtime_error(fmt::format("Build {} object does not have a part named '{}'", BuildId, BuildPartName)); - } - } - for (const Oid& BuildPartId : BuildPartIds) - { - if (auto It = std::find_if(AvailableParts.begin(), - AvailableParts.end(), - [&BuildPartId](const auto& Part) { return Part.first == BuildPartId; }); - It != AvailableParts.end()) - { - Result.push_back(*It); - } - else - { - throw std::runtime_error(fmt::format("Build {} object does not have a part with id '{}'", BuildId, BuildPartId)); - } - } - } - - if (Result.empty()) - { - throw std::runtime_error(fmt::format("Build object does not have any parts", BuildId)); - } - } - return Result; -} - -ChunkedFolderContent -GetRemoteContent(OperationLogOutput& Output, - StorageInstance& Storage, - const Oid& BuildId, - const std::vector<std::pair<Oid, std::string>>& BuildParts, - const BuildManifest& Manifest, - std::span<const std::string> IncludeWildcards, - std::span<const std::string> ExcludeWildcards, - 
std::unique_ptr<ChunkingController>& OutChunkController, - std::vector<ChunkedFolderContent>& OutPartContents, - std::vector<ChunkBlockDescription>& OutBlockDescriptions, - std::vector<IoHash>& OutLooseChunkHashes, - bool IsQuiet, - bool IsVerbose, - bool DoExtraContentVerify) -{ - ZEN_TRACE_CPU("GetRemoteContent"); - - Stopwatch GetBuildPartTimer; - const Oid BuildPartId = BuildParts[0].first; - const std::string_view BuildPartName = BuildParts[0].second; - CbObject BuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, BuildPartId); - if (!IsQuiet) - { - ZEN_OPERATION_LOG_INFO(Output, - "GetBuildPart {} ('{}') took {}. Payload size: {}", - BuildPartId, - BuildPartName, - NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()), - NiceBytes(BuildPartManifest.GetSize())); - ZEN_OPERATION_LOG_INFO(Output, "{}", GetCbObjectAsNiceString(BuildPartManifest, " "sv, "\n"sv)); - } - - { - CbObjectView Chunker = BuildPartManifest["chunker"sv].AsObjectView(); - std::string_view ChunkerName = Chunker["name"sv].AsString(); - CbObjectView Parameters = Chunker["parameters"sv].AsObjectView(); - OutChunkController = CreateChunkingController(ChunkerName, Parameters); - } - - auto ParseBuildPartManifest = [&Output, IsQuiet, IsVerbose, DoExtraContentVerify]( - StorageInstance& Storage, - const Oid& BuildId, - const Oid& BuildPartId, - CbObject BuildPartManifest, - std::span<const std::string> IncludeWildcards, - std::span<const std::string> ExcludeWildcards, - const BuildManifest::Part* OptionalManifest, - ChunkedFolderContent& OutRemoteContent, - std::vector<ChunkBlockDescription>& OutBlockDescriptions, - std::vector<IoHash>& OutLooseChunkHashes) { - std::vector<uint32_t> AbsoluteChunkOrders; - std::vector<uint64_t> LooseChunkRawSizes; - std::vector<IoHash> BlockRawHashes; - - ReadBuildContentFromCompactBinary(BuildPartManifest, - OutRemoteContent.Platform, - OutRemoteContent.Paths, - OutRemoteContent.RawHashes, - OutRemoteContent.RawSizes, - OutRemoteContent.Attributes, - 
OutRemoteContent.ChunkedContent.SequenceRawHashes, - OutRemoteContent.ChunkedContent.ChunkCounts, - AbsoluteChunkOrders, - OutLooseChunkHashes, - LooseChunkRawSizes, - BlockRawHashes); - - // TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them - - { - bool AttemptFallback = false; - OutBlockDescriptions = GetBlockDescriptions(Output, - *Storage.BuildStorage, - Storage.BuildCacheStorage.get(), - BuildId, - BuildPartId, - BlockRawHashes, - AttemptFallback, - IsQuiet, - IsVerbose); - } - - CalculateLocalChunkOrders(AbsoluteChunkOrders, - OutLooseChunkHashes, - LooseChunkRawSizes, - OutBlockDescriptions, - OutRemoteContent.ChunkedContent.ChunkHashes, - OutRemoteContent.ChunkedContent.ChunkRawSizes, - OutRemoteContent.ChunkedContent.ChunkOrders, - DoExtraContentVerify); - - std::vector<std::filesystem::path> DeletedPaths; - - if (OptionalManifest) - { - tsl::robin_set<std::string> PathsInManifest; - PathsInManifest.reserve(OptionalManifest->Files.size()); - for (const std::filesystem::path& ManifestPath : OptionalManifest->Files) - { - PathsInManifest.insert(ToLower(ManifestPath.generic_string())); - } - for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths) - { - if (!PathsInManifest.contains(ToLower(RemotePath.generic_string()))) - { - DeletedPaths.push_back(RemotePath); - } - } - } - - if (!IncludeWildcards.empty() || !ExcludeWildcards.empty()) - { - for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths) - { - if (!IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(RemotePath.generic_string()), /*CaseSensitive*/ true)) - { - DeletedPaths.push_back(RemotePath); - } - } - } - - if (!DeletedPaths.empty()) - { - OutRemoteContent = DeletePathsFromChunkedContent(OutRemoteContent, DeletedPaths); - InlineRemoveUnusedHashes(OutLooseChunkHashes, OutRemoteContent.ChunkedContent.ChunkHashes); - } - -#if ZEN_BUILD_DEBUG - ValidateChunkedFolderContent(OutRemoteContent, 
OutBlockDescriptions, OutLooseChunkHashes, IncludeWildcards, ExcludeWildcards); -#endif // ZEN_BUILD_DEBUG - }; - - auto FindManifest = [&Manifest](const Oid& BuildPartId, std::string_view BuildPartName) -> const BuildManifest::Part* { - if (Manifest.Parts.empty()) - { - return nullptr; - } - if (Manifest.Parts.size() == 1) - { - if (Manifest.Parts[0].PartId == Oid::Zero && Manifest.Parts[0].PartName.empty()) - { - return &Manifest.Parts[0]; - } - } - - auto It = std::find_if(Manifest.Parts.begin(), Manifest.Parts.end(), [BuildPartId, BuildPartName](const BuildManifest::Part& Part) { - if (Part.PartId != Oid::Zero) - { - return Part.PartId == BuildPartId; - } - if (!Part.PartName.empty()) - { - return Part.PartName == BuildPartName; - } - return false; - }); - if (It != Manifest.Parts.end()) - { - return &(*It); - } - return nullptr; - }; - - OutPartContents.resize(1); - ParseBuildPartManifest(Storage, - BuildId, - BuildPartId, - BuildPartManifest, - IncludeWildcards, - ExcludeWildcards, - FindManifest(BuildPartId, BuildPartName), - OutPartContents[0], - OutBlockDescriptions, - OutLooseChunkHashes); - ChunkedFolderContent RemoteContent; - if (BuildParts.size() > 1) - { - std::vector<ChunkBlockDescription> OverlayBlockDescriptions; - std::vector<IoHash> OverlayLooseChunkHashes; - for (size_t PartIndex = 1; PartIndex < BuildParts.size(); PartIndex++) - { - const Oid& OverlayBuildPartId = BuildParts[PartIndex].first; - const std::string& OverlayBuildPartName = BuildParts[PartIndex].second; - Stopwatch GetOverlayBuildPartTimer; - CbObject OverlayBuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, OverlayBuildPartId); - if (!IsQuiet) - { - ZEN_OPERATION_LOG_INFO(Output, - "GetBuildPart {} ('{}') took {}. 
Payload size: {}", - OverlayBuildPartId, - OverlayBuildPartName, - NiceTimeSpanMs(GetOverlayBuildPartTimer.GetElapsedTimeMs()), - NiceBytes(OverlayBuildPartManifest.GetSize())); - } - - ChunkedFolderContent OverlayPartContent; - std::vector<ChunkBlockDescription> OverlayPartBlockDescriptions; - std::vector<IoHash> OverlayPartLooseChunkHashes; - - ParseBuildPartManifest(Storage, - BuildId, - OverlayBuildPartId, - OverlayBuildPartManifest, - IncludeWildcards, - ExcludeWildcards, - FindManifest(OverlayBuildPartId, OverlayBuildPartName), - OverlayPartContent, - OverlayPartBlockDescriptions, - OverlayPartLooseChunkHashes); - OutPartContents.push_back(OverlayPartContent); - OverlayBlockDescriptions.insert(OverlayBlockDescriptions.end(), - OverlayPartBlockDescriptions.begin(), - OverlayPartBlockDescriptions.end()); - OverlayLooseChunkHashes.insert(OverlayLooseChunkHashes.end(), - OverlayPartLooseChunkHashes.begin(), - OverlayPartLooseChunkHashes.end()); - } - - RemoteContent = MergeChunkedFolderContents(OutPartContents[0], std::span<const ChunkedFolderContent>(OutPartContents).subspan(1)); - { - tsl::robin_set<IoHash> AllBlockHashes; - for (const ChunkBlockDescription& Description : OutBlockDescriptions) - { - AllBlockHashes.insert(Description.BlockHash); - } - for (const ChunkBlockDescription& Description : OverlayBlockDescriptions) - { - if (!AllBlockHashes.contains(Description.BlockHash)) - { - AllBlockHashes.insert(Description.BlockHash); - OutBlockDescriptions.push_back(Description); - } - } - } - { - tsl::robin_set<IoHash> AllLooseChunkHashes(OutLooseChunkHashes.begin(), OutLooseChunkHashes.end()); - for (const IoHash& OverlayLooseChunkHash : OverlayLooseChunkHashes) - { - if (!AllLooseChunkHashes.contains(OverlayLooseChunkHash)) - { - AllLooseChunkHashes.insert(OverlayLooseChunkHash); - OutLooseChunkHashes.push_back(OverlayLooseChunkHash); - } - } - } - } - else - { - RemoteContent = OutPartContents[0]; - } - return RemoteContent; -} - -std::string 
-GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix) -{ - ExtendableStringBuilder<512> SB; - std::vector<std::pair<std::string, std::string>> NameStringValuePairs; - for (CbFieldView Field : Object) - { - std::string_view Name = Field.GetName(); - switch (CbValue Accessor = Field.GetValue(); Accessor.GetType()) - { - case CbFieldType::String: - NameStringValuePairs.push_back({std::string(Name), std::string(Accessor.AsString())}); - break; - case CbFieldType::IntegerPositive: - NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerPositive())}); - break; - case CbFieldType::IntegerNegative: - NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerNegative())}); - break; - case CbFieldType::Float32: - { - const float Value = Accessor.AsFloat32(); - if (std::isfinite(Value)) - { - NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.9g}", Value)}); - } - else - { - NameStringValuePairs.push_back({std::string(Name), "null"}); - } - } - break; - case CbFieldType::Float64: - { - const double Value = Accessor.AsFloat64(); - if (std::isfinite(Value)) - { - NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.17g}", Value)}); - } - else - { - NameStringValuePairs.push_back({std::string(Name), "null"}); - } - } - break; - case CbFieldType::BoolFalse: - NameStringValuePairs.push_back({std::string(Name), "false"}); - break; - case CbFieldType::BoolTrue: - NameStringValuePairs.push_back({std::string(Name), "true"}); - break; - case CbFieldType::Hash: - { - NameStringValuePairs.push_back({std::string(Name), Accessor.AsHash().ToHexString()}); - } - break; - case CbFieldType::Uuid: - { - StringBuilder<Oid::StringLength + 1> Builder; - Accessor.AsUuid().ToString(Builder); - NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); - } - break; - case CbFieldType::DateTime: - { - ExtendableStringBuilder<64> Builder; - Builder << 
DateTime(Accessor.AsDateTimeTicks()).ToIso8601(); - NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); - } - break; - case CbFieldType::TimeSpan: - { - ExtendableStringBuilder<64> Builder; - const TimeSpan Span(Accessor.AsTimeSpanTicks()); - if (Span.GetDays() == 0) - { - Builder << Span.ToString("%h:%m:%s.%n"); - } - else - { - Builder << Span.ToString("%d.%h:%m:%s.%n"); - } - NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); - break; - } - case CbFieldType::ObjectId: - NameStringValuePairs.push_back({std::string(Name), Accessor.AsObjectId().ToString()}); - break; - } - } - std::string::size_type LongestKey = 0; - for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs) - { - LongestKey = Max(KeyValue.first.length(), LongestKey); - } - for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs) - { - SB.Append(fmt::format("{}{:<{}}: {}{}", Prefix, KeyValue.first, LongestKey, KeyValue.second, Suffix)); - } - return SB.ToString(); -} - -#if ZEN_WITH_TESTS - -namespace buildstorageoperations_testutils { - struct TestState - { - TestState(const std::filesystem::path& InRootPath) - : RootPath(InRootPath) - , LogOutput(CreateStandardLogOutput(Log)) - , ChunkController(CreateStandardChunkingController(StandardChunkingControllerSettings{})) - , ChunkCache(CreateMemoryChunkingCache()) - , WorkerPool(2) - , NetworkPool(2) - { - } - - void Initialize() - { - StoragePath = RootPath / "storage"; - TempPath = RootPath / "temp"; - SystemRootDir = RootPath / "sysroot"; - ZenFolderPath = RootPath / ".zen"; - - CreateDirectories(TempPath); - CreateDirectories(StoragePath); - - Storage.BuildStorage = CreateFileBuildStorage(StoragePath, StorageStats, false); - } - - void CreateSourceData(const std::filesystem::path& Source, std::span<const std::string> Paths, std::span<const uint64_t> Sizes) - { - const std::filesystem::path SourcePath = RootPath / Source; - CreateDirectories(SourcePath); - for 
(size_t FileIndex = 0; FileIndex < Paths.size(); FileIndex++) - { - const std::string& FilePath = Paths[FileIndex]; - const uint64_t FileSize = Sizes[FileIndex]; - IoBuffer FileData = FileSize > 0 ? CreateSemiRandomBlob(FileSize) : IoBuffer{}; - WriteFile(SourcePath / FilePath, FileData); - } - } - - std::vector<std::pair<Oid, std::string>> Upload(const Oid& BuildId, - const Oid& BuildPartId, - const std::string_view BuildPartName, - const std::filesystem::path& Source, - const std::filesystem::path& ManifestPath) - { - const std::filesystem::path SourcePath = RootPath / Source; - CbObject MetaData; - BuildsOperationUploadFolder Upload(*LogOutput, - Storage, - AbortFlag, - PauseFlag, - WorkerPool, - NetworkPool, - BuildId, - SourcePath, - true, - MetaData, - BuildsOperationUploadFolder::Options{.TempDir = TempPath}); - return Upload.Execute(BuildPartId, BuildPartName, ManifestPath, *ChunkController, *ChunkCache); - } - - void ValidateUpload(const Oid& BuildId, const std::vector<std::pair<Oid, std::string>>& Parts) - { - for (auto Part : Parts) - { - BuildsOperationValidateBuildPart Validate(*LogOutput, - *Storage.BuildStorage, - AbortFlag, - PauseFlag, - WorkerPool, - NetworkPool, - BuildId, - Part.first, - Part.second, - BuildsOperationValidateBuildPart::Options{}); - Validate.Execute(); - } - } - - FolderContent Download(const Oid& BuildId, - const Oid& BuildPartId, - const std::string_view BuildPartName, - const std::filesystem::path& Target, - bool Append) - { - const std::filesystem::path TargetPath = RootPath / Target; - - CreateDirectories(TargetPath); - - uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; - CbObject BuildObject = Storage.BuildStorage->GetBuild(BuildId); - std::vector<Oid> PartIds; - if (BuildPartId != Oid::Zero) - { - PartIds.push_back(BuildPartId); - } - std::vector<std::string> PartNames; - if (!BuildPartName.empty()) - { - PartNames.push_back(std::string(BuildPartName)); - } - std::vector<std::pair<Oid, std::string>> 
AllBuildParts = - ResolveBuildPartNames(BuildObject, BuildId, PartIds, PartNames, PreferredMultipartChunkSize); - - std::vector<ChunkedFolderContent> PartContents; - - std::vector<ChunkBlockDescription> BlockDescriptions; - std::vector<IoHash> LooseChunkHashes; - - ChunkedFolderContent RemoteContent = GetRemoteContent(*LogOutput, - Storage, - BuildId, - AllBuildParts, - {}, - {}, - {}, - ChunkController, - PartContents, - BlockDescriptions, - LooseChunkHashes, - /*IsQuiet*/ false, - /*IsVerbose*/ false, - /*DoExtraContentVerify*/ true); - - GetFolderContentStatistics LocalFolderScanStats; - - struct ContentVisitor : public GetDirectoryContentVisitor - { - virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) - { - RwLock::ExclusiveLockScope _(ExistingPathsLock); - for (const std::filesystem::path& FileName : Content.FileNames) - { - if (RelativeRoot.empty()) - { - ExistingPaths.push_back(FileName); - } - else - { - ExistingPaths.push_back(RelativeRoot / FileName); - } - } - } - - RwLock ExistingPathsLock; - std::vector<std::filesystem::path> ExistingPaths; - } Visitor; - - Latch PendingWorkCount(1); - - GetDirectoryContent(TargetPath, - DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive, - Visitor, - WorkerPool, - PendingWorkCount); - - PendingWorkCount.CountDown(); - PendingWorkCount.Wait(); - - FolderContent CurrentLocalFolderState = GetValidFolderContent( - WorkerPool, - LocalFolderScanStats, - TargetPath, - Visitor.ExistingPaths, - [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); }, - 1000, - AbortFlag, - PauseFlag); - - ChunkingStatistics LocalChunkingStats; - ChunkedFolderContent LocalContent = ChunkFolderContent( - LocalChunkingStats, - WorkerPool, - TargetPath, - CurrentLocalFolderState, - *ChunkController, - *ChunkCache, - 1000, - [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { ZEN_UNUSED(IsAborted, IsPaused); }, - AbortFlag, - 
PauseFlag); - - if (Append) - { - RemoteContent = ApplyChunkedContentOverlay(LocalContent, RemoteContent, {}, {}); - } - - const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); - const ChunkedContentLookup RemoteLookup = BuildChunkedContentLookup(RemoteContent); - - BuildsOperationUpdateFolder Download(*LogOutput, - Storage, - AbortFlag, - PauseFlag, - WorkerPool, - NetworkPool, - BuildId, - TargetPath, - LocalContent, - LocalLookup, - RemoteContent, - RemoteLookup, - BlockDescriptions, - LooseChunkHashes, - BuildsOperationUpdateFolder::Options{.SystemRootDir = SystemRootDir, - .ZenFolderPath = ZenFolderPath, - .ValidateCompletedSequences = true}); - FolderContent ResultingState; - Download.Execute(ResultingState); - - return ResultingState; - } - - void ValidateDownload(std::span<const std::string> Paths, - std::span<const uint64_t> Sizes, - const std::filesystem::path& Source, - const std::filesystem::path& Target, - const FolderContent& DownloadContent) - { - const std::filesystem::path SourcePath = RootPath / Source; - const std::filesystem::path TargetPath = RootPath / Target; - - CHECK_EQ(Paths.size(), DownloadContent.Paths.size()); - tsl::robin_map<std::string, uint64_t> ExpectedSizes; - tsl::robin_map<std::string, IoHash> ExpectedHashes; - for (size_t Index = 0; Index < Paths.size(); Index++) - { - const std::string LookupString = std::filesystem::path(Paths[Index]).generic_string(); - ExpectedSizes.insert_or_assign(LookupString, Sizes[Index]); - std::filesystem::path FilePath = SourcePath / Paths[Index]; - const IoHash SourceHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(FilePath.make_preferred())); - ExpectedHashes.insert_or_assign(LookupString, SourceHash); - } - for (size_t Index = 0; Index < DownloadContent.Paths.size(); Index++) - { - const std::string LookupString = std::filesystem::path(DownloadContent.Paths[Index]).generic_string(); - auto SizeIt = ExpectedSizes.find(LookupString); - CHECK_NE(SizeIt, 
ExpectedSizes.end()); - CHECK_EQ(SizeIt->second, DownloadContent.RawSizes[Index]); - std::filesystem::path FilePath = TargetPath / DownloadContent.Paths[Index]; - const IoHash DownloadedHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(FilePath.make_preferred())); - auto HashIt = ExpectedHashes.find(LookupString); - CHECK_NE(HashIt, ExpectedHashes.end()); - CHECK_EQ(HashIt->second, DownloadedHash); - } - } - - const std::filesystem::path RootPath; - std::filesystem::path StoragePath; - std::filesystem::path TempPath; - std::filesystem::path SystemRootDir; - std::filesystem::path ZenFolderPath; - - LoggerRef Log = ConsoleLog(); - std::unique_ptr<OperationLogOutput> LogOutput; - - std::unique_ptr<ChunkingController> ChunkController; - std::unique_ptr<ChunkingCache> ChunkCache; - - StorageInstance Storage; - BuildStorageBase::Statistics StorageStats; - - WorkerThreadPool WorkerPool; - WorkerThreadPool NetworkPool; - - std::atomic<bool> AbortFlag; - std::atomic<bool> PauseFlag; - }; - -} // namespace buildstorageoperations_testutils - -TEST_CASE("buildstorageoperations.upload.folder") -{ - using namespace buildstorageoperations_testutils; - - FastRandom BaseRandom; - - const size_t FileCount = 11; - - const std::string Paths[FileCount] = {{"file_1"}, - {"file_2.exe"}, - {"file_3.txt"}, - {"dir_1/dir1_file_1.exe"}, - {"dir_1/dir1_file_2.pdb"}, - {"dir_1/dir1_file_3.txt"}, - {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, - {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, - {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, - {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, - {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; - const uint64_t Sizes[FileCount] = - {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; - - ScopedTemporaryDirectory SourceFolder; - TestState State(SourceFolder.Path()); - State.Initialize(); - State.CreateSourceData("source", Paths, Sizes); - - const Oid BuildId = Oid::NewOid(); - const Oid BuildPartId = 
Oid::NewOid(); - const std::string BuildPartName = "default"; - - auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {}); - - CHECK_EQ(Result.size(), 1u); - CHECK_EQ(Result[0].first, BuildPartId); - CHECK_EQ(Result[0].second, BuildPartName); - State.ValidateUpload(BuildId, Result); - - FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false); - CHECK_EQ(DownloadContent.Paths.size(), FileCount); - State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); -} - -TEST_CASE("buildstorageoperations.upload.manifest") -{ - using namespace buildstorageoperations_testutils; - - FastRandom BaseRandom; - - const size_t FileCount = 11; - - const std::string Paths[FileCount] = {{"file_1"}, - {"file_2.exe"}, - {"file_3.txt"}, - {"dir_1/dir1_file_1.exe"}, - {"dir_1/dir1_file_2.pdb"}, - {"dir_1/dir1_file_3.txt"}, - {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, - {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, - {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, - {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, - {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; - const uint64_t Sizes[FileCount] = - {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; - - ScopedTemporaryDirectory SourceFolder; - TestState State(SourceFolder.Path()); - State.Initialize(); - State.CreateSourceData("source", Paths, Sizes); - - std::span<const std::string> ManifestFiles(Paths); - ManifestFiles = ManifestFiles.subspan(0, FileCount / 2); - - std::span<const uint64_t> ManifestSizes(Sizes); - ManifestSizes = ManifestSizes.subspan(0, FileCount / 2); - - ExtendableStringBuilder<1024> Manifest; - for (const std::string& FilePath : ManifestFiles) - { - Manifest << FilePath << "\n"; - } - - WriteFile(State.RootPath / "manifest.txt", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size())); - - const Oid BuildId = Oid::NewOid(); - const Oid BuildPartId = Oid::NewOid(); - const std::string 
BuildPartName = "default"; - - auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", State.RootPath / "manifest.txt"); - - CHECK_EQ(Result.size(), 1u); - CHECK_EQ(Result[0].first, BuildPartId); - CHECK_EQ(Result[0].second, BuildPartName); - State.ValidateUpload(BuildId, Result); - - FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false); - State.ValidateDownload(ManifestFiles, ManifestSizes, "source", "download", DownloadContent); -} - -TEST_CASE("buildstorageoperations.memorychunkingcache") -{ - using namespace buildstorageoperations_testutils; - - FastRandom BaseRandom; - - const size_t FileCount = 11; - - const std::string Paths[FileCount] = {{"file_1"}, - {"file_2.exe"}, - {"file_3.txt"}, - {"dir_1/dir1_file_1.exe"}, - {"dir_1/dir1_file_2.pdb"}, - {"dir_1/dir1_file_3.txt"}, - {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, - {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, - {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, - {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, - {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; - const uint64_t Sizes[FileCount] = - {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; - - ScopedTemporaryDirectory SourceFolder; - TestState State(SourceFolder.Path()); - State.Initialize(); - State.CreateSourceData("source", Paths, Sizes); - - const Oid BuildId = Oid::NewOid(); - const Oid BuildPartId = Oid::NewOid(); - const std::string BuildPartName = "default"; - - { - const std::filesystem::path SourcePath = SourceFolder.Path() / "source"; - CbObject MetaData; - BuildsOperationUploadFolder Upload(*State.LogOutput, - State.Storage, - State.AbortFlag, - State.PauseFlag, - State.WorkerPool, - State.NetworkPool, - BuildId, - SourcePath, - true, - MetaData, - BuildsOperationUploadFolder::Options{.TempDir = State.TempPath}); - auto Result = Upload.Execute(BuildPartId, BuildPartName, {}, *State.ChunkController, *State.ChunkCache); - - 
CHECK_EQ(Upload.m_ChunkingStats.FilesStoredInCache.load(), FileCount - 1); // Zero size files are not stored in cache - CHECK_EQ(Upload.m_ChunkingStats.BytesStoredInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0))); - CHECK(Upload.m_ChunkingStats.ChunksStoredInCache.load() >= FileCount - 1); // Zero size files are not stored in cache - - CHECK_EQ(Result.size(), 1u); - CHECK_EQ(Result[0].first, BuildPartId); - CHECK_EQ(Result[0].second, BuildPartName); - } - - auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {}); - - const Oid BuildId2 = Oid::NewOid(); - const Oid BuildPartId2 = Oid::NewOid(); - - { - const std::filesystem::path SourcePath = SourceFolder.Path() / "source"; - CbObject MetaData; - BuildsOperationUploadFolder Upload(*State.LogOutput, - State.Storage, - State.AbortFlag, - State.PauseFlag, - State.WorkerPool, - State.NetworkPool, - BuildId2, - SourcePath, - true, - MetaData, - BuildsOperationUploadFolder::Options{.TempDir = State.TempPath}); - Upload.Execute(BuildPartId2, BuildPartName, {}, *State.ChunkController, *State.ChunkCache); - - CHECK_EQ(Upload.m_ChunkingStats.FilesFoundInCache.load(), FileCount - 1); // Zero size files are not stored in cache - CHECK_EQ(Upload.m_ChunkingStats.BytesFoundInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0))); - CHECK(Upload.m_ChunkingStats.ChunksFoundInCache.load() >= FileCount - 1); // Zero size files are not stored in cache - } - - FolderContent DownloadContent = State.Download(BuildId2, BuildPartId2, {}, "download", /* Append */ false); - State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); -} - -TEST_CASE("buildstorageoperations.upload.multipart") -{ - // Disabled since it relies on authentication and specific block being present in cloud storage - if (false) - { - using namespace buildstorageoperations_testutils; - - FastRandom BaseRandom; - - const size_t FileCount = 11; - - const std::string Paths[FileCount] = 
{{"file_1"}, - {"file_2.exe"}, - {"file_3.txt"}, - {"dir_1/dir1_file_1.exe"}, - {"dir_1/dir1_file_2.pdb"}, - {"dir_1/dir1_file_3.txt"}, - {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, - {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, - {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, - {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, - {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; - const uint64_t Sizes[FileCount] = - {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; - - ScopedTemporaryDirectory SourceFolder; - TestState State(SourceFolder.Path()); - State.Initialize(); - State.CreateSourceData("source", Paths, Sizes); - - std::span<const std::string> ManifestFiles1(Paths); - ManifestFiles1 = ManifestFiles1.subspan(0, FileCount / 2); - - std::span<const uint64_t> ManifestSizes1(Sizes); - ManifestSizes1 = ManifestSizes1.subspan(0, FileCount / 2); - - std::span<const std::string> ManifestFiles2(Paths); - ManifestFiles2 = ManifestFiles2.subspan(FileCount / 2 - 1); - - std::span<const uint64_t> ManifestSizes2(Sizes); - ManifestSizes2 = ManifestSizes2.subspan(FileCount / 2 - 1); - - const Oid BuildPart1Id = Oid::NewOid(); - const std::string BuildPart1Name = "part1"; - const Oid BuildPart2Id = Oid::NewOid(); - const std::string BuildPart2Name = "part2"; - { - CbObjectWriter Writer; - Writer.BeginObject("parts"sv); - { - Writer.BeginObject(BuildPart1Name); - { - Writer.AddObjectId("partId"sv, BuildPart1Id); - Writer.BeginArray("files"sv); - for (const std::string& ManifestFile : ManifestFiles1) - { - Writer.AddString(ManifestFile); - } - Writer.EndArray(); // files - } - Writer.EndObject(); // part1 - - Writer.BeginObject(BuildPart2Name); - { - Writer.AddObjectId("partId"sv, BuildPart2Id); - Writer.BeginArray("files"sv); - for (const std::string& ManifestFile : ManifestFiles2) - { - Writer.AddString(ManifestFile); - } - Writer.EndArray(); // files - } - Writer.EndObject(); // part2 - } - Writer.EndObject(); // parts - - 
ExtendableStringBuilder<1024> Manifest; - CompactBinaryToJson(Writer.Save(), Manifest); - WriteFile(State.RootPath / "manifest.json", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size())); - } - - const Oid BuildId = Oid::NewOid(); - - auto Result = State.Upload(BuildId, {}, {}, "source", State.RootPath / "manifest.json"); - - CHECK_EQ(Result.size(), 2u); - CHECK_EQ(Result[0].first, BuildPart1Id); - CHECK_EQ(Result[0].second, BuildPart1Name); - CHECK_EQ(Result[1].first, BuildPart2Id); - CHECK_EQ(Result[1].second, BuildPart2Name); - State.ValidateUpload(BuildId, Result); - - FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false); - State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); - - FolderContent Part1DownloadContent = State.Download(BuildId, BuildPart1Id, {}, "download_part1", /* Append */ false); - State.ValidateDownload(ManifestFiles1, ManifestSizes1, "source", "download_part1", Part1DownloadContent); - - FolderContent Part2DownloadContent = State.Download(BuildId, Oid::Zero, BuildPart2Name, "download_part2", /* Append */ false); - State.ValidateDownload(ManifestFiles2, ManifestSizes2, "source", "download_part2", Part2DownloadContent); - - (void)State.Download(BuildId, BuildPart1Id, BuildPart1Name, "download_part1+2", /* Append */ false); - FolderContent Part1And2DownloadContent = State.Download(BuildId, BuildPart2Id, {}, "download_part1+2", /* Append */ true); - State.ValidateDownload(Paths, Sizes, "source", "download_part1+2", Part1And2DownloadContent); - } -} - -void -buildstorageoperations_forcelink() -{ -} - -#endif // ZEN_WITH_TESTS - -} // namespace zen diff --git a/src/zenremotestore/builds/buildstorageresolve.cpp b/src/zenremotestore/builds/buildstorageresolve.cpp new file mode 100644 index 000000000..b33d7af29 --- /dev/null +++ b/src/zenremotestore/builds/buildstorageresolve.cpp @@ -0,0 +1,249 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zenremotestore/builds/buildstorageresolve.h> + +#include <zencore/fmtutils.h> +#include <zenremotestore/builds/buildstoragecache.h> +#include <zenremotestore/jupiter/jupiterhost.h> +#include <zenutil/zenserverprocess.h> + +namespace zen { + +namespace { + std::string ConnectionSettingsToString(const HttpClientSettings& ClientSettings) + { + ExtendableStringBuilder<128> SB; + SB << "\n LogCategory: " << ClientSettings.LogCategory; + SB << "\n ConnectTimeout: " << ClientSettings.ConnectTimeout.count() << " ms"; + SB << "\n Timeout: " << ClientSettings.Timeout.count() << " ms"; + SB << "\n AccessTokenProvider: " << ClientSettings.AccessTokenProvider.has_value(); + SB << "\n AssumeHttp2: " << ClientSettings.AssumeHttp2; + SB << "\n AllowResume: " << ClientSettings.AllowResume; + SB << "\n RetryCount: " << ClientSettings.RetryCount; + SB << "\n SessionId: " << ClientSettings.SessionId.ToString(); + SB << "\n Verbose: " << ClientSettings.Verbose; + SB << "\n MaximumInMemoryDownloadSize: " << ClientSettings.MaximumInMemoryDownloadSize; + return SB.ToString(); + } +} // namespace + +BuildStorageResolveResult +ResolveBuildStorage(LoggerRef InLog, + const HttpClientSettings& ClientSettings, + std::string_view Host, + std::string_view OverrideHost, + std::string_view ZenCacheHost, + ZenCacheResolveMode ZenResolveMode, + bool Verbose) +{ + ZEN_SCOPED_LOG(InLog); + + bool AllowZenCacheDiscovery = ZenResolveMode == ZenCacheResolveMode::Discovery || ZenResolveMode == ZenCacheResolveMode::All; + bool AllowLocalZenCache = ZenResolveMode == ZenCacheResolveMode::LocalHost || ZenResolveMode == ZenCacheResolveMode::All; + + auto GetHostNameFromUrl = [](std::string_view Url) -> std::string_view { + std::string::size_type HostnameStart = 0; + std::string::size_type HostnameLength = std::string::npos; + if (auto StartPos = Url.find("//"); StartPos != std::string::npos) + { + HostnameStart = StartPos + 2; + } + if (auto EndPos = Url.find("/", HostnameStart); EndPos != 
std::string::npos) + { + HostnameLength = EndPos - HostnameStart; + } + if (auto EndPos = Url.find(":", HostnameStart); EndPos != std::string::npos) + { + HostnameLength = EndPos - HostnameStart; + } + return Url.substr(HostnameStart, HostnameLength); + }; + + std::string HostUrl; + std::string HostName; + double HostLatencySec = -1.0; + uint64_t HostMaxRangeCountPerRequest = 1; + + std::string CacheUrl; + std::string CacheName; + bool HostAssumeHttp2 = ClientSettings.AssumeHttp2; + bool CacheAssumeHttp2 = ClientSettings.AssumeHttp2; + double CacheLatencySec = -1.0; + uint64_t CacheMaxRangeCountPerRequest = 1; + + JupiterServerDiscovery DiscoveryResponse; + const std::string_view DiscoveryHost = Host.empty() ? OverrideHost : Host; + + if (OverrideHost.empty() || (ZenCacheHost.empty() && AllowZenCacheDiscovery)) + { + if (Verbose) + { + ZEN_INFO("Querying servers at '{}/api/v1/status/servers'\n Connection settings:{}", + DiscoveryHost, + ConnectionSettingsToString(ClientSettings)); + } + + DiscoveryResponse = DiscoverJupiterEndpoints(DiscoveryHost, ClientSettings); + } + + if (!OverrideHost.empty()) + { + if (Verbose) + { + ZEN_INFO("Testing server endpoint at '{}/health/live'. Assume http2: {}", OverrideHost, HostAssumeHttp2); + } + if (JupiterEndpointTestResult TestResult = TestJupiterEndpoint(OverrideHost, HostAssumeHttp2, ClientSettings.Verbose); + TestResult.Success) + { + if (Verbose) + { + ZEN_INFO("Server endpoint at '{}/api/v1/status/servers' succeeded", OverrideHost); + } + HostUrl = OverrideHost; + HostName = GetHostNameFromUrl(OverrideHost); + HostLatencySec = TestResult.LatencySeconds; + HostMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest; + } + else + { + throw std::runtime_error(fmt::format("Host {} could not be reached. 
Reason: {}", OverrideHost, TestResult.FailureReason)); + } + } + else + { + if (DiscoveryResponse.ServerEndPoints.empty()) + { + throw std::runtime_error(fmt::format("Failed to find any builds hosts at {}", DiscoveryHost)); + } + + for (const JupiterServerDiscovery::EndPoint& ServerEndpoint : DiscoveryResponse.ServerEndPoints) + { + if (!ServerEndpoint.BaseUrl.empty()) + { + if (Verbose) + { + ZEN_INFO("Testing server endpoint at '{}/health/live'. Assume http2: {}", + ServerEndpoint.BaseUrl, + ServerEndpoint.AssumeHttp2); + } + + if (JupiterEndpointTestResult TestResult = + TestJupiterEndpoint(ServerEndpoint.BaseUrl, ServerEndpoint.AssumeHttp2, ClientSettings.Verbose); + TestResult.Success) + { + if (Verbose) + { + ZEN_INFO("Server endpoint at '{}/api/v1/status/servers' succeeded", ServerEndpoint.BaseUrl); + } + + HostUrl = ServerEndpoint.BaseUrl; + HostAssumeHttp2 = ServerEndpoint.AssumeHttp2; + HostName = ServerEndpoint.Name; + HostLatencySec = TestResult.LatencySeconds; + HostMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest; + break; + } + else + { + ZEN_DEBUG("Unable to reach host {}. Reason: {}", ServerEndpoint.BaseUrl, TestResult.FailureReason); + } + } + } + if (HostUrl.empty()) + { + throw std::runtime_error(fmt::format("Failed to find any usable builds hosts out of {} using {}", + DiscoveryResponse.ServerEndPoints.size(), + DiscoveryHost)); + } + } + if (ZenCacheHost.empty()) + { + if (AllowZenCacheDiscovery) + { + for (const JupiterServerDiscovery::EndPoint& CacheEndpoint : DiscoveryResponse.CacheEndPoints) + { + if (!CacheEndpoint.BaseUrl.empty()) + { + if (Verbose) + { + ZEN_INFO("Testing cache endpoint at '{}/status/builds'. 
Assume http2: {}", + CacheEndpoint.BaseUrl, + CacheEndpoint.AssumeHttp2); + } + + if (ZenCacheEndpointTestResult TestResult = + TestZenCacheEndpoint(CacheEndpoint.BaseUrl, CacheEndpoint.AssumeHttp2, ClientSettings.Verbose); + TestResult.Success) + { + if (Verbose) + { + ZEN_INFO("Cache endpoint at '{}/status/builds' succeeded", CacheEndpoint.BaseUrl); + } + + CacheUrl = CacheEndpoint.BaseUrl; + CacheAssumeHttp2 = CacheEndpoint.AssumeHttp2; + CacheName = CacheEndpoint.Name; + CacheLatencySec = TestResult.LatencySeconds; + CacheMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest; + break; + } + } + } + } + if (CacheUrl.empty() && AllowLocalZenCache) + { + ZenServerState State; + if (State.InitializeReadOnly()) + { + State.Snapshot([&](const ZenServerState::ZenServerEntry& Entry) { + if (CacheUrl.empty()) + { + std::string ZenServerLocalHostUrl = fmt::format("http://127.0.0.1:{}", Entry.EffectiveListenPort.load()); + if (ZenCacheEndpointTestResult TestResult = + TestZenCacheEndpoint(ZenServerLocalHostUrl, /*AssumeHttp2*/ false, ClientSettings.Verbose); + TestResult.Success) + { + CacheUrl = ZenServerLocalHostUrl; + CacheAssumeHttp2 = false; + CacheName = "localhost"; + CacheLatencySec = TestResult.LatencySeconds; + } + } + }); + } + } + } + else + { + if (Verbose) + { + ZEN_INFO("Testing cache endpoint at '{}/status/builds'. Assume http2: {}", ZenCacheHost, false); + } + if (ZenCacheEndpointTestResult TestResult = TestZenCacheEndpoint(ZenCacheHost, /*AssumeHttp2*/ false, ClientSettings.Verbose); + TestResult.Success) + { + CacheUrl = ZenCacheHost; + CacheName = GetHostNameFromUrl(ZenCacheHost); + CacheLatencySec = TestResult.LatencySeconds; + CacheMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest; + } + else + { + ZEN_WARN("Unable to reach cache host {}. 
Reason: {}", ZenCacheHost, TestResult.FailureReason); + } + } + + return BuildStorageResolveResult{ + .Cloud = {.Address = HostUrl, + .Name = HostName, + .AssumeHttp2 = HostAssumeHttp2, + .LatencySec = HostLatencySec, + .Caps = BuildStorageResolveResult::Capabilities{.MaxRangeCountPerRequest = HostMaxRangeCountPerRequest}}, + .Cache = {.Address = CacheUrl, + .Name = CacheName, + .AssumeHttp2 = CacheAssumeHttp2, + .LatencySec = CacheLatencySec, + .Caps = BuildStorageResolveResult::Capabilities{.MaxRangeCountPerRequest = CacheMaxRangeCountPerRequest}}}; +} + +} // namespace zen diff --git a/src/zenremotestore/builds/buildstorageutil.cpp b/src/zenremotestore/builds/buildstorageutil.cpp index b249d7d52..dc8f79a47 100644 --- a/src/zenremotestore/builds/buildstorageutil.cpp +++ b/src/zenremotestore/builds/buildstorageutil.cpp @@ -2,293 +2,116 @@ #include <zenremotestore/builds/buildstorageutil.h> +#include <zencore/basicfile.h> +#include <zencore/compactbinary.h> +#include <zencore/compactbinaryutil.h> +#include <zencore/compactbinaryvalue.h> +#include <zencore/except.h> #include <zencore/fmtutils.h> +#include <zencore/logging/broadcastsink.h> +#include <zencore/parallelwork.h> #include <zencore/timer.h> +#include <zencore/trace.h> +#include <zenremotestore/builds/buildcontent.h> +#include <zenremotestore/builds/buildmanifest.h> +#include <zenremotestore/builds/buildprimecache.h> #include <zenremotestore/builds/buildstorage.h> #include <zenremotestore/builds/buildstoragecache.h> +#include <zenremotestore/builds/buildupdatefolder.h> +#include <zenremotestore/builds/builduploadfolder.h> +#include <zenremotestore/builds/buildvalidatebuildpart.h> #include <zenremotestore/builds/jupiterbuildstorage.h> #include <zenremotestore/chunking/chunkblock.h> +#include <zenremotestore/chunking/chunkingcache.h> +#include <zenremotestore/chunking/chunkingcontroller.h> #include <zenremotestore/jupiter/jupiterhost.h> -#include <zenremotestore/operationlogoutput.h> +#include 
<zenutil/filesystemutils.h> +#include <zenutil/logging.h> +#include <zenutil/progress.h> +#include <zenutil/wildcard.h> #include <zenutil/zenserverprocess.h> -namespace zen { -namespace { - std::string ConnectionSettingsToString(const HttpClientSettings& ClientSettings) - { - ExtendableStringBuilder<128> SB; - SB << "\n LogCategory: " << ClientSettings.LogCategory; - SB << "\n ConnectTimeout: " << ClientSettings.ConnectTimeout.count() << " ms"; - SB << "\n Timeout: " << ClientSettings.Timeout.count() << " ms"; - SB << "\n AccessTokenProvider: " << ClientSettings.AccessTokenProvider.has_value(); - SB << "\n AssumeHttp2: " << ClientSettings.AssumeHttp2; - SB << "\n AllowResume: " << ClientSettings.AllowResume; - SB << "\n RetryCount: " << ClientSettings.RetryCount; - SB << "\n SessionId: " << ClientSettings.SessionId.ToString(); - SB << "\n Verbose: " << ClientSettings.Verbose; - SB << "\n MaximumInMemoryDownloadSize: " << ClientSettings.MaximumInMemoryDownloadSize; - return SB.ToString(); - } -} // namespace - -BuildStorageResolveResult -ResolveBuildStorage(OperationLogOutput& Output, - const HttpClientSettings& ClientSettings, - std::string_view Host, - std::string_view OverrideHost, - std::string_view ZenCacheHost, - ZenCacheResolveMode ZenResolveMode, - bool Verbose) -{ - bool AllowZenCacheDiscovery = ZenResolveMode == ZenCacheResolveMode::Discovery || ZenResolveMode == ZenCacheResolveMode::All; - bool AllowLocalZenCache = ZenResolveMode == ZenCacheResolveMode::LocalHost || ZenResolveMode == ZenCacheResolveMode::All; - - auto GetHostNameFromUrl = [](std::string_view Url) -> std::string_view { - std::string::size_type HostnameStart = 0; - std::string::size_type HostnameLength = std::string::npos; - if (auto StartPos = Url.find("//"); StartPos != std::string::npos) - { - HostnameStart = StartPos + 2; - } - if (auto EndPos = Url.find("/", HostnameStart); EndPos != std::string::npos) - { - HostnameLength = EndPos - HostnameStart; - } - if (auto EndPos = Url.find(":", 
HostnameStart); EndPos != std::string::npos) - { - HostnameLength = EndPos - HostnameStart; - } - return Url.substr(HostnameStart, HostnameLength); - }; +#include <numeric> - std::string HostUrl; - std::string HostName; - double HostLatencySec = -1.0; +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +# include <zencore/testutils.h> +# include <zenhttp/httpclientauth.h> +# include <zenremotestore/builds/filebuildstorage.h> +#endif // ZEN_WITH_TESTS - std::string CacheUrl; - std::string CacheName; - bool HostAssumeHttp2 = ClientSettings.AssumeHttp2; - bool CacheAssumeHttp2 = ClientSettings.AssumeHttp2; - double CacheLatencySec = -1.0; - - JupiterServerDiscovery DiscoveryResponse; - const std::string_view DiscoveryHost = Host.empty() ? OverrideHost : Host; +namespace zen { - if (OverrideHost.empty() || (ZenCacheHost.empty() && AllowZenCacheDiscovery)) +StorageInstance::~StorageInstance() +{ + if (CacheLogSink) { - if (Verbose) + if (Ref<logging::BroadcastSink> Broadcast = GetDefaultBroadcastSink()) { - ZEN_OPERATION_LOG_INFO(Output, - "Querying servers at '{}/api/v1/status/servers'\n Connection settings:{}", - DiscoveryHost, - ConnectionSettingsToString(ClientSettings)); + Broadcast->RemoveSink(CacheLogSink); } - - DiscoveryResponse = DiscoverJupiterEndpoints(DiscoveryHost, ClientSettings); } +} - if (!OverrideHost.empty()) - { - if (Verbose) - { - ZEN_OPERATION_LOG_INFO(Output, "Testing server endpoint at '{}/health/live'. Assume http2: {}", OverrideHost, HostAssumeHttp2); - } - if (JupiterEndpointTestResult TestResult = TestJupiterEndpoint(OverrideHost, HostAssumeHttp2, ClientSettings.Verbose); - TestResult.Success) - { - if (Verbose) - { - ZEN_OPERATION_LOG_INFO(Output, "Server endpoint at '{}/api/v1/status/servers' succeeded", OverrideHost); - } - HostUrl = OverrideHost; - HostName = GetHostNameFromUrl(OverrideHost); - HostLatencySec = TestResult.LatencySeconds; - } - else - { - throw std::runtime_error(fmt::format("Host {} could not be reached. 
Reason: {}", OverrideHost, TestResult.FailureReason)); - } - } - else - { - if (DiscoveryResponse.ServerEndPoints.empty()) - { - throw std::runtime_error(fmt::format("Failed to find any builds hosts at {}", DiscoveryHost)); - } - - for (const JupiterServerDiscovery::EndPoint& ServerEndpoint : DiscoveryResponse.ServerEndPoints) - { - if (!ServerEndpoint.BaseUrl.empty()) - { - if (Verbose) - { - ZEN_OPERATION_LOG_INFO(Output, - "Testing server endpoint at '{}/health/live'. Assume http2: {}", - ServerEndpoint.BaseUrl, - ServerEndpoint.AssumeHttp2); - } - - if (JupiterEndpointTestResult TestResult = - TestJupiterEndpoint(ServerEndpoint.BaseUrl, ServerEndpoint.AssumeHttp2, ClientSettings.Verbose); - TestResult.Success) - { - if (Verbose) - { - ZEN_OPERATION_LOG_INFO(Output, "Server endpoint at '{}/api/v1/status/servers' succeeded", ServerEndpoint.BaseUrl); - } - - HostUrl = ServerEndpoint.BaseUrl; - HostAssumeHttp2 = ServerEndpoint.AssumeHttp2; - HostName = ServerEndpoint.Name; - HostLatencySec = TestResult.LatencySeconds; - break; - } - else - { - ZEN_OPERATION_LOG_DEBUG(Output, - "Unable to reach host {}. Reason: {}", - ServerEndpoint.BaseUrl, - TestResult.FailureReason); - } - } - } - if (HostUrl.empty()) - { - throw std::runtime_error(fmt::format("Failed to find any usable builds hosts out of {} using {}", - DiscoveryResponse.ServerEndPoints.size(), - DiscoveryHost)); - } - } - if (ZenCacheHost.empty()) - { - if (AllowZenCacheDiscovery) - { - for (const JupiterServerDiscovery::EndPoint& CacheEndpoint : DiscoveryResponse.CacheEndPoints) - { - if (!CacheEndpoint.BaseUrl.empty()) - { - if (Verbose) - { - ZEN_OPERATION_LOG_INFO(Output, - "Testing cache endpoint at '{}/status/builds'. 
Assume http2: {}", - CacheEndpoint.BaseUrl, - CacheEndpoint.AssumeHttp2); - } +void +StorageInstance::SetupCacheSession(std::string_view TargetUrl, std::string_view Mode, const Oid& SessionId) +{ + CacheSession = std::make_unique<SessionsServiceClient>(SessionsServiceClient::Options{ + .TargetUrl = std::string(TargetUrl), + .AppName = "zen", + .Mode = std::string(Mode), + .SessionId = SessionId, + }); + CacheSession->Announce(); + CacheLogSink = CacheSession->CreateLogSink(); + GetDefaultBroadcastSink()->AddSink(CacheLogSink); +} - if (ZenCacheEndpointTestResult TestResult = - TestZenCacheEndpoint(CacheEndpoint.BaseUrl, CacheEndpoint.AssumeHttp2, ClientSettings.Verbose); - TestResult.Success) - { - if (Verbose) - { - ZEN_OPERATION_LOG_INFO(Output, "Cache endpoint at '{}/status/builds' succeeded", CacheEndpoint.BaseUrl); - } +using namespace std::literals; - CacheUrl = CacheEndpoint.BaseUrl; - CacheAssumeHttp2 = CacheEndpoint.AssumeHttp2; - CacheName = CacheEndpoint.Name; - CacheLatencySec = TestResult.LatencySeconds; - break; - } - } - } - } - if (CacheUrl.empty() && AllowLocalZenCache) - { - ZenServerState State; - if (State.InitializeReadOnly()) - { - State.Snapshot([&](const ZenServerState::ZenServerEntry& Entry) { - if (CacheUrl.empty()) - { - std::string ZenServerLocalHostUrl = fmt::format("http://127.0.0.1:{}", Entry.EffectiveListenPort.load()); - if (ZenCacheEndpointTestResult TestResult = - TestZenCacheEndpoint(ZenServerLocalHostUrl, /*AssumeHttp2*/ false, ClientSettings.Verbose); - TestResult.Success) - { - CacheUrl = ZenServerLocalHostUrl; - CacheAssumeHttp2 = false; - CacheName = "localhost"; - CacheLatencySec = TestResult.LatencySeconds; - } - } - }); - } - } - } - else +std::vector<ChunkBlockDescription> +ParseBlockMetadatas(std::span<const CbObject> BlockMetadatas) +{ + std::vector<ChunkBlockDescription> UnorderedList; + UnorderedList.reserve(BlockMetadatas.size()); + for (size_t CacheBlockMetadataIndex = 0; CacheBlockMetadataIndex < 
BlockMetadatas.size(); CacheBlockMetadataIndex++) { - if (Verbose) - { - ZEN_OPERATION_LOG_INFO(Output, "Testing cache endpoint at '{}/status/builds'. Assume http2: {}", ZenCacheHost, false); - } - if (ZenCacheEndpointTestResult TestResult = TestZenCacheEndpoint(ZenCacheHost, /*AssumeHttp2*/ false, ClientSettings.Verbose); - TestResult.Success) + const CbObject& CacheBlockMetadata = BlockMetadatas[CacheBlockMetadataIndex]; + ChunkBlockDescription Description = ParseChunkBlockDescription(CacheBlockMetadata); + if (Description.BlockHash != IoHash::Zero) { - CacheUrl = ZenCacheHost; - CacheName = GetHostNameFromUrl(ZenCacheHost); - CacheLatencySec = TestResult.LatencySeconds; - } - else - { - ZEN_WARN("Unable to reach cache host {}. Reason: {}", ZenCacheHost, TestResult.FailureReason); + UnorderedList.emplace_back(std::move(Description)); } } - - return BuildStorageResolveResult{.HostUrl = HostUrl, - .HostName = HostName, - .HostAssumeHttp2 = HostAssumeHttp2, - .HostLatencySec = HostLatencySec, - - .CacheUrl = CacheUrl, - .CacheName = CacheName, - .CacheAssumeHttp2 = CacheAssumeHttp2, - .CacheLatencySec = CacheLatencySec}; + return UnorderedList; } std::vector<ChunkBlockDescription> -GetBlockDescriptions(OperationLogOutput& Output, +GetBlockDescriptions(LoggerRef InLog, BuildStorageBase& Storage, BuildStorageCache* OptionalCacheStorage, const Oid& BuildId, - const Oid& BuildPartId, std::span<const IoHash> BlockRawHashes, bool AttemptFallback, bool IsQuiet, bool IsVerbose) { using namespace std::literals; - - if (!IsQuiet) - { - ZEN_OPERATION_LOG_INFO(Output, "Fetching metadata for {} blocks", BlockRawHashes.size()); - } - - Stopwatch GetBlockMetadataTimer; + ZEN_SCOPED_LOG(InLog); std::vector<ChunkBlockDescription> UnorderedList; tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockDescriptionLookup; if (OptionalCacheStorage && !BlockRawHashes.empty()) { std::vector<CbObject> CacheBlockMetadatas = OptionalCacheStorage->GetBlobMetadatas(BuildId, BlockRawHashes); - 
UnorderedList.reserve(CacheBlockMetadatas.size()); - for (size_t CacheBlockMetadataIndex = 0; CacheBlockMetadataIndex < CacheBlockMetadatas.size(); CacheBlockMetadataIndex++) + if (!CacheBlockMetadatas.empty()) { - const CbObject& CacheBlockMetadata = CacheBlockMetadatas[CacheBlockMetadataIndex]; - ChunkBlockDescription Description = ParseChunkBlockDescription(CacheBlockMetadata); - if (Description.BlockHash == IoHash::Zero) + UnorderedList = ParseBlockMetadatas(CacheBlockMetadatas); + for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++) { - ZEN_OPERATION_LOG_WARN(Output, "Unexpected/invalid block metadata received from remote cache, skipping block"); + const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex]; + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex); } - else - { - UnorderedList.emplace_back(std::move(Description)); - } - } - for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++) - { - const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex]; - BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex); } } @@ -315,7 +138,7 @@ GetBlockDescriptions(OperationLogOutput& Output, if (Description.BlockHash == IoHash::Zero) { - ZEN_OPERATION_LOG_WARN(Output, "Unexpected/invalid block metadata received from remote store, skipping block"); + ZEN_WARN("Unexpected/invalid block metadata received from remote store, skipping block"); } else { @@ -355,15 +178,6 @@ GetBlockDescriptions(OperationLogOutput& Output, } } - if (!IsQuiet) - { - ZEN_OPERATION_LOG_INFO(Output, - "GetBlockMetadata for {} took {}. 
Found {} blocks", - BuildPartId, - NiceTimeSpanMs(GetBlockMetadataTimer.GetElapsedTimeMs()), - Result.size()); - } - if (Result.size() != BlockRawHashes.size()) { std::string ErrorDescription = @@ -385,7 +199,7 @@ GetBlockDescriptions(OperationLogOutput& Output, } if (AttemptFallback) { - ZEN_OPERATION_LOG_WARN(Output, "{} Attemping fallback options.", ErrorDescription); + ZEN_WARN("{} Attemping fallback options.", ErrorDescription); std::vector<ChunkBlockDescription> AugmentedBlockDescriptions; AugmentedBlockDescriptions.reserve(BlockRawHashes.size()); std::vector<ChunkBlockDescription> FoundBlocks = ParseChunkBlockDescriptionList(Storage.FindBlocks(BuildId, (uint64_t)-1)); @@ -408,7 +222,7 @@ GetBlockDescriptions(OperationLogOutput& Output, { if (!IsQuiet) { - ZEN_OPERATION_LOG_INFO(Output, "Found block {} via context find successfully", BlockHash); + ZEN_INFO("Found block {} via context find successfully", BlockHash); } AugmentedBlockDescriptions.emplace_back(std::move(*ListBlocksIt)); } @@ -453,4 +267,1472 @@ GetBlockDescriptions(OperationLogOutput& Output, return Result; } +////////////////////// Shared helpers + +std::filesystem::path +ZenStateFilePath(const std::filesystem::path& ZenFolderPath) +{ + return ZenFolderPath / "current_state.cbo"; +} +std::filesystem::path +ZenTempFolderPath(const std::filesystem::path& ZenFolderPath) +{ + return ZenFolderPath / "tmp"; +} + +CbObject +GetBuild(BuildStorageBase& Storage, const Oid& BuildId, bool IsQuiet) +{ + Stopwatch GetBuildTimer; + CbObject BuildObject = Storage.GetBuild(BuildId); + if (!IsQuiet) + { + ZEN_CONSOLE("GetBuild took {}. Name: '{}', Payload size: {}", + NiceTimeSpanMs(GetBuildTimer.GetElapsedTimeMs()), + BuildObject["name"sv].AsString(), + NiceBytes(BuildObject.GetSize())); + + ZEN_CONSOLE("{}", GetCbObjectAsNiceString(BuildObject, " "sv, "\n"sv)); + } + return BuildObject; +} + +uint64_t +GetMaxMemoryBufferSize(size_t MaxBlockSize, bool BoostWorkerMemory) +{ + return BoostWorkerMemory ? 
(MaxBlockSize + 16u * 1024u) : 1024u * 1024u; +} + +void +DownloadLargeBlob(BuildStorageBase& Storage, + const std::filesystem::path& DownloadFolder, + const Oid& BuildId, + const IoHash& ChunkHash, + const std::uint64_t PreferredMultipartChunkSize, + ParallelWork& Work, + WorkerThreadPool& NetworkPool, + std::atomic<uint64_t>& DownloadedChunkByteCount, + std::atomic<uint64_t>& MultipartAttachmentCount, + std::function<void(IoBuffer&& Payload)>&& OnDownloadComplete) +{ + ZEN_TRACE_CPU("DownloadLargeBlob"); + + struct WorkloadData + { + TemporaryFile TempFile; + }; + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + + std::error_code Ec; + Workload->TempFile.CreateTemporary(DownloadFolder, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed opening temporary file '{}', reason: ({}) {}", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); + } + std::vector<std::function<void()>> WorkItems = Storage.GetLargeBuildBlob( + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + [&Work, Workload, &DownloadedChunkByteCount](uint64_t Offset, const IoBuffer& Chunk) { + DownloadedChunkByteCount += Chunk.GetSize(); + + if (!Work.IsAborted()) + { + ZEN_TRACE_CPU("Async_DownloadLargeBlob_OnReceive"); + Workload->TempFile.Write(Chunk.GetView(), Offset); + } + }, + [&Work, Workload, OnDownloadComplete = std::move(OnDownloadComplete)]() { + if (!Work.IsAborted()) + { + ZEN_TRACE_CPU("Async_DownloadLargeBlob_OnComplete"); + + uint64_t PayloadSize = Workload->TempFile.FileSize(); + void* FileHandle = Workload->TempFile.Detach(); + ZEN_ASSERT(FileHandle != nullptr); + IoBuffer Payload(IoBuffer::File, FileHandle, 0, PayloadSize, true); + Payload.SetDeleteOnClose(true); + OnDownloadComplete(std::move(Payload)); + } + }); + if (!WorkItems.empty()) + { + MultipartAttachmentCount++; + } + for (auto& WorkItem : WorkItems) + { + Work.ScheduleWork(NetworkPool, [WorkItem = std::move(WorkItem)](std::atomic<bool>& AbortFlag) { + if (!AbortFlag) + { 
+ ZEN_TRACE_CPU("Async_DownloadLargeBlob_Work"); + + WorkItem(); + } + }); + } +} + +CompositeBuffer +ValidateBlob(std::atomic<bool>& AbortFlag, + IoBuffer&& Payload, + const IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize) +{ + ZEN_TRACE_CPU("ValidateBlob"); + + if (Payload.GetContentType() != ZenContentType::kCompressedBinary) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) has unexpected content type '{}'", + BlobHash, + Payload.GetSize(), + ToString(Payload.GetContentType()))); + } + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) compressed header is invalid", BlobHash, Payload.GetSize())); + } + if (RawHash != BlobHash) + { + throw std::runtime_error( + fmt::format("Blob {} ({} bytes) compressed header has a mismatching raw hash {}", BlobHash, Payload.GetSize(), RawHash)); + } + + IoHashStream Hash; + bool CouldDecompress = Compressed.DecompressToStream( + 0, + RawSize, + [&AbortFlag, &Hash](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset, SourceSize, Offset); + if (!AbortFlag) + { + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + } + return true; + } + return false; + }); + + if (AbortFlag) + { + return CompositeBuffer{}; + } + + if (!CouldDecompress) + { + throw std::runtime_error( + fmt::format("Blob {} ({} bytes) failed to decompress - header information mismatch", BlobHash, Payload.GetSize())); + } + IoHash ValidateRawHash = Hash.GetHash(); + if (ValidateRawHash != BlobHash) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) decompressed hash {} does not match header information", + BlobHash, + Payload.GetSize(), + ValidateRawHash)); + } + OodleCompressor Compressor; + OodleCompressionLevel 
CompressionLevel; + uint64_t BlockSize; + if (!Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize)) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to get compression details", BlobHash, Payload.GetSize())); + } + OutCompressedSize = Payload.GetSize(); + OutDecompressedSize = RawSize; + if (CompressionLevel == OodleCompressionLevel::None) + { + // Only decompress to composite if we need it for block verification + CompositeBuffer DecompressedComposite = Compressed.DecompressToComposite(); + if (!DecompressedComposite) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to decompress to composite", BlobHash, Payload.GetSize())); + } + return DecompressedComposite; + } + return CompositeBuffer{}; +} + +CompositeBuffer +ValidateBlob(std::atomic<bool>& AbortFlag, + BuildStorageBase& Storage, + const Oid& BuildId, + const IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize) +{ + ZEN_TRACE_CPU("ValidateBlob"); + IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlobHash); + if (!Payload) + { + throw std::runtime_error(fmt::format("Blob {} could not be found", BlobHash)); + } + return ValidateBlob(AbortFlag, std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); +} + +std::vector<std::pair<Oid, std::string>> +ResolveBuildPartNames(CbObjectView BuildObject, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + std::uint64_t& OutPreferredMultipartChunkSize) +{ + std::vector<std::pair<Oid, std::string>> Result; + { + CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); + if (!PartsObject) + { + throw std::runtime_error("Build object does not have a 'parts' object"); + } + + OutPreferredMultipartChunkSize = BuildObject["chunkSize"sv].AsUInt64(OutPreferredMultipartChunkSize); + + std::vector<std::pair<Oid, std::string>> AvailableParts; + + for (CbFieldView PartView : PartsObject) + { + const 
std::string BuildPartName = std::string(PartView.GetName()); + const Oid BuildPartId = PartView.AsObjectId(); + if (BuildPartId == Oid::Zero) + { + ExtendableStringBuilder<128> SB; + for (CbFieldView ScanPartView : PartsObject) + { + SB.Append(fmt::format("\n {}: {}", ScanPartView.GetName(), ScanPartView.AsObjectId())); + } + throw std::runtime_error(fmt::format("Build object parts does not have a '{}' object id{}", BuildPartName, SB.ToView())); + } + AvailableParts.push_back({BuildPartId, BuildPartName}); + } + + if (BuildPartIds.empty() && BuildPartNames.empty()) + { + Result = AvailableParts; + } + else + { + for (const std::string& BuildPartName : BuildPartNames) + { + if (auto It = std::find_if(AvailableParts.begin(), + AvailableParts.end(), + [&BuildPartName](const auto& Part) { return Part.second == BuildPartName; }); + It != AvailableParts.end()) + { + Result.push_back(*It); + } + else + { + throw std::runtime_error(fmt::format("Build {} object does not have a part named '{}'", BuildId, BuildPartName)); + } + } + for (const Oid& BuildPartId : BuildPartIds) + { + if (auto It = std::find_if(AvailableParts.begin(), + AvailableParts.end(), + [&BuildPartId](const auto& Part) { return Part.first == BuildPartId; }); + It != AvailableParts.end()) + { + Result.push_back(*It); + } + else + { + throw std::runtime_error(fmt::format("Build {} object does not have a part with id '{}'", BuildId, BuildPartId)); + } + } + } + + if (Result.empty()) + { + throw std::runtime_error(fmt::format("Build object does not have any parts", BuildId)); + } + } + return Result; +} + +void +NormalizePartSelection(std::vector<Oid>& BuildPartIds, std::vector<std::string>& BuildPartNames, std::string_view HelpText) +{ + const bool HasWildcard = std::find(BuildPartNames.begin(), BuildPartNames.end(), "*") != BuildPartNames.end(); + if (HasWildcard) + { + if (BuildPartNames.size() != 1 || !BuildPartIds.empty()) + { + throw OptionParseException("'*' cannot be combined with other part names or 
ids", std::string(HelpText)); + } + BuildPartNames.clear(); + return; + } + + if (BuildPartIds.empty() && BuildPartNames.empty()) + { + BuildPartNames.push_back("default"); + } +} + +ChunkedFolderContent +GetRemoteContent(LoggerRef InLog, + StorageInstance& Storage, + const Oid& BuildId, + const std::vector<std::pair<Oid, std::string>>& BuildParts, + const BuildManifest& Manifest, + std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards, + std::unique_ptr<ChunkingController>& OutChunkController, + std::vector<ChunkedFolderContent>& OutPartContents, + std::vector<ChunkBlockDescription>& OutBlockDescriptions, + std::vector<IoHash>& OutLooseChunkHashes, + bool IsQuiet, + bool IsVerbose, + bool DoExtraContentVerify) +{ + ZEN_TRACE_CPU("GetRemoteContent"); + ZEN_SCOPED_LOG(InLog); + + Stopwatch GetBuildPartTimer; + const Oid BuildPartId = BuildParts[0].first; + const std::string_view BuildPartName = BuildParts[0].second; + CbObject BuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, BuildPartId); + if (!IsQuiet) + { + ZEN_INFO("GetBuildPart {} ('{}') took {}. 
Payload size: {}", + BuildPartId, + BuildPartName, + NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()), + NiceBytes(BuildPartManifest.GetSize())); + ZEN_INFO("{}", GetCbObjectAsNiceString(BuildPartManifest, " "sv, "\n"sv)); + } + + { + CbObjectView Chunker = BuildPartManifest["chunker"sv].AsObjectView(); + std::string_view ChunkerName = Chunker["name"sv].AsString(); + CbObjectView Parameters = Chunker["parameters"sv].AsObjectView(); + OutChunkController = CreateChunkingController(ChunkerName, Parameters); + } + + auto ParseBuildPartManifest = [&Log, IsQuiet, IsVerbose, DoExtraContentVerify](StorageInstance& Storage, + const Oid& BuildId, + const Oid& BuildPartId, + CbObject BuildPartManifest, + std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards, + const BuildManifest::Part* OptionalManifest, + ChunkedFolderContent& OutRemoteContent, + std::vector<ChunkBlockDescription>& OutBlockDescriptions, + std::vector<IoHash>& OutLooseChunkHashes) { + std::vector<uint32_t> AbsoluteChunkOrders; + std::vector<uint64_t> LooseChunkRawSizes; + std::vector<IoHash> BlockRawHashes; + + ReadBuildContentFromCompactBinary(BuildPartManifest, + OutRemoteContent.Platform, + OutRemoteContent.Paths, + OutRemoteContent.RawHashes, + OutRemoteContent.RawSizes, + OutRemoteContent.Attributes, + OutRemoteContent.ChunkedContent.SequenceRawHashes, + OutRemoteContent.ChunkedContent.ChunkCounts, + AbsoluteChunkOrders, + OutLooseChunkHashes, + LooseChunkRawSizes, + BlockRawHashes); + + // TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them + + { + if (!IsQuiet) + { + ZEN_INFO("Fetching metadata for {} blocks", BlockRawHashes.size()); + } + + Stopwatch GetBlockMetadataTimer; + + bool AttemptFallback = false; + OutBlockDescriptions = GetBlockDescriptions(Log(), + *Storage.BuildStorage, + Storage.CacheStorage.get(), + BuildId, + BlockRawHashes, + AttemptFallback, + IsQuiet, + IsVerbose); + + if 
(!IsQuiet) + { + ZEN_INFO("GetBlockMetadata for {} took {}. Found {} blocks", + BuildPartId, + NiceTimeSpanMs(GetBlockMetadataTimer.GetElapsedTimeMs()), + OutBlockDescriptions.size()); + } + } + + CalculateLocalChunkOrders(AbsoluteChunkOrders, + OutLooseChunkHashes, + LooseChunkRawSizes, + OutBlockDescriptions, + OutRemoteContent.ChunkedContent.ChunkHashes, + OutRemoteContent.ChunkedContent.ChunkRawSizes, + OutRemoteContent.ChunkedContent.ChunkOrders, + DoExtraContentVerify); + + std::vector<std::filesystem::path> DeletedPaths; + + if (OptionalManifest) + { + tsl::robin_set<std::string> PathsInManifest; + PathsInManifest.reserve(OptionalManifest->Files.size()); + for (const std::filesystem::path& ManifestPath : OptionalManifest->Files) + { + PathsInManifest.insert(ToLower(ManifestPath.generic_string())); + } + for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths) + { + if (!PathsInManifest.contains(ToLower(RemotePath.generic_string()))) + { + DeletedPaths.push_back(RemotePath); + } + } + } + + if (!IncludeWildcards.empty() || !ExcludeWildcards.empty()) + { + for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths) + { + if (!IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(RemotePath.generic_string()), /*CaseSensitive*/ true)) + { + DeletedPaths.push_back(RemotePath); + } + } + } + + if (!DeletedPaths.empty()) + { + OutRemoteContent = DeletePathsFromChunkedContent(OutRemoteContent, DeletedPaths); + InlineRemoveUnusedHashes(OutLooseChunkHashes, OutRemoteContent.ChunkedContent.ChunkHashes); + } + +#if ZEN_BUILD_DEBUG + ValidateChunkedFolderContent(OutRemoteContent, OutBlockDescriptions, OutLooseChunkHashes, IncludeWildcards, ExcludeWildcards); +#endif // ZEN_BUILD_DEBUG + }; + + auto FindManifest = [&Manifest](const Oid& BuildPartId, std::string_view BuildPartName) -> const BuildManifest::Part* { + if (Manifest.Parts.empty()) + { + return nullptr; + } + if (Manifest.Parts.size() == 1) + { + if (Manifest.Parts[0].PartId == 
Oid::Zero && Manifest.Parts[0].PartName.empty()) + { + return &Manifest.Parts[0]; + } + } + + auto It = std::find_if(Manifest.Parts.begin(), Manifest.Parts.end(), [BuildPartId, BuildPartName](const BuildManifest::Part& Part) { + if (Part.PartId != Oid::Zero) + { + return Part.PartId == BuildPartId; + } + if (!Part.PartName.empty()) + { + return Part.PartName == BuildPartName; + } + return false; + }); + if (It != Manifest.Parts.end()) + { + return &(*It); + } + return nullptr; + }; + + OutPartContents.resize(1); + ParseBuildPartManifest(Storage, + BuildId, + BuildPartId, + BuildPartManifest, + IncludeWildcards, + ExcludeWildcards, + FindManifest(BuildPartId, BuildPartName), + OutPartContents[0], + OutBlockDescriptions, + OutLooseChunkHashes); + ChunkedFolderContent RemoteContent; + if (BuildParts.size() > 1) + { + std::vector<ChunkBlockDescription> OverlayBlockDescriptions; + std::vector<IoHash> OverlayLooseChunkHashes; + for (size_t PartIndex = 1; PartIndex < BuildParts.size(); PartIndex++) + { + const Oid& OverlayBuildPartId = BuildParts[PartIndex].first; + const std::string& OverlayBuildPartName = BuildParts[PartIndex].second; + Stopwatch GetOverlayBuildPartTimer; + CbObject OverlayBuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, OverlayBuildPartId); + if (!IsQuiet) + { + ZEN_INFO("GetBuildPart {} ('{}') took {}. 
Payload size: {}", + OverlayBuildPartId, + OverlayBuildPartName, + NiceTimeSpanMs(GetOverlayBuildPartTimer.GetElapsedTimeMs()), + NiceBytes(OverlayBuildPartManifest.GetSize())); + } + + ChunkedFolderContent OverlayPartContent; + std::vector<ChunkBlockDescription> OverlayPartBlockDescriptions; + std::vector<IoHash> OverlayPartLooseChunkHashes; + + ParseBuildPartManifest(Storage, + BuildId, + OverlayBuildPartId, + OverlayBuildPartManifest, + IncludeWildcards, + ExcludeWildcards, + FindManifest(OverlayBuildPartId, OverlayBuildPartName), + OverlayPartContent, + OverlayPartBlockDescriptions, + OverlayPartLooseChunkHashes); + OutPartContents.push_back(OverlayPartContent); + OverlayBlockDescriptions.insert(OverlayBlockDescriptions.end(), + OverlayPartBlockDescriptions.begin(), + OverlayPartBlockDescriptions.end()); + OverlayLooseChunkHashes.insert(OverlayLooseChunkHashes.end(), + OverlayPartLooseChunkHashes.begin(), + OverlayPartLooseChunkHashes.end()); + } + + RemoteContent = MergeChunkedFolderContents(OutPartContents[0], std::span<const ChunkedFolderContent>(OutPartContents).subspan(1)); + { + tsl::robin_set<IoHash> AllBlockHashes; + for (const ChunkBlockDescription& Description : OutBlockDescriptions) + { + AllBlockHashes.insert(Description.BlockHash); + } + for (const ChunkBlockDescription& Description : OverlayBlockDescriptions) + { + if (!AllBlockHashes.contains(Description.BlockHash)) + { + AllBlockHashes.insert(Description.BlockHash); + OutBlockDescriptions.push_back(Description); + } + } + } + { + tsl::robin_set<IoHash> AllLooseChunkHashes(OutLooseChunkHashes.begin(), OutLooseChunkHashes.end()); + for (const IoHash& OverlayLooseChunkHash : OverlayLooseChunkHashes) + { + if (!AllLooseChunkHashes.contains(OverlayLooseChunkHash)) + { + AllLooseChunkHashes.insert(OverlayLooseChunkHash); + OutLooseChunkHashes.push_back(OverlayLooseChunkHash); + } + } + } + } + else + { + RemoteContent = OutPartContents[0]; + } + return RemoteContent; +} +std::string 
+GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix) +{ + ExtendableStringBuilder<512> SB; + std::vector<std::pair<std::string, std::string>> NameStringValuePairs; + for (CbFieldView Field : Object) + { + std::string_view Name = Field.GetName(); + switch (CbValue Accessor = Field.GetValue(); Accessor.GetType()) + { + case CbFieldType::String: + NameStringValuePairs.push_back({std::string(Name), std::string(Accessor.AsString())}); + break; + case CbFieldType::IntegerPositive: + NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerPositive())}); + break; + case CbFieldType::IntegerNegative: + NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerNegative())}); + break; + case CbFieldType::Float32: + { + const float Value = Accessor.AsFloat32(); + if (std::isfinite(Value)) + { + NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.9g}", Value)}); + } + else + { + NameStringValuePairs.push_back({std::string(Name), "null"}); + } + } + break; + case CbFieldType::Float64: + { + const double Value = Accessor.AsFloat64(); + if (std::isfinite(Value)) + { + NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.17g}", Value)}); + } + else + { + NameStringValuePairs.push_back({std::string(Name), "null"}); + } + } + break; + case CbFieldType::BoolFalse: + NameStringValuePairs.push_back({std::string(Name), "false"}); + break; + case CbFieldType::BoolTrue: + NameStringValuePairs.push_back({std::string(Name), "true"}); + break; + case CbFieldType::Hash: + { + NameStringValuePairs.push_back({std::string(Name), Accessor.AsHash().ToHexString()}); + } + break; + case CbFieldType::Uuid: + { + StringBuilder<Oid::StringLength + 1> Builder; + Accessor.AsUuid().ToString(Builder); + NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); + } + break; + case CbFieldType::DateTime: + { + ExtendableStringBuilder<64> Builder; + Builder << 
DateTime(Accessor.AsDateTimeTicks()).ToIso8601(); + NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); + } + break; + case CbFieldType::TimeSpan: + { + ExtendableStringBuilder<64> Builder; + const TimeSpan Span(Accessor.AsTimeSpanTicks()); + if (Span.GetDays() == 0) + { + Builder << Span.ToString("%h:%m:%s.%n"); + } + else + { + Builder << Span.ToString("%d.%h:%m:%s.%n"); + } + NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); + break; + } + case CbFieldType::ObjectId: + NameStringValuePairs.push_back({std::string(Name), Accessor.AsObjectId().ToString()}); + break; + } + } + std::string::size_type LongestKey = 0; + for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs) + { + LongestKey = Max(KeyValue.first.length(), LongestKey); + } + for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs) + { + SB.Append(fmt::format("{}{:<{}}: {}{}", Prefix, KeyValue.first, LongestKey, KeyValue.second, Suffix)); + } + return SB.ToString(); +} + +#if ZEN_WITH_TESTS + +namespace buildstorageoperations_testutils { + struct TestState + { + TestState(const std::filesystem::path& InRootPath) + : RootPath(InRootPath) + , LogOutput(CreateStandardProgress(Log)) + , ChunkController(CreateStandardChunkingController(StandardChunkingControllerSettings{})) + , ChunkCache(CreateMemoryChunkingCache()) + , WorkerPool(2) + , NetworkPool(2) + { + } + + void Initialize() + { + StoragePath = RootPath / "storage"; + TempPath = RootPath / "temp"; + SystemRootDir = RootPath / "sysroot"; + ZenFolderPath = RootPath / ".zen"; + + CreateDirectories(TempPath); + CreateDirectories(StoragePath); + + Storage.BuildStorage = CreateFileBuildStorage(StoragePath, StorageStats, false); + } + + void CreateSourceData(const std::filesystem::path& Source, std::span<const std::string> Paths, std::span<const uint64_t> Sizes) + { + const std::filesystem::path SourcePath = RootPath / Source; + CreateDirectories(SourcePath); + for 
(size_t FileIndex = 0; FileIndex < Paths.size(); FileIndex++) + { + const std::string& FilePath = Paths[FileIndex]; + const uint64_t FileSize = Sizes[FileIndex]; + IoBuffer FileData = FileSize > 0 ? CreateSemiRandomBlob(FileSize) : IoBuffer{}; + WriteFile(SourcePath / FilePath, FileData); + } + } + + std::vector<std::pair<Oid, std::string>> Upload(const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& Source, + const std::filesystem::path& ManifestPath) + { + const std::filesystem::path SourcePath = RootPath / Source; + CbObject MetaData; + BuildsOperationUploadFolder Upload(Log, + *LogOutput, + Storage, + AbortFlag, + PauseFlag, + WorkerPool, + NetworkPool, + BuildId, + SourcePath, + true, + MetaData, + BuildsOperationUploadFolder::Options{.TempDir = TempPath}); + return Upload.Execute(BuildPartId, BuildPartName, ManifestPath, *ChunkController, *ChunkCache); + } + + void ValidateUpload(const Oid& BuildId, const std::vector<std::pair<Oid, std::string>>& Parts) + { + for (auto Part : Parts) + { + BuildsOperationValidateBuildPart Validate(Log, + *LogOutput, + *Storage.BuildStorage, + AbortFlag, + PauseFlag, + WorkerPool, + NetworkPool, + BuildId, + Part.first, + Part.second, + BuildsOperationValidateBuildPart::Options{.TempFolder = TempPath / "validate"}); + Validate.Execute(); + } + } + + FolderContent Download(const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& Target, + bool Append) + { + const std::filesystem::path TargetPath = RootPath / Target; + + CreateDirectories(TargetPath); + + uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + CbObject BuildObject = Storage.BuildStorage->GetBuild(BuildId); + std::vector<Oid> PartIds; + if (BuildPartId != Oid::Zero) + { + PartIds.push_back(BuildPartId); + } + std::vector<std::string> PartNames; + if (!BuildPartName.empty()) + { + PartNames.push_back(std::string(BuildPartName)); + } + 
std::vector<std::pair<Oid, std::string>> AllBuildParts = + ResolveBuildPartNames(BuildObject, BuildId, PartIds, PartNames, PreferredMultipartChunkSize); + + std::vector<ChunkedFolderContent> PartContents; + + std::vector<ChunkBlockDescription> BlockDescriptions; + std::vector<IoHash> LooseChunkHashes; + + ChunkedFolderContent RemoteContent = GetRemoteContent(Log, + Storage, + BuildId, + AllBuildParts, + {}, + {}, + {}, + ChunkController, + PartContents, + BlockDescriptions, + LooseChunkHashes, + /*IsQuiet*/ false, + /*IsVerbose*/ false, + /*DoExtraContentVerify*/ true); + + GetFolderContentStatistics LocalFolderScanStats; + + struct ContentVisitor : public GetDirectoryContentVisitor + { + virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) + { + RwLock::ExclusiveLockScope _(ExistingPathsLock); + for (const std::filesystem::path& FileName : Content.FileNames) + { + if (RelativeRoot.empty()) + { + ExistingPaths.push_back(FileName); + } + else + { + ExistingPaths.push_back(RelativeRoot / FileName); + } + } + } + + RwLock ExistingPathsLock; + std::vector<std::filesystem::path> ExistingPaths; + } Visitor; + + Latch PendingWorkCount(1); + + GetDirectoryContent(TargetPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive, + Visitor, + WorkerPool, + PendingWorkCount); + + PendingWorkCount.CountDown(); + PendingWorkCount.Wait(); + + FolderContent CurrentLocalFolderState = GetValidFolderContent( + WorkerPool, + LocalFolderScanStats, + TargetPath, + Visitor.ExistingPaths, + [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); }, + 1000, + AbortFlag, + PauseFlag); + + ChunkingStatistics LocalChunkingStats; + ChunkedFolderContent LocalContent = ChunkFolderContent( + LocalChunkingStats, + WorkerPool, + TargetPath, + CurrentLocalFolderState, + *ChunkController, + *ChunkCache, + 1000, + [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { ZEN_UNUSED(IsAborted, 
IsPaused); }, + AbortFlag, + PauseFlag); + + if (Append) + { + RemoteContent = ApplyChunkedContentOverlay(LocalContent, RemoteContent, {}, {}); + } + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); + const ChunkedContentLookup RemoteLookup = BuildChunkedContentLookup(RemoteContent); + + BuildsOperationUpdateFolder Download(Log, + *LogOutput, + Storage, + AbortFlag, + PauseFlag, + WorkerPool, + NetworkPool, + BuildId, + TargetPath, + LocalContent, + LocalLookup, + RemoteContent, + RemoteLookup, + BlockDescriptions, + LooseChunkHashes, + BuildsOperationUpdateFolder::Options{.SystemRootDir = SystemRootDir, + .ZenFolderPath = ZenFolderPath, + .ValidateCompletedSequences = true}); + FolderContent ResultingState; + Download.Execute(ResultingState); + + return ResultingState; + } + + void ValidateDownload(std::span<const std::string> Paths, + std::span<const uint64_t> Sizes, + const std::filesystem::path& Source, + const std::filesystem::path& Target, + const FolderContent& DownloadContent) + { + const std::filesystem::path SourcePath = RootPath / Source; + const std::filesystem::path TargetPath = RootPath / Target; + + CHECK_EQ(Paths.size(), DownloadContent.Paths.size()); + tsl::robin_map<std::string, uint64_t> ExpectedSizes; + tsl::robin_map<std::string, IoHash> ExpectedHashes; + for (size_t Index = 0; Index < Paths.size(); Index++) + { + const std::string LookupString = std::filesystem::path(Paths[Index]).generic_string(); + ExpectedSizes.insert_or_assign(LookupString, Sizes[Index]); + std::filesystem::path FilePath = SourcePath / Paths[Index]; + const IoHash SourceHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(FilePath.make_preferred())); + ExpectedHashes.insert_or_assign(LookupString, SourceHash); + } + for (size_t Index = 0; Index < DownloadContent.Paths.size(); Index++) + { + const std::string LookupString = std::filesystem::path(DownloadContent.Paths[Index]).generic_string(); + auto SizeIt = 
ExpectedSizes.find(LookupString); + CHECK_NE(SizeIt, ExpectedSizes.end()); + CHECK_EQ(SizeIt->second, DownloadContent.RawSizes[Index]); + std::filesystem::path FilePath = TargetPath / DownloadContent.Paths[Index]; + const IoHash DownloadedHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(FilePath.make_preferred())); + auto HashIt = ExpectedHashes.find(LookupString); + CHECK_NE(HashIt, ExpectedHashes.end()); + CHECK_EQ(HashIt->second, DownloadedHash); + } + } + + const std::filesystem::path RootPath; + std::filesystem::path StoragePath; + std::filesystem::path TempPath; + std::filesystem::path SystemRootDir; + std::filesystem::path ZenFolderPath; + + LoggerRef Log = ConsoleLog(); + std::unique_ptr<ProgressBase> LogOutput; + + std::unique_ptr<ChunkingController> ChunkController; + std::unique_ptr<ChunkingCache> ChunkCache; + + StorageInstance Storage; + BuildStorageBase::Statistics StorageStats; + + WorkerThreadPool WorkerPool; + WorkerThreadPool NetworkPool; + + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + }; + +} // namespace buildstorageoperations_testutils + +TEST_SUITE_BEGIN("remotestore.buildstorageutil"); + +TEST_CASE("normalizepartselection.empty_defaults_to_default") +{ + std::vector<Oid> Ids; + std::vector<std::string> Names; + NormalizePartSelection(Ids, Names, {}); + CHECK(Ids.empty()); + REQUIRE_EQ(Names.size(), 1u); + CHECK_EQ(Names[0], "default"); +} + +TEST_CASE("normalizepartselection.wildcard_alone_clears_names") +{ + std::vector<Oid> Ids; + std::vector<std::string> Names = {"*"}; + NormalizePartSelection(Ids, Names, {}); + CHECK(Ids.empty()); + CHECK(Names.empty()); +} + +TEST_CASE("normalizepartselection.wildcard_with_other_name_throws") +{ + std::vector<Oid> Ids; + std::vector<std::string> Names = {"*", "foo"}; + CHECK_THROWS_AS(NormalizePartSelection(Ids, Names, {}), OptionParseException); +} + +TEST_CASE("normalizepartselection.wildcard_with_ids_throws") +{ + std::vector<Oid> Ids = {Oid::NewOid()}; + 
std::vector<std::string> Names = {"*"}; + CHECK_THROWS_AS(NormalizePartSelection(Ids, Names, {}), OptionParseException); +} + +TEST_CASE("normalizepartselection.explicit_name_unchanged") +{ + std::vector<Oid> Ids; + std::vector<std::string> Names = {"foo"}; + NormalizePartSelection(Ids, Names, {}); + CHECK(Ids.empty()); + REQUIRE_EQ(Names.size(), 1u); + CHECK_EQ(Names[0], "foo"); +} + +TEST_CASE("normalizepartselection.ids_only_unchanged") +{ + const Oid Id = Oid::NewOid(); + std::vector<Oid> Ids = {Id}; + std::vector<std::string> Names; + NormalizePartSelection(Ids, Names, {}); + REQUIRE_EQ(Ids.size(), 1u); + CHECK_EQ(Ids[0], Id); + CHECK(Names.empty()); +} + +TEST_CASE("buildstorageoperations.upload.folder") +{ + using namespace buildstorageoperations_testutils; + + FastRandom BaseRandom; + + const size_t FileCount = 11; + + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); + + const Oid BuildId = Oid::NewOid(); + const Oid BuildPartId = Oid::NewOid(); + const std::string BuildPartName = "default"; + + auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {}); + + CHECK_EQ(Result.size(), 1u); + CHECK_EQ(Result[0].first, BuildPartId); + CHECK_EQ(Result[0].second, BuildPartName); + State.ValidateUpload(BuildId, Result); + + FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", 
/* Append */ false); + CHECK_EQ(DownloadContent.Paths.size(), FileCount); + State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); +} + +TEST_CASE("buildstorageoperations.upload.manifest") +{ + using namespace buildstorageoperations_testutils; + + FastRandom BaseRandom; + + const size_t FileCount = 11; + + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); + + std::span<const std::string> ManifestFiles(Paths); + ManifestFiles = ManifestFiles.subspan(0, FileCount / 2); + + std::span<const uint64_t> ManifestSizes(Sizes); + ManifestSizes = ManifestSizes.subspan(0, FileCount / 2); + + ExtendableStringBuilder<1024> Manifest; + for (const std::string& FilePath : ManifestFiles) + { + Manifest << FilePath << "\n"; + } + + WriteFile(State.RootPath / "manifest.txt", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size())); + + const Oid BuildId = Oid::NewOid(); + const Oid BuildPartId = Oid::NewOid(); + const std::string BuildPartName = "default"; + + auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", State.RootPath / "manifest.txt"); + + CHECK_EQ(Result.size(), 1u); + CHECK_EQ(Result[0].first, BuildPartId); + CHECK_EQ(Result[0].second, BuildPartName); + State.ValidateUpload(BuildId, Result); + + FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append 
*/ false); + State.ValidateDownload(ManifestFiles, ManifestSizes, "source", "download", DownloadContent); +} + +TEST_CASE("buildstorageoperations.memorychunkingcache") +{ + using namespace buildstorageoperations_testutils; + + FastRandom BaseRandom; + + const size_t FileCount = 11; + + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); + + const Oid BuildId = Oid::NewOid(); + const Oid BuildPartId = Oid::NewOid(); + const std::string BuildPartName = "default"; + + { + const std::filesystem::path SourcePath = SourceFolder.Path() / "source"; + CbObject MetaData; + BuildsOperationUploadFolder Upload(State.Log, + *State.LogOutput, + State.Storage, + State.AbortFlag, + State.PauseFlag, + State.WorkerPool, + State.NetworkPool, + BuildId, + SourcePath, + true, + MetaData, + BuildsOperationUploadFolder::Options{.TempDir = State.TempPath}); + auto Result = Upload.Execute(BuildPartId, BuildPartName, {}, *State.ChunkController, *State.ChunkCache); + + CHECK_EQ(Upload.m_ChunkingStats.FilesStoredInCache.load(), FileCount - 1); // Zero size files are not stored in cache + CHECK_EQ(Upload.m_ChunkingStats.BytesStoredInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0))); + CHECK(Upload.m_ChunkingStats.ChunksStoredInCache.load() >= FileCount - 1); // Zero size files are not stored in cache + + CHECK_EQ(Result.size(), 1u); 
+ CHECK_EQ(Result[0].first, BuildPartId); + CHECK_EQ(Result[0].second, BuildPartName); + } + + auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {}); + + const Oid BuildId2 = Oid::NewOid(); + const Oid BuildPartId2 = Oid::NewOid(); + + { + const std::filesystem::path SourcePath = SourceFolder.Path() / "source"; + CbObject MetaData; + BuildsOperationUploadFolder Upload(State.Log, + *State.LogOutput, + State.Storage, + State.AbortFlag, + State.PauseFlag, + State.WorkerPool, + State.NetworkPool, + BuildId2, + SourcePath, + true, + MetaData, + BuildsOperationUploadFolder::Options{.TempDir = State.TempPath}); + Upload.Execute(BuildPartId2, BuildPartName, {}, *State.ChunkController, *State.ChunkCache); + + CHECK_EQ(Upload.m_ChunkingStats.FilesFoundInCache.load(), FileCount - 1); // Zero size files are not stored in cache + CHECK_EQ(Upload.m_ChunkingStats.BytesFoundInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0))); + CHECK(Upload.m_ChunkingStats.ChunksFoundInCache.load() >= FileCount - 1); // Zero size files are not stored in cache + } + + FolderContent DownloadContent = State.Download(BuildId2, BuildPartId2, {}, "download", /* Append */ false); + State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); +} + +TEST_CASE("buildstorageoperations.upload.multipart") +{ + // Disabled since it relies on authentication and specific block being present in cloud storage + if (false) + { + using namespace buildstorageoperations_testutils; + + FastRandom BaseRandom; + + const size_t FileCount = 11; + + const std::string Paths[FileCount] = {{"file_1"}, + {"file_2.exe"}, + {"file_3.txt"}, + {"dir_1/dir1_file_1.exe"}, + {"dir_1/dir1_file_2.pdb"}, + {"dir_1/dir1_file_3.txt"}, + {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"}, + {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"}, + {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"}, + {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"}, + {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}}; + const uint64_t 
Sizes[FileCount] = + {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u}; + + ScopedTemporaryDirectory SourceFolder; + TestState State(SourceFolder.Path()); + State.Initialize(); + State.CreateSourceData("source", Paths, Sizes); + + std::span<const std::string> ManifestFiles1(Paths); + ManifestFiles1 = ManifestFiles1.subspan(0, FileCount / 2); + + std::span<const uint64_t> ManifestSizes1(Sizes); + ManifestSizes1 = ManifestSizes1.subspan(0, FileCount / 2); + + std::span<const std::string> ManifestFiles2(Paths); + ManifestFiles2 = ManifestFiles2.subspan(FileCount / 2 - 1); + + std::span<const uint64_t> ManifestSizes2(Sizes); + ManifestSizes2 = ManifestSizes2.subspan(FileCount / 2 - 1); + + const Oid BuildPart1Id = Oid::NewOid(); + const std::string BuildPart1Name = "part1"; + const Oid BuildPart2Id = Oid::NewOid(); + const std::string BuildPart2Name = "part2"; + { + CbObjectWriter Writer; + Writer.BeginObject("parts"sv); + { + Writer.BeginObject(BuildPart1Name); + { + Writer.AddObjectId("partId"sv, BuildPart1Id); + Writer.BeginArray("files"sv); + for (const std::string& ManifestFile : ManifestFiles1) + { + Writer.AddString(ManifestFile); + } + Writer.EndArray(); // files + } + Writer.EndObject(); // part1 + + Writer.BeginObject(BuildPart2Name); + { + Writer.AddObjectId("partId"sv, BuildPart2Id); + Writer.BeginArray("files"sv); + for (const std::string& ManifestFile : ManifestFiles2) + { + Writer.AddString(ManifestFile); + } + Writer.EndArray(); // files + } + Writer.EndObject(); // part2 + } + Writer.EndObject(); // parts + + ExtendableStringBuilder<1024> Manifest; + CompactBinaryToJson(Writer.Save(), Manifest); + WriteFile(State.RootPath / "manifest.json", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size())); + } + + const Oid BuildId = Oid::NewOid(); + + auto Result = State.Upload(BuildId, {}, {}, "source", State.RootPath / "manifest.json"); + + CHECK_EQ(Result.size(), 2u); + 
CHECK_EQ(Result[0].first, BuildPart1Id); + CHECK_EQ(Result[0].second, BuildPart1Name); + CHECK_EQ(Result[1].first, BuildPart2Id); + CHECK_EQ(Result[1].second, BuildPart2Name); + State.ValidateUpload(BuildId, Result); + + FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false); + State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent); + + FolderContent Part1DownloadContent = State.Download(BuildId, BuildPart1Id, {}, "download_part1", /* Append */ false); + State.ValidateDownload(ManifestFiles1, ManifestSizes1, "source", "download_part1", Part1DownloadContent); + + FolderContent Part2DownloadContent = State.Download(BuildId, Oid::Zero, BuildPart2Name, "download_part2", /* Append */ false); + State.ValidateDownload(ManifestFiles2, ManifestSizes2, "source", "download_part2", Part2DownloadContent); + + (void)State.Download(BuildId, BuildPart1Id, BuildPart1Name, "download_part1+2", /* Append */ false); + FolderContent Part1And2DownloadContent = State.Download(BuildId, BuildPart2Id, {}, "download_part1+2", /* Append */ true); + State.ValidateDownload(Paths, Sizes, "source", "download_part1+2", Part1And2DownloadContent); + } +} + +TEST_CASE("buildstorageoperations.partial.block.download" * doctest::skip(true)) +{ + const std::string OidcExecutableName = "OidcToken" ZEN_EXE_SUFFIX_LITERAL; + std::filesystem::path OidcTokenExePath = (GetRunningExecutablePath().parent_path() / OidcExecutableName).make_preferred(); + + HttpClientSettings ClientSettings{ + .LogCategory = "httpbuildsclient", + .AccessTokenProvider = + httpclientauth::CreateFromOidcTokenExecutable(OidcTokenExePath, "https://jupiter.devtools.epicgames.com", true, false, false), + .AssumeHttp2 = false, + .AllowResume = true, + .RetryCount = 0, + .Verbose = false}; + + HttpClient HttpClient("https://euc.jupiter.devtools.epicgames.com", ClientSettings); + + const std::string_view Namespace = "fortnite.oplog"; + const std::string_view Bucket = 
"fortnitegame.staged-build.fortnite-main.ps4-client"; + const Oid BuildId = Oid::FromHexString("09a76ea92ad301d4724fafad"); + + { + HttpClient::Response Response = HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}", Namespace, Bucket, BuildId), + HttpClient::Accept(ZenContentType::kCbObject)); + CbValidateError ValidateResult = CbValidateError::None; + CbObject Object = ValidateAndReadCompactBinaryObject(IoBuffer(Response.ResponsePayload), ValidateResult); + REQUIRE(ValidateResult == CbValidateError::None); + } + + std::vector<ChunkBlockDescription> BlockDescriptions; + { + CbObjectWriter Request; + + Request.BeginArray("blocks"sv); + { + Request.AddHash(IoHash::FromHexString("7c353ed782675a5e8f968e61e51fc797ecdc2882")); + } + Request.EndArray(); + + IoBuffer Payload = Request.Save().GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + + HttpClient::Response BlockDescriptionsResponse = + HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/blocks/getBlockMetadata", Namespace, Bucket, BuildId), + Payload, + HttpClient::Accept(ZenContentType::kCbObject)); + REQUIRE(BlockDescriptionsResponse.IsSuccess()); + + CbValidateError ValidateResult = CbValidateError::None; + CbObject Object = ValidateAndReadCompactBinaryObject(IoBuffer(BlockDescriptionsResponse.ResponsePayload), ValidateResult); + REQUIRE(ValidateResult == CbValidateError::None); + + { + CbArrayView BlocksArray = Object["blocks"sv].AsArrayView(); + for (CbFieldView Block : BlocksArray) + { + ChunkBlockDescription Description = ParseChunkBlockDescription(Block.AsObjectView()); + BlockDescriptions.emplace_back(std::move(Description)); + } + } + } + + REQUIRE(!BlockDescriptions.empty()); + + const IoHash BlockHash = BlockDescriptions.back().BlockHash; + + const ChunkBlockDescription& BlockDescription = BlockDescriptions.front(); + REQUIRE(!BlockDescription.ChunkRawHashes.empty()); + REQUIRE(!BlockDescription.ChunkCompressedLengths.empty()); + + std::vector<std::pair<uint64_t, 
uint64_t>> ChunkOffsetAndSizes; + uint64_t Offset = gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + + for (uint32_t ChunkCompressedSize : BlockDescription.ChunkCompressedLengths) + { + ChunkOffsetAndSizes.push_back(std::make_pair(Offset, ChunkCompressedSize)); + Offset += ChunkCompressedSize; + } + + ScopedTemporaryDirectory SourceFolder; + + auto Validate = [&](std::span<const uint32_t> ChunkIndexesToFetch) { + std::vector<std::pair<uint64_t, uint64_t>> Ranges; + for (uint32_t ChunkIndex : ChunkIndexesToFetch) + { + Ranges.push_back(ChunkOffsetAndSizes[ChunkIndex]); + } + + HttpClient::KeyValueMap Headers; + if (!Ranges.empty()) + { + ExtendableStringBuilder<512> SB; + for (const std::pair<uint64_t, uint64_t>& R : Ranges) + { + if (SB.Size() > 0) + { + SB << ", "; + } + SB << R.first << "-" << R.first + R.second - 1; + } + Headers.Entries.insert({"Range", fmt::format("bytes={}", SB.ToView())}); + } + + HttpClient::Response GetBlobRangesResponse = HttpClient.Download( + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}?supportsRedirect=false", Namespace, Bucket, BuildId, BlockHash), + SourceFolder.Path(), + Headers); + + REQUIRE(GetBlobRangesResponse.IsSuccess()); + [[maybe_unused]] MemoryView RangesMemoryView = GetBlobRangesResponse.ResponsePayload.GetView(); + + std::vector<std::pair<uint64_t, uint64_t>> PayloadRanges = GetBlobRangesResponse.GetRanges(Ranges); + if (PayloadRanges.empty()) + { + // We got the whole blob, use the ranges as is + PayloadRanges = Ranges; + } + + REQUIRE(PayloadRanges.size() == Ranges.size()); + + for (uint32_t RangeIndex = 0; RangeIndex < PayloadRanges.size(); RangeIndex++) + { + const std::pair<uint64_t, uint64_t>& PayloadRange = PayloadRanges[RangeIndex]; + + CHECK_EQ(PayloadRange.second, Ranges[RangeIndex].second); + + IoBuffer ChunkPayload(GetBlobRangesResponse.ResponsePayload, PayloadRange.first, PayloadRange.second); + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer 
CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(ChunkPayload), RawHash, RawSize); + CHECK(CompressedChunk); + CHECK_EQ(RawHash, BlockDescription.ChunkRawHashes[ChunkIndexesToFetch[RangeIndex]]); + CHECK_EQ(RawSize, BlockDescription.ChunkRawLengths[ChunkIndexesToFetch[RangeIndex]]); + } + }; + + { + // Single + std::vector<uint32_t> ChunkIndexesToFetch{uint32_t(BlockDescription.ChunkCompressedLengths.size() / 2)}; + Validate(ChunkIndexesToFetch); + } + { + // Many + std::vector<uint32_t> ChunkIndexesToFetch; + for (uint32_t Index = 0; Index < BlockDescription.ChunkCompressedLengths.size() / 16; Index++) + { + ChunkIndexesToFetch.push_back(uint32_t(BlockDescription.ChunkCompressedLengths.size() / 6 + Index * 7)); + ChunkIndexesToFetch.push_back(uint32_t(BlockDescription.ChunkCompressedLengths.size() / 6 + Index * 7 + 1)); + ChunkIndexesToFetch.push_back(uint32_t(BlockDescription.ChunkCompressedLengths.size() / 6 + Index * 7 + 3)); + } + Validate(ChunkIndexesToFetch); + } + + { + // First and last + std::vector<uint32_t> ChunkIndexesToFetch{0, uint32_t(BlockDescription.ChunkCompressedLengths.size() - 1)}; + Validate(ChunkIndexesToFetch); + } +} +TEST_SUITE_END(); + +void +buildstorageutil_forcelink() +{ +} + +#endif // ZEN_WITH_TESTS + } // namespace zen diff --git a/src/zenremotestore/builds/buildupdatefolder.cpp b/src/zenremotestore/builds/buildupdatefolder.cpp new file mode 100644 index 000000000..443ab957e --- /dev/null +++ b/src/zenremotestore/builds/buildupdatefolder.cpp @@ -0,0 +1,4947 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zenremotestore/builds/buildupdatefolder.h> + +#include <zencore/basicfile.h> +#include <zencore/fmtutils.h> +#include <zencore/parallelwork.h> +#include <zencore/scopeguard.h> +#include <zencore/trace.h> +#include <zenremotestore/builds/buildcontent.h> +#include <zenremotestore/builds/buildmanifest.h> +#include <zenremotestore/chunking/chunkingcache.h> +#include <zenremotestore/chunking/chunkingcontroller.h> +#include <zenremotestore/transferthreadworkers.h> +#include <zenutil/filesystemutils.h> +#include <zenutil/filteredrate.h> +#include <zenutil/progress.h> + +#include <numeric> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_set.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +namespace { + std::filesystem::path ZenTempCacheFolderPath(const std::filesystem::path& ZenFolderPath) + { + return ZenTempFolderPath(ZenFolderPath) / "cache"; // Decompressed and verified data - chunks & sequences + } + std::filesystem::path ZenTempBlockFolderPath(const std::filesystem::path& ZenFolderPath) + { + return ZenTempFolderPath(ZenFolderPath) / "blocks"; // Temp storage for whole and partial blocks + } + std::filesystem::path ZenTempDownloadFolderPath(const std::filesystem::path& ZenFolderPath) + { + return ZenTempFolderPath(ZenFolderPath) / "download"; // Temp storage for decompressed and validated chunks + } + std::filesystem::path GetTempChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash) + { + return CacheFolderPath / (RawHash.ToHexString() + ".tmp"); + } + + std::filesystem::path GetFinalChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash) + { + return CacheFolderPath / RawHash.ToHexString(); + } + bool CleanDirectory(LoggerRef InLog, + ProgressBase& Progress, + WorkerThreadPool& IOWorkerPool, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + bool IsQuiet, + const std::filesystem::path& Path, + std::span<const 
std::string> ExcludeDirectories) + { + ZEN_TRACE_CPU("CleanDirectory"); + ZEN_SCOPED_LOG(InLog); + Stopwatch Timer; + + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = Progress.CreateProgressBar("Clean Folder"); + + CleanDirectoryResult Result = CleanDirectory( + IOWorkerPool, + AbortFlag, + PauseFlag, + Path, + ExcludeDirectories, + [&](const std::string_view Details, uint64_t TotalCount, uint64_t RemainingCount, bool IsPaused, bool IsAborted) { + ProgressBar->UpdateState({.Task = "Cleaning folder ", + .Details = std::string(Details), + .TotalCount = TotalCount, + .RemainingCount = RemainingCount, + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }, + Progress.GetProgressUpdateDelayMS()); + + ProgressBar->Finish(); + + if (AbortFlag) + { + return false; + } + + uint64_t ElapsedTimeMs = Timer.GetElapsedTimeMs(); + + if (!Result.FailedRemovePaths.empty()) + { + ExtendableStringBuilder<512> SB; + for (size_t FailedPathIndex = 0; FailedPathIndex < Result.FailedRemovePaths.size(); FailedPathIndex++) + { + SB << fmt::format("\n '{}': ({}) {}", + Result.FailedRemovePaths[FailedPathIndex].first, + Result.FailedRemovePaths[FailedPathIndex].second.value(), + Result.FailedRemovePaths[FailedPathIndex].second.message()); + } + ZEN_WARN("Clean failed to remove files from '{}': {}", Path, SB.ToView()); + } + + if (ElapsedTimeMs >= 200 && !IsQuiet) + { + ZEN_INFO("Wiped folder '{}' {} ({}) in {}", + Path, + Result.FoundCount, + NiceBytes(Result.DeletedByteCount), + NiceTimeSpanMs(ElapsedTimeMs)); + } + + return Result.FailedRemovePaths.empty(); + } + uint32_t SetNativeFileAttributes(const std::filesystem::path FilePath, SourcePlatform SourcePlatform, uint32_t Attributes) + { +#if ZEN_PLATFORM_WINDOWS + if (SourcePlatform == SourcePlatform::Windows) + { + SetFileAttributesToPath(FilePath, Attributes); + return Attributes; + } + else + { + uint32_t CurrentAttributes = GetFileAttributesFromPath(FilePath); + uint32_t 
NewAttributes = zen::MakeFileAttributeReadOnly(CurrentAttributes, zen::IsFileModeReadOnly(Attributes)); + if (CurrentAttributes != NewAttributes) + { + SetFileAttributesToPath(FilePath, NewAttributes); + } + return NewAttributes; + } +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + if (SourcePlatform != SourcePlatform::Windows) + { + zen::SetFileMode(FilePath, Attributes); + return Attributes; + } + else + { + uint32_t CurrentMode = zen::GetFileMode(FilePath); + uint32_t NewMode = zen::MakeFileModeReadOnly(CurrentMode, zen::IsFileAttributeReadOnly(Attributes)); + if (CurrentMode != NewMode) + { + zen::SetFileMode(FilePath, NewMode); + } + return NewMode; + } +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + }; + + uint32_t GetNativeFileAttributes(const std::filesystem::path FilePath) + { +#if ZEN_PLATFORM_WINDOWS + return GetFileAttributesFromPath(FilePath); +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + return GetFileMode(FilePath); +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + } + std::filesystem::path TryMoveDownloadedChunk(IoBuffer& BlockBuffer, const std::filesystem::path& Path, bool ForceDiskBased) + { + uint64_t BlockSize = BlockBuffer.GetSize(); + IoBufferFileReference FileRef; + if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && (FileRef.FileChunkSize == BlockSize)) + { + ZEN_TRACE_CPU("MoveTempFullBlock"); + std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) + { + BlockBuffer.SetDeleteOnClose(false); + BlockBuffer = {}; + RenameFile(TempBlobPath, Path, Ec); + if (Ec) + { + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true); + BlockBuffer.SetDeleteOnClose(true); + } + else + { + return Path; + } + } + } + + if (ForceDiskBased) + { + // Could not be moved and rather large, lets store 
it on disk + ZEN_TRACE_CPU("WriteTempFullBlock"); + TemporaryFile::SafeWriteFile(Path, BlockBuffer); + BlockBuffer = {}; + return Path; + } + + return {}; + } + bool IsSingleFileChunk(const ChunkedFolderContent& RemoteContent, + const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> Locations) + { + if (Locations.size() == 1) + { + const uint32_t FirstSequenceIndex = Locations[0]->SequenceIndex; + if (RemoteContent.ChunkedContent.ChunkCounts[FirstSequenceIndex] == 1) + { + ZEN_ASSERT_SLOW(Locations[0]->Offset == 0); + return true; + } + } + return false; + } + IoBuffer MakeBufferMemoryBased(const CompositeBuffer& PartialBlockBuffer) + { + ZEN_TRACE_CPU("MakeBufferMemoryBased"); + IoBuffer BlockMemoryBuffer; + std::span<const SharedBuffer> Segments = PartialBlockBuffer.GetSegments(); + if (Segments.size() == 1) + { + IoBufferFileReference FileRef = {}; + if (PartialBlockBuffer.GetSegments().front().AsIoBuffer().GetFileReference(FileRef)) + { + BlockMemoryBuffer = UniqueBuffer::Alloc(FileRef.FileChunkSize).MoveToShared().AsIoBuffer(); + BasicFile Reader; + Reader.Attach(FileRef.FileHandle); + auto _ = MakeGuard([&Reader]() { Reader.Detach(); }); + MutableMemoryView ReadMem = BlockMemoryBuffer.GetMutableView(); + Reader.Read(ReadMem.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset); + return BlockMemoryBuffer; + } + else + { + return PartialBlockBuffer.GetSegments().front().AsIoBuffer(); + } + } + else + { + // Not a homogenous memory buffer, read all to memory + + BlockMemoryBuffer = UniqueBuffer::Alloc(PartialBlockBuffer.GetSize()).MoveToShared().AsIoBuffer(); + MutableMemoryView ReadMem = BlockMemoryBuffer.GetMutableView(); + for (const SharedBuffer& Segment : Segments) + { + IoBufferFileReference FileRef = {}; + if (Segment.AsIoBuffer().GetFileReference(FileRef)) + { + BasicFile Reader; + Reader.Attach(FileRef.FileHandle); + auto _ = MakeGuard([&Reader]() { Reader.Detach(); }); + Reader.Read(ReadMem.GetData(), FileRef.FileChunkSize, 
FileRef.FileChunkOffset); + ReadMem = ReadMem.Mid(FileRef.FileChunkSize); + } + else + { + ReadMem = ReadMem.CopyFrom(Segment.AsIoBuffer().GetView()); + } + } + return BlockMemoryBuffer; + } + } + + FolderContent CheckFolderFiles(ProgressBase& Progress, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + std::string_view ProgressLabel, + TransferThreadWorkers& Workers, + GetFolderContentStatistics& LocalFolderScanStats, + const std::filesystem::path& Path, + std::span<const std::filesystem::path> PathsToCheck) + { + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = Progress.CreateProgressBar(ProgressLabel); + FolderContent Result = GetValidFolderContent( + Workers.GetIOWorkerPool(), + LocalFolderScanStats, + Path, + PathsToCheck, + [&ProgressBar, &LocalFolderScanStats, &AbortFlag, &PauseFlag](uint64_t PathCount, uint64_t CompletedPathCount) { + std::string Details = + fmt::format("{}/{} checked, {} found", CompletedPathCount, PathCount, LocalFolderScanStats.FoundFileCount.load()); + ProgressBar->UpdateState({.Task = "Checking files ", + .Details = Details, + .TotalCount = PathCount, + .RemainingCount = PathCount - CompletedPathCount, + .Status = ProgressBase::ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)}, + false); + }, + Progress.GetProgressUpdateDelayMS(), + AbortFlag, + PauseFlag); + ProgressBar->Finish(); + return Result; + } + + ChunkedFolderContent ScanFolderFiles(ProgressBase& Progress, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + std::string_view ProgressLabel, + TransferThreadWorkers& Workers, + const std::filesystem::path& Path, + const FolderContent& FolderSource, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + ChunkingStatistics& OutChunkingStats) + { + uint64_t ByteCountToScan = 0; + for (const uint64_t RawSize : FolderSource.RawSizes) + { + ByteCountToScan += RawSize; + } + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = Progress.CreateProgressBar(ProgressLabel); + 
FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + ChunkingStatistics LocalChunkingStats; + ChunkedFolderContent Result = ChunkFolderContent( + LocalChunkingStats, + Workers.GetIOWorkerPool(), + Path, + FolderSource, + ChunkController, + ChunkCache, + Progress.GetProgressUpdateDelayMS(), + [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { + FilteredBytesHashed.Update(LocalChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + LocalChunkingStats.FilesProcessed.load(), + FolderSource.Paths.size(), + NiceBytes(LocalChunkingStats.BytesHashed.load()), + NiceBytes(ByteCountToScan), + NiceNum(FilteredBytesHashed.GetCurrent()), + LocalChunkingStats.UniqueChunksFound.load(), + NiceBytes(LocalChunkingStats.UniqueBytesFound.load())); + ProgressBar->UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = ByteCountToScan, + .RemainingCount = ByteCountToScan - LocalChunkingStats.BytesHashed.load(), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }, + AbortFlag, + PauseFlag); + OutChunkingStats += LocalChunkingStats; + FilteredBytesHashed.Stop(); + ProgressBar->Finish(); + return Result; + } +} // namespace + +BuildsOperationUpdateFolder::BuildsOperationUpdateFolder(LoggerRef Log, + ProgressBase& Progress, + StorageInstance& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + WorkerThreadPool& IOWorkerPool, + WorkerThreadPool& NetworkPool, + const Oid& BuildId, + const std::filesystem::path& Path, + const ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& RemoteLookup, + const std::vector<ChunkBlockDescription>& BlockDescriptions, + const std::vector<IoHash>& LooseChunkHashes, + const Options& Options) +: m_Log(Log) +, m_Progress(Progress) +, m_Storage(Storage) +, m_AbortFlag(AbortFlag) +, 
m_PauseFlag(PauseFlag) +, m_IOWorkerPool(IOWorkerPool) +, m_NetworkPool(NetworkPool) +, m_BuildId(BuildId) +, m_Path(Path) +, m_LocalContent(LocalContent) +, m_LocalLookup(LocalLookup) +, m_RemoteContent(RemoteContent) +, m_RemoteLookup(RemoteLookup) +, m_BlockDescriptions(BlockDescriptions) +, m_LooseChunkHashes(LooseChunkHashes) +, m_Options(Options) +, m_CacheFolderPath(ZenTempCacheFolderPath(m_Options.ZenFolderPath)) +, m_TempDownloadFolderPath(ZenTempDownloadFolderPath(m_Options.ZenFolderPath)) +, m_TempBlockFolderPath(ZenTempBlockFolderPath(m_Options.ZenFolderPath)) +{ +} + +void +BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState) +{ + ZEN_TRACE_CPU("BuildsOperationUpdateFolder::Execute"); + try + { + enum class TaskSteps : uint32_t + { + ScanExistingData, + WriteChunks, + PrepareTarget, + FinalizeTarget, + Cleanup, + StepCount + }; + + auto EndProgress = + MakeGuard([&]() { m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); }); + + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::ScanExistingData, (uint32_t)TaskSteps::StepCount); + + CreateDirectories(m_CacheFolderPath); + CreateDirectories(m_TempDownloadFolderPath); + CreateDirectories(m_TempBlockFolderPath); + + std::vector<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters(m_RemoteContent.ChunkedContent.SequenceRawHashes.size()); + std::vector<bool> RemoteChunkIndexNeedsCopyFromLocalFileFlags(m_RemoteContent.ChunkedContent.ChunkHashes.size()); + std::vector<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags(m_RemoteContent.ChunkedContent.ChunkHashes.size()); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedChunkHashesFound; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedSequenceHashesFound; + ScanCacheFolder(CachedChunkHashesFound, CachedSequenceHashesFound); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedBlocksFound; + ScanTempBlocksFolder(CachedBlocksFound); + + 
tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexesLeftToFindToRemoteIndex; + InitializeSequenceCounters(SequenceIndexChunksLeftToWriteCounters, + SequenceIndexesLeftToFindToRemoteIndex, + CachedChunkHashesFound, + CachedSequenceHashesFound); + + std::vector<ChunkedFolderContent> ScavengedContents; + std::vector<ChunkedContentLookup> ScavengedLookups; + std::vector<std::filesystem::path> ScavengedPaths; + + std::vector<ScavengedSequenceCopyOperation> ScavengedSequenceCopyOperations; + uint64_t ScavengedPathsCount = 0; + + if (m_Options.EnableOtherDownloadsScavenging) + { + ZEN_TRACE_CPU("GetScavengedSequences"); + + Stopwatch ScavengeTimer; + + if (!SequenceIndexesLeftToFindToRemoteIndex.empty()) + { + std::vector<ScavengeSource> ScavengeSources = FindScavengeSources(); + ScanScavengeSources(ScavengeSources, ScavengedContents, ScavengedLookups, ScavengedPaths); + if (m_AbortFlag) + { + return; + } + + MatchScavengedSequencesToRemote(ScavengedContents, + ScavengedLookups, + ScavengedPaths, + SequenceIndexesLeftToFindToRemoteIndex, + SequenceIndexChunksLeftToWriteCounters, + ScavengedSequenceCopyOperations, + ScavengedPathsCount); + } + m_CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs(); + } + + uint32_t RemainingChunkCount = 0; + for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) + { + uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex); + if (ChunkWriteCount > 0) + { + RemainingChunkCount++; + } + } + + // Pick up all chunks in current local state + tsl::robin_map<IoHash, size_t, IoHash::Hasher> RawHashToCopyChunkDataIndex; + std::vector<CopyChunkData> CopyChunkDatas; + + if (m_Options.EnableTargetFolderScavenging) + { + ZEN_TRACE_CPU("GetLocalChunks"); + + Stopwatch LocalTimer; + + ScavengeSourceForChunks(RemainingChunkCount, + RemoteChunkIndexNeedsCopyFromLocalFileFlags, + 
RawHashToCopyChunkDataIndex, + SequenceIndexChunksLeftToWriteCounters, + m_LocalContent, + m_LocalLookup, + CopyChunkDatas, + uint32_t(-1), + m_CacheMappingStats.LocalChunkMatchingRemoteCount, + m_CacheMappingStats.LocalChunkMatchingRemoteByteCount); + + m_CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs(); + } + + if (m_Options.EnableOtherDownloadsScavenging) + { + ZEN_TRACE_CPU("GetScavengeChunks"); + + Stopwatch ScavengeTimer; + + for (uint32_t ScavengedContentIndex = 0; ScavengedContentIndex < ScavengedContents.size() && (RemainingChunkCount > 0); + ScavengedContentIndex++) + { + const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengedContentIndex]; + const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex]; + + ScavengeSourceForChunks(RemainingChunkCount, + RemoteChunkIndexNeedsCopyFromLocalFileFlags, + RawHashToCopyChunkDataIndex, + SequenceIndexChunksLeftToWriteCounters, + ScavengedContent, + ScavengedLookup, + CopyChunkDatas, + ScavengedContentIndex, + m_CacheMappingStats.ScavengedChunkMatchingRemoteCount, + m_CacheMappingStats.ScavengedChunkMatchingRemoteByteCount); + } + m_CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs(); + } + + if (!m_Options.IsQuiet) + { + if (m_CacheMappingStats.CacheSequenceHashesCount > 0 || m_CacheMappingStats.CacheChunkCount > 0 || + m_CacheMappingStats.CacheBlockCount > 0) + { + ZEN_INFO("Download cache: Found {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks in {}", + m_CacheMappingStats.CacheSequenceHashesCount, + NiceBytes(m_CacheMappingStats.CacheSequenceHashesByteCount), + m_CacheMappingStats.CacheChunkCount, + NiceBytes(m_CacheMappingStats.CacheChunkByteCount), + m_CacheMappingStats.CacheBlockCount, + NiceBytes(m_CacheMappingStats.CacheBlocksByteCount), + NiceTimeSpanMs(m_CacheMappingStats.CacheScanElapsedWallTimeUs / 1000)); + } + + if (m_CacheMappingStats.LocalPathsMatchingSequencesCount > 0 || 
m_CacheMappingStats.LocalChunkMatchingRemoteCount > 0) + { + ZEN_INFO("Local state : Found {} ({}) chunk sequences, {} ({}) chunks in {}", + m_CacheMappingStats.LocalPathsMatchingSequencesCount, + NiceBytes(m_CacheMappingStats.LocalPathsMatchingSequencesByteCount), + m_CacheMappingStats.LocalChunkMatchingRemoteCount, + NiceBytes(m_CacheMappingStats.LocalChunkMatchingRemoteByteCount), + NiceTimeSpanMs(m_CacheMappingStats.LocalScanElapsedWallTimeUs / 1000)); + } + if (m_CacheMappingStats.ScavengedPathsMatchingSequencesCount > 0 || m_CacheMappingStats.ScavengedChunkMatchingRemoteCount > 0) + { + ZEN_INFO("Scavenge of {} paths, found {} ({}) chunk sequences, {} ({}) chunks in {}", + ScavengedPathsCount, + m_CacheMappingStats.ScavengedPathsMatchingSequencesCount, + NiceBytes(m_CacheMappingStats.ScavengedPathsMatchingSequencesByteCount), + m_CacheMappingStats.ScavengedChunkMatchingRemoteCount, + NiceBytes(m_CacheMappingStats.ScavengedChunkMatchingRemoteByteCount), + NiceTimeSpanMs(m_CacheMappingStats.ScavengeElapsedWallTimeUs / 1000)); + } + } + + uint64_t BytesToWrite = CalculateBytesToWriteAndFlagNeededChunks(SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromLocalFileFlags, + RemoteChunkIndexNeedsCopyFromSourceFlags); + + for (const ScavengedSequenceCopyOperation& ScavengeCopyOp : ScavengedSequenceCopyOperations) + { + BytesToWrite += ScavengeCopyOp.RawSize; + } + + uint64_t BytesToValidate = m_Options.ValidateCompletedSequences ? 
BytesToWrite : 0; + + uint64_t TotalRequestCount = 0; + uint64_t TotalPartWriteCount = 0; + std::atomic<uint64_t> WritePartsComplete = 0; + + tsl::robin_map<std::string, uint32_t> RemotePathToRemoteIndex; + RemotePathToRemoteIndex.reserve(m_RemoteContent.Paths.size()); + for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++) + { + RemotePathToRemoteIndex.insert({m_RemoteContent.Paths[RemotePathIndex].generic_string(), RemotePathIndex}); + } + + CheckRequiredDiskSpace(RemotePathToRemoteIndex); + + BlobsExistsResult ExistsResult; + { + ChunkBlockAnalyser BlockAnalyser( + Log(), + m_BlockDescriptions, + ChunkBlockAnalyser::Options{.IsQuiet = m_Options.IsQuiet, + .IsVerbose = m_Options.IsVerbose, + .HostLatencySec = m_Storage.BuildStorageHost.LatencySec, + .HostHighSpeedLatencySec = m_Storage.CacheHost.LatencySec, + .HostMaxRangeCountPerRequest = m_Storage.BuildStorageHost.Caps.MaxRangeCountPerRequest, + .HostHighSpeedMaxRangeCountPerRequest = m_Storage.CacheHost.Caps.MaxRangeCountPerRequest}); + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = BlockAnalyser.GetNeeded( + m_RemoteLookup.ChunkHashToChunkIndex, + [&](uint32_t RemoteChunkIndex) -> bool { return RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]; }); + + std::vector<uint32_t> FetchBlockIndexes; + std::vector<uint32_t> CachedChunkBlockIndexes; + ClassifyCachedAndFetchBlocks(NeededBlocks, CachedBlocksFound, TotalPartWriteCount, CachedChunkBlockIndexes, FetchBlockIndexes); + + std::vector<uint32_t> NeededLooseChunkIndexes = DetermineNeededLooseChunkIndexes(SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromLocalFileFlags, + RemoteChunkIndexNeedsCopyFromSourceFlags); + + ExistsResult = QueryBlobCacheExists(NeededLooseChunkIndexes, FetchBlockIndexes); + + std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> BlockPartialDownloadModes = + DeterminePartialDownloadModes(ExistsResult); + 
ZEN_ASSERT(BlockPartialDownloadModes.size() == m_BlockDescriptions.size()); + + ChunkBlockAnalyser::BlockResult PartialBlocks = + BlockAnalyser.CalculatePartialBlockDownloads(NeededBlocks, BlockPartialDownloadModes); + + TotalRequestCount += NeededLooseChunkIndexes.size(); + TotalPartWriteCount += NeededLooseChunkIndexes.size(); + TotalRequestCount += PartialBlocks.BlockRanges.size(); + TotalPartWriteCount += PartialBlocks.BlockRanges.size(); + TotalRequestCount += PartialBlocks.FullBlockIndexes.size(); + TotalPartWriteCount += PartialBlocks.FullBlockIndexes.size(); + + std::vector<LooseChunkHashWorkData> LooseChunkHashWorks = + BuildLooseChunkHashWorks(NeededLooseChunkIndexes, SequenceIndexChunksLeftToWriteCounters); + + ZEN_TRACE_CPU("WriteChunks"); + + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::WriteChunks, (uint32_t)TaskSteps::StepCount); + + Stopwatch WriteTimer; + + FilteredRate FilteredDownloadedBytesPerSecond; + FilteredRate FilteredWrittenBytesPerSecond; + + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Writing"); + ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + TotalPartWriteCount += CopyChunkDatas.size(); + TotalPartWriteCount += ScavengedSequenceCopyOperations.size(); + + BufferedWriteFileCache WriteCache; + + WriteChunksContext Context{.Work = Work, + .WriteCache = WriteCache, + .SequenceIndexChunksLeftToWriteCounters = SequenceIndexChunksLeftToWriteCounters, + .RemoteChunkIndexNeedsCopyFromSourceFlags = RemoteChunkIndexNeedsCopyFromSourceFlags, + .WritePartsComplete = WritePartsComplete, + .TotalPartWriteCount = TotalPartWriteCount, + .TotalRequestCount = TotalRequestCount, + .ExistsResult = ExistsResult, + .FilteredDownloadedBytesPerSecond = FilteredDownloadedBytesPerSecond, + .FilteredWrittenBytesPerSecond = FilteredWrittenBytesPerSecond}; + + ScheduleScavengedSequenceWrites(Context, ScavengedSequenceCopyOperations, ScavengedContents, ScavengedPaths); + 
ScheduleLooseChunkWrites(Context, LooseChunkHashWorks); + + std::unique_ptr<CloneQueryInterface> CloneQuery = + m_Options.AllowFileClone ? GetCloneQueryInterface(m_CacheFolderPath) : nullptr; + + ScheduleLocalChunkCopies(Context, CopyChunkDatas, CloneQuery.get(), ScavengedContents, ScavengedLookups, ScavengedPaths); + ScheduleCachedBlockWrites(Context, CachedChunkBlockIndexes); + SchedulePartialBlockDownloads(Context, PartialBlocks); + ScheduleFullBlockDownloads(Context, PartialBlocks.FullBlockIndexes); + + { + ZEN_TRACE_CPU("WriteChunks_Wait"); + + Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + + m_DownloadStats.DownloadedBlockByteCount.load() + + +m_DownloadStats.DownloadedPartialBlockByteCount.load(); + FilteredWrittenBytesPerSecond.Update(m_DiskStats.WriteByteCount.load()); + FilteredDownloadedBytesPerSecond.Update(DownloadedBytes); + std::string DownloadRateString = + (m_DownloadStats.RequestsCompleteCount == TotalRequestCount) + ? 
"" + : fmt::format(" {}bits/s", NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8)); + std::string CloneDetails; + if (m_DiskStats.CloneCount.load() > 0) + { + CloneDetails = fmt::format(" ({} cloned)", NiceBytes(m_DiskStats.CloneByteCount.load())); + } + std::string WriteDetails = fmt::format(" {}/{} ({}B/s) written{}", + NiceBytes(m_WrittenChunkByteCount.load()), + NiceBytes(BytesToWrite), + NiceNum(FilteredWrittenBytesPerSecond.GetCurrent()), + CloneDetails); + + std::string Details = fmt::format("{}/{} ({}{}) downloaded.{}", + m_DownloadStats.RequestsCompleteCount.load(), + TotalRequestCount, + NiceBytes(DownloadedBytes), + DownloadRateString, + WriteDetails); + + std::string Task; + if ((m_WrittenChunkByteCount < BytesToWrite) || (BytesToValidate == 0)) + { + Task = "Writing chunks "; + } + else + { + Task = "Verifying chunks "; + } + + ProgressBar->UpdateState({.Task = Task, + .Details = Details, + .TotalCount = (BytesToWrite + BytesToValidate), + .RemainingCount = ((BytesToWrite + BytesToValidate) - + (m_WrittenChunkByteCount.load() + m_ValidatedChunkByteCount.load())), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + } + + CloneQuery.reset(); + + FilteredWrittenBytesPerSecond.Stop(); + FilteredDownloadedBytesPerSecond.Stop(); + + ProgressBar->Finish(); + if (m_AbortFlag) + { + return; + } + + VerifyWriteChunksComplete(SequenceIndexChunksLeftToWriteCounters, BytesToWrite, BytesToValidate); + + const uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + + m_DownloadStats.DownloadedBlockByteCount.load() + + m_DownloadStats.DownloadedPartialBlockByteCount.load(); + if (!m_Options.IsQuiet) + { + std::string CloneDetails; + if (m_DiskStats.CloneCount.load() > 0) + { + CloneDetails = fmt::format(" ({} cloned)", NiceBytes(m_DiskStats.CloneByteCount.load())); + } + ZEN_INFO("Downloaded {} ({}bits/s) in {}. Wrote {} ({}B/s){} in {}. 
Completed in {}", + NiceBytes(DownloadedBytes), + NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), DownloadedBytes * 8)), + NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000), + NiceBytes(m_WrittenChunkByteCount.load()), + NiceNum(GetBytesPerSecond(FilteredWrittenBytesPerSecond.GetElapsedTimeUS(), m_DiskStats.WriteByteCount.load())), + CloneDetails, + NiceTimeSpanMs(FilteredWrittenBytesPerSecond.GetElapsedTimeUS() / 1000), + NiceTimeSpanMs(WriteTimer.GetElapsedTimeMs())); + } + + m_WriteChunkStats.WriteChunksElapsedWallTimeUs = WriteTimer.GetElapsedTimeUs(); + m_WriteChunkStats.DownloadTimeUs = FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(); + m_WriteChunkStats.WriteTimeUs = FilteredWrittenBytesPerSecond.GetElapsedTimeUS(); + } + + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::PrepareTarget, (uint32_t)TaskSteps::StepCount); + + if (m_AbortFlag) + { + return; + } + + LocalPathCategorization Categorization = CategorizeLocalPaths(RemotePathToRemoteIndex); + + if (m_AbortFlag) + { + return; + } + + std::atomic<uint64_t> CachedCount = 0; + std::atomic<uint64_t> CachedByteCount = 0; + ScheduleLocalFileCaching(Categorization.FilesToCache, CachedCount, CachedByteCount); + if (m_AbortFlag) + { + return; + } + + ZEN_DEBUG( + "Local state prep: Match: {}, PathMismatch: {}, HashMismatch: {}, Cached: {} ({}), Skipped: {}, " + "Delete: {}", + Categorization.MatchCount, + Categorization.PathMismatchCount, + Categorization.HashMismatchCount, + CachedCount.load(), + NiceBytes(CachedByteCount.load()), + Categorization.SkippedCount, + Categorization.DeleteCount); + + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::FinalizeTarget, (uint32_t)TaskSteps::StepCount); + + if (m_Options.WipeTargetFolder) + { + ZEN_TRACE_CPU("WipeTarget"); + Stopwatch Timer; + + // Clean target folder + if (!CleanDirectory(Log(), + m_Progress, + m_IOWorkerPool, + m_AbortFlag, + m_PauseFlag, + m_Options.IsQuiet, + m_Path, + 
m_Options.ExcludeFolders)) + { + ZEN_WARN("Some files in {} could not be removed", m_Path); + } + m_RebuildFolderStateStats.CleanFolderElapsedWallTimeUs = Timer.GetElapsedTimeUs(); + } + + if (m_AbortFlag) + { + return; + } + + { + ZEN_TRACE_CPU("FinalizeTree"); + + Stopwatch Timer; + + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Rebuild State"); + ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + OutLocalFolderState.Paths.resize(m_RemoteContent.Paths.size()); + OutLocalFolderState.RawSizes.resize(m_RemoteContent.Paths.size()); + OutLocalFolderState.Attributes.resize(m_RemoteContent.Paths.size()); + OutLocalFolderState.ModificationTicks.resize(m_RemoteContent.Paths.size()); + + std::atomic<uint64_t> DeletedCount = 0; + std::atomic<uint64_t> TargetsComplete = 0; + + ScheduleLocalFileRemovals(Work, Categorization.RemoveLocalPathIndexes, DeletedCount); + + std::vector<FinalizeTarget> Targets = BuildSortedFinalizeTargets(); + + ScheduleTargetFinalization(Work, + Targets, + Categorization.SequenceHashToLocalPathIndex, + Categorization.RemotePathIndexToLocalPathIndex, + OutLocalFolderState, + TargetsComplete); + + { + ZEN_TRACE_CPU("FinalizeTree_Wait"); + + Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + const uint64_t WorkTotal = Targets.size() + Categorization.RemoveLocalPathIndexes.size(); + const uint64_t WorkComplete = TargetsComplete.load() + DeletedCount.load(); + std::string Details = fmt::format("{}/{} files", WorkComplete, WorkTotal); + ProgressBar->UpdateState({.Task = "Rebuilding state ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(WorkTotal), + .RemainingCount = gsl::narrow<uint64_t>(WorkTotal - WorkComplete), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + } + + 
m_RebuildFolderStateStats.FinalizeTreeElapsedWallTimeUs = Timer.GetElapsedTimeUs(); + ProgressBar->Finish(); + } + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount); + } + catch (const std::exception&) + { + m_AbortFlag = true; + throw; + } +} + +void +BuildsOperationUpdateFolder::ScanCacheFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedChunkHashesFound, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedSequenceHashesFound) +{ + ZEN_TRACE_CPU("ScanCacheFolder"); + + Stopwatch CacheTimer; + + DirectoryContent CacheDirContent; + GetDirectoryContent(m_CacheFolderPath, DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, CacheDirContent); + for (size_t Index = 0; Index < CacheDirContent.Files.size(); Index++) + { + if (m_Options.EnableTargetFolderScavenging) + { + IoHash FileHash; + if (IoHash::TryParse(CacheDirContent.Files[Index].filename().string(), FileHash)) + { + if (auto ChunkIt = m_RemoteLookup.ChunkHashToChunkIndex.find(FileHash); + ChunkIt != m_RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = ChunkIt->second; + const uint64_t ChunkSize = m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + if (ChunkSize == CacheDirContent.FileSizes[Index]) + { + OutCachedChunkHashesFound.insert({FileHash, ChunkIndex}); + m_CacheMappingStats.CacheChunkCount++; + m_CacheMappingStats.CacheChunkByteCount += ChunkSize; + continue; + } + } + else if (auto SequenceIt = m_RemoteLookup.RawHashToSequenceIndex.find(FileHash); + SequenceIt != m_RemoteLookup.RawHashToSequenceIndex.end()) + { + const uint32_t SequenceIndex = SequenceIt->second; + const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const uint64_t SequenceSize = m_RemoteContent.RawSizes[PathIndex]; + if (SequenceSize == CacheDirContent.FileSizes[Index]) + { + OutCachedSequenceHashesFound.insert({FileHash, SequenceIndex}); + 
// Scans the download-cache folder, recording files that match a remote chunk hash
// (by name and exact size) or a remote sequence raw hash. Every file that does not
// match — or everything, when target-folder scavenging is disabled — is deleted,
// so the cache only ever holds data useful to the current operation.
void
BuildsOperationUpdateFolder::ScanCacheFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedChunkHashesFound,
                                             tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedSequenceHashesFound)
{
	ZEN_TRACE_CPU("ScanCacheFolder");

	Stopwatch CacheTimer;

	DirectoryContent CacheDirContent;
	GetDirectoryContent(m_CacheFolderPath, DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, CacheDirContent);
	for (size_t Index = 0; Index < CacheDirContent.Files.size(); Index++)
	{
		// When scavenging is off nothing is reusable, so every file falls through to removal.
		if (m_Options.EnableTargetFolderScavenging)
		{
			// Cache files are named by their content hash.
			IoHash FileHash;
			if (IoHash::TryParse(CacheDirContent.Files[Index].filename().string(), FileHash))
			{
				if (auto ChunkIt = m_RemoteLookup.ChunkHashToChunkIndex.find(FileHash);
					ChunkIt != m_RemoteLookup.ChunkHashToChunkIndex.end())
				{
					const uint32_t ChunkIndex = ChunkIt->second;
					const uint64_t ChunkSize = m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
					// Size must match exactly; otherwise the file is stale/truncated and gets removed.
					if (ChunkSize == CacheDirContent.FileSizes[Index])
					{
						OutCachedChunkHashesFound.insert({FileHash, ChunkIndex});
						m_CacheMappingStats.CacheChunkCount++;
						m_CacheMappingStats.CacheChunkByteCount += ChunkSize;
						continue;
					}
				}
				else if (auto SequenceIt = m_RemoteLookup.RawHashToSequenceIndex.find(FileHash);
						 SequenceIt != m_RemoteLookup.RawHashToSequenceIndex.end())
				{
					const uint32_t SequenceIndex = SequenceIt->second;
					const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex];
					const uint64_t SequenceSize = m_RemoteContent.RawSizes[PathIndex];
					if (SequenceSize == CacheDirContent.FileSizes[Index])
					{
						OutCachedSequenceHashesFound.insert({FileHash, SequenceIndex});
						m_CacheMappingStats.CacheSequenceHashesCount++;
						m_CacheMappingStats.CacheSequenceHashesByteCount += SequenceSize;

						// Sanity check (debug only): the canonical sequence file name resolves to a real file.
						const std::filesystem::path CacheFilePath =
							GetFinalChunkedSequenceFileName(m_CacheFolderPath,
															m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]);
						ZEN_ASSERT_SLOW(IsFile(CacheFilePath));

						continue;
					}
				}
			}
		}
		// Not usable for this operation: best-effort delete, failure is only logged.
		std::error_code Ec = TryRemoveFile(CacheDirContent.Files[Index]);
		if (Ec)
		{
			ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", CacheDirContent.Files[Index], Ec.value(), Ec.message());
		}
	}
	m_CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs();
}

// Scans the temp block folder for previously-downloaded compressed blocks. A file is
// reusable when its name parses to a known block hash and its size equals the expected
// on-disk block size (none-encoder header + block header + all compressed chunk lengths).
// Everything else is deleted, mirroring ScanCacheFolder.
void
BuildsOperationUpdateFolder::ScanTempBlocksFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedBlocksFound)
{
	ZEN_TRACE_CPU("ScanTempBlocksFolder");

	Stopwatch CacheTimer;

	// Block hash -> block index, for O(1) matching of file names below.
	tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> AllBlockSizes;
	AllBlockSizes.reserve(m_BlockDescriptions.size());
	for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++)
	{
		const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
		AllBlockSizes.insert({BlockDescription.BlockHash, BlockIndex});
	}

	DirectoryContent BlockDirContent;
	GetDirectoryContent(m_TempBlockFolderPath,
						DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes,
						BlockDirContent);
	OutCachedBlocksFound.reserve(BlockDirContent.Files.size());
	for (size_t Index = 0; Index < BlockDirContent.Files.size(); Index++)
	{
		if (m_Options.EnableTargetFolderScavenging)
		{
			IoHash FileHash;
			if (IoHash::TryParse(BlockDirContent.Files[Index].filename().string(), FileHash))
			{
				if (auto BlockIt = AllBlockSizes.find(FileHash); BlockIt != AllBlockSizes.end())
				{
					const uint32_t BlockIndex = BlockIt->second;
					const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
					// Expected size = compression wrapper header + block header + sum of compressed chunk lengths.
					uint64_t BlockSize = CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize;
					for (uint64_t ChunkSize : BlockDescription.ChunkCompressedLengths)
					{
						BlockSize += ChunkSize;
					}

					if (BlockSize == BlockDirContent.FileSizes[Index])
					{
						OutCachedBlocksFound.insert({FileHash, BlockIndex});
						m_CacheMappingStats.CacheBlockCount++;
						m_CacheMappingStats.CacheBlocksByteCount += BlockSize;
						continue;
					}
				}
			}
		}
		std::error_code Ec = TryRemoveFile(BlockDirContent.Files[Index]);
		if (Ec)
		{
			ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", BlockDirContent.Files[Index], Ec.value(), Ec.message());
		}
	}

	m_CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs();
}
m_RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]; + } + } +} + +void +BuildsOperationUpdateFolder::MatchScavengedSequencesToRemote(std::span<const ChunkedFolderContent> Contents, + std::span<const ChunkedContentLookup> Lookups, + std::span<const std::filesystem::path> Paths, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& InOutSequencesLeftToFind, + std::vector<std::atomic<uint32_t>>& InOutSequenceCounters, + std::vector<ScavengedSequenceCopyOperation>& OutCopyOperations, + uint64_t& OutScavengedPathsCount) +{ + for (uint32_t ScavengedContentIndex = 0; ScavengedContentIndex < Contents.size() && !InOutSequencesLeftToFind.empty(); + ScavengedContentIndex++) + { + const std::filesystem::path& ScavengePath = Paths[ScavengedContentIndex]; + if (ScavengePath.empty()) + { + continue; + } + const ChunkedFolderContent& ScavengedLocalContent = Contents[ScavengedContentIndex]; + const ChunkedContentLookup& ScavengedLookup = Lookups[ScavengedContentIndex]; + + for (uint32_t ScavengedSequenceIndex = 0; ScavengedSequenceIndex < ScavengedLocalContent.ChunkedContent.SequenceRawHashes.size(); + ScavengedSequenceIndex++) + { + const IoHash& SequenceRawHash = ScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengedSequenceIndex]; + auto It = InOutSequencesLeftToFind.find(SequenceRawHash); + if (It == InOutSequencesLeftToFind.end()) + { + continue; + } + const uint32_t RemoteSequenceIndex = It->second; + const uint64_t RawSize = m_RemoteContent.RawSizes[m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]]; + ZEN_ASSERT(RawSize > 0); + + const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[ScavengedSequenceIndex]; + ZEN_ASSERT_SLOW(IsFile((ScavengePath / ScavengedLocalContent.Paths[ScavengedPathIndex]).make_preferred())); + + OutCopyOperations.push_back({.ScavengedContentIndex = ScavengedContentIndex, + .ScavengedPathIndex = ScavengedPathIndex, + .RemoteSequenceIndex = RemoteSequenceIndex, + .RawSize = RawSize}); + + 
InOutSequencesLeftToFind.erase(SequenceRawHash); + InOutSequenceCounters[RemoteSequenceIndex] = 0; + + m_CacheMappingStats.ScavengedPathsMatchingSequencesCount++; + m_CacheMappingStats.ScavengedPathsMatchingSequencesByteCount += RawSize; + } + OutScavengedPathsCount++; + } +} + +uint64_t +BuildsOperationUpdateFolder::CalculateBytesToWriteAndFlagNeededChunks(std::span<const std::atomic<uint32_t>> SequenceCounters, + const std::vector<bool>& NeedsCopyFromLocalFileFlags, + std::span<std::atomic<bool>> OutNeedsCopyFromSourceFlags) +{ + uint64_t BytesToWrite = 0; + for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) + { + const uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceCounters, RemoteChunkIndex); + if (ChunkWriteCount > 0) + { + BytesToWrite += m_RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] * ChunkWriteCount; + if (!NeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + OutNeedsCopyFromSourceFlags[RemoteChunkIndex] = true; + } + } + } + return BytesToWrite; +} + +void +BuildsOperationUpdateFolder::ClassifyCachedAndFetchBlocks(std::span<const ChunkBlockAnalyser::NeededBlock> NeededBlocks, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedBlocksFound, + uint64_t& TotalPartWriteCount, + std::vector<uint32_t>& OutCachedChunkBlockIndexes, + std::vector<uint32_t>& OutFetchBlockIndexes) +{ + ZEN_TRACE_CPU("BlockCacheFileExists"); + for (const ChunkBlockAnalyser::NeededBlock& NeededBlock : NeededBlocks) + { + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex]; + bool UsingCachedBlock = false; + if (auto It = CachedBlocksFound.find(BlockDescription.BlockHash); It != CachedBlocksFound.end()) + { + TotalPartWriteCount++; + + std::filesystem::path BlockPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(); + if (IsFile(BlockPath)) + { + OutCachedChunkBlockIndexes.push_back(NeededBlock.BlockIndex); + 
UsingCachedBlock = true; + } + } + if (!UsingCachedBlock) + { + OutFetchBlockIndexes.push_back(NeededBlock.BlockIndex); + } + } +} + +std::vector<uint32_t> +BuildsOperationUpdateFolder::DetermineNeededLooseChunkIndexes(std::span<const std::atomic<uint32_t>> SequenceCounters, + const std::vector<bool>& NeedsCopyFromLocalFileFlags, + std::span<std::atomic<bool>> NeedsCopyFromSourceFlags) +{ + std::vector<uint32_t> NeededLooseChunkIndexes; + NeededLooseChunkIndexes.reserve(m_LooseChunkHashes.size()); + for (uint32_t LooseChunkIndex = 0; LooseChunkIndex < m_LooseChunkHashes.size(); LooseChunkIndex++) + { + const IoHash& ChunkHash = m_LooseChunkHashes[LooseChunkIndex]; + auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end()); + const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second; + + if (NeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + if (m_Options.IsVerbose) + { + ZEN_INFO("Skipping chunk {} due to cache reuse", m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]); + } + continue; + } + + bool NeedsCopy = true; + if (NeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false)) + { + const uint64_t WriteCount = GetChunkWriteCount(SequenceCounters, RemoteChunkIndex); + if (WriteCount == 0) + { + if (m_Options.IsVerbose) + { + ZEN_INFO("Skipping chunk {} due to cache reuse", m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]); + } + } + else + { + NeededLooseChunkIndexes.push_back(LooseChunkIndex); + } + } + } + return NeededLooseChunkIndexes; +} + +BuildsOperationUpdateFolder::BlobsExistsResult +BuildsOperationUpdateFolder::QueryBlobCacheExists(std::span<const uint32_t> NeededLooseChunkIndexes, + std::span<const uint32_t> FetchBlockIndexes) +{ + BlobsExistsResult Result; + if (!m_Storage.CacheStorage) + { + return Result; + } + + ZEN_TRACE_CPU("BlobCacheExistCheck"); + Stopwatch Timer; + + std::vector<IoHash> 
BlobHashes;
    BlobHashes.reserve(NeededLooseChunkIndexes.size() + FetchBlockIndexes.size());

    // Query order: loose chunk hashes first, then block hashes; the result
    // vector below is index-aligned with this order.
    for (const uint32_t LooseChunkIndex : NeededLooseChunkIndexes)
    {
        BlobHashes.push_back(m_LooseChunkHashes[LooseChunkIndex]);
    }

    for (uint32_t BlockIndex : FetchBlockIndexes)
    {
        BlobHashes.push_back(m_BlockDescriptions[BlockIndex].BlockHash);
    }

    const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = m_Storage.CacheStorage->BlobsExists(m_BuildId, BlobHashes);

    // A size mismatch is treated as "nothing found" rather than an error.
    if (CacheExistsResult.size() == BlobHashes.size())
    {
        Result.ExistingBlobs.reserve(CacheExistsResult.size());
        for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++)
        {
            // Only blobs whose body is actually present count as cached.
            if (CacheExistsResult[BlobIndex].HasBody)
            {
                Result.ExistingBlobs.insert(BlobHashes[BlobIndex]);
            }
        }
    }
    Result.ElapsedTimeMs = Timer.GetElapsedTimeMs();
    if (!Result.ExistingBlobs.empty() && !m_Options.IsQuiet)
    {
        ZEN_INFO("Remote cache : Found {} out of {} needed blobs in {}",
                 Result.ExistingBlobs.size(),
                 BlobHashes.size(),
                 NiceTimeSpanMs(Result.ElapsedTimeMs));
    }
    return Result;
}

// Decides, per block, how partial (HTTP range style) downloads may be used:
// blocks present in the zen cache get the cache mode, all others get the
// cloud/build-storage mode. The modes depend on the configured
// PartialBlockRequestMode option and on whether each host supports more than
// one range per request.
std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode>
BuildsOperationUpdateFolder::DeterminePartialDownloadModes(const BlobsExistsResult& ExistsResult)
{
    std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> Modes;

    // Partial requests disabled entirely: every block is downloaded whole.
    if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::Off)
    {
        Modes.resize(m_BlockDescriptions.size(), ChunkBlockAnalyser::EPartialBlockDownloadMode::Off);
        return Modes;
    }

    const bool MultiRangeCache = m_Storage.CacheHost.Caps.MaxRangeCountPerRequest > 1;
    const bool MultiRangeBuild = m_Storage.BuildStorageHost.Caps.MaxRangeCountPerRequest > 1;
    // Cache host is assumed local/fast: prefer the high-speed multi-range mode
    // when the host supports multiple ranges per request.
    ChunkBlockAnalyser::EPartialBlockDownloadMode CachePartialDownloadMode =
        MultiRangeCache ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
                        : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange;
    ChunkBlockAnalyser::EPartialBlockDownloadMode CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off;

    // Map the user-facing option onto the cloud-side download mode
    // (the Off case was already returned above; kept for switch completeness).
    switch (m_Options.PartialBlockRequestMode)
    {
    case EPartialBlockRequestMode::Off:
        break;
    case EPartialBlockRequestMode::ZenCacheOnly:
        CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off;
        break;
    case EPartialBlockRequestMode::Mixed:
        CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange;
        break;
    case EPartialBlockRequestMode::All:
        CloudPartialDownloadMode = MultiRangeBuild ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange
                                                   : ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange;
        break;
    default:
        ZEN_ASSERT(false);
        break;
    }

    Modes.reserve(m_BlockDescriptions.size());
    for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++)
    {
        const bool BlockExistInCache = ExistsResult.ExistingBlobs.contains(m_BlockDescriptions[BlockIndex].BlockHash);
        Modes.push_back(BlockExistInCache ?
CachePartialDownloadMode : CloudPartialDownloadMode);
    }
    return Modes;
}

// Builds one work item per needed loose chunk, capturing the remote chunk
// index and the remaining sequence locations the chunk still has to be written
// to. Each input index must resolve through the remote lookup and must have at
// least one outstanding target.
std::vector<BuildsOperationUpdateFolder::LooseChunkHashWorkData>
BuildsOperationUpdateFolder::BuildLooseChunkHashWorks(std::span<const uint32_t> NeededLooseChunkIndexes,
                                                      std::span<const std::atomic<uint32_t>> SequenceCounters)
{
    std::vector<LooseChunkHashWorkData> LooseChunkHashWorks;
    LooseChunkHashWorks.reserve(NeededLooseChunkIndexes.size());
    for (uint32_t LooseChunkIndex : NeededLooseChunkIndexes)
    {
        const IoHash& ChunkHash = m_LooseChunkHashes[LooseChunkIndex];
        // Loose chunk hashes are expected to always resolve in the remote lookup.
        auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash);
        ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end());
        const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second;

        // All sequence locations that still await a write of this chunk.
        std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs =
            GetRemainingChunkTargets(SequenceCounters, RemoteChunkIndex);

        // A chunk in this list was selected precisely because something still
        // needs it, so the target list must be non-empty.
        ZEN_ASSERT(!ChunkTargetPtrs.empty());
        LooseChunkHashWorks.push_back(LooseChunkHashWorkData{.ChunkTargetPtrs = ChunkTargetPtrs, .RemoteChunkIndex = RemoteChunkIndex});
    }
    return LooseChunkHashWorks;
}

// Debug/consistency check run after all chunk writes: asserts that every
// sequence counter has reached zero (all chunks of every sequence were
// written) and that the written/validated byte totals match expectations.
// Logs any incomplete sequence (unless quiet) before asserting.
void
BuildsOperationUpdateFolder::VerifyWriteChunksComplete(std::span<const std::atomic<uint32_t>> SequenceCounters,
                                                       uint64_t BytesToWrite,
                                                       uint64_t BytesToValidate)
{
    uint32_t RawSequencesMissingWriteCount = 0;
    for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceCounters.size(); SequenceIndex++)
    {
        const auto& Counter = SequenceCounters[SequenceIndex];
        // Non-zero counter means chunks of this sequence were never written.
        if (Counter.load() != 0)
        {
            RawSequencesMissingWriteCount++;
            const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex];
            const std::filesystem::path& IncompletePath = m_RemoteContent.Paths[PathIndex];
            ZEN_ASSERT(!IncompletePath.empty());
            const uint32_t ExpectedSequenceCount = m_RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex];
            if (!m_Options.IsQuiet)
            {
                ZEN_INFO("{}: Max count {}, Current count {}", IncompletePath,
ExpectedSequenceCount, Counter.load());
            }
            // Counter must never exceed the sequence's total chunk count.
            ZEN_ASSERT(Counter.load() <= ExpectedSequenceCount);
        }
    }
    ZEN_ASSERT(RawSequencesMissingWriteCount == 0);
    ZEN_ASSERT(m_WrittenChunkByteCount == BytesToWrite);
    ZEN_ASSERT(m_ValidatedChunkByteCount == BytesToValidate);
}

// Builds the finalize work list: one target per remote path, sorted by
// (RawHash, RemotePathIndex) so that all paths sharing the same content hash
// are adjacent - finalization later processes each same-hash group as a unit
// (materialize one file, then copy it to the duplicates).
std::vector<BuildsOperationUpdateFolder::FinalizeTarget>
BuildsOperationUpdateFolder::BuildSortedFinalizeTargets()
{
    std::vector<FinalizeTarget> Targets;
    Targets.reserve(m_RemoteContent.Paths.size());
    for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++)
    {
        Targets.push_back(FinalizeTarget{.RawHash = m_RemoteContent.RawHashes[RemotePathIndex], .RemotePathIndex = RemotePathIndex});
    }
    // Secondary key keeps the order deterministic for equal hashes.
    std::sort(Targets.begin(), Targets.end(), [](const FinalizeTarget& Lhs, const FinalizeTarget& Rhs) {
        return std::tie(Lhs.RawHash, Lhs.RemotePathIndex) < std::tie(Rhs.RawHash, Rhs.RemotePathIndex);
    });
    return Targets;
}

// Scans all scavenge sources in parallel on the IO worker pool, filling the
// output arrays (index-aligned with Sources). Sources that yield no usable
// content get an empty path in OutPaths. Shows a progress bar while waiting.
void
BuildsOperationUpdateFolder::ScanScavengeSources(std::span<const ScavengeSource> Sources,
                                                 std::vector<ChunkedFolderContent>& OutContents,
                                                 std::vector<ChunkedContentLookup>& OutLookups,
                                                 std::vector<std::filesystem::path>& OutPaths)
{
    ZEN_TRACE_CPU("ScanScavengeSources");

    const size_t ScavengePathCount = Sources.size();
    OutContents.resize(ScavengePathCount);
    OutLookups.resize(ScavengePathCount);
    OutPaths.resize(ScavengePathCount);

    std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Scavenging");

    ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);

    // Counters shared between the worker lambdas and the progress callback.
    std::atomic<uint64_t> PathsFound(0);
    std::atomic<uint64_t> ChunksFound(0);
    std::atomic<uint64_t> PathsScavenged(0);

    for (size_t ScavengeIndex = 0; ScavengeIndex < ScavengePathCount; ScavengeIndex++)
    {
        Work.ScheduleWork(m_IOWorkerPool,
                          [this, &Sources, &OutContents, &OutPaths, &OutLookups, &PathsFound, &ChunksFound, &PathsScavenged, ScavengeIndex](
                              std::atomic<bool>&) {
+ if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_FindScavengeContent"); + + const ScavengeSource& Source = Sources[ScavengeIndex]; + ChunkedFolderContent& ScavengedLocalContent = OutContents[ScavengeIndex]; + ChunkedContentLookup& ScavengedLookup = OutLookups[ScavengeIndex]; + + if (FindScavengeContent(Source, ScavengedLocalContent, ScavengedLookup)) + { + OutPaths[ScavengeIndex] = Source.Path; + PathsFound += ScavengedLocalContent.Paths.size(); + ChunksFound += ScavengedLocalContent.ChunkedContent.ChunkHashes.size(); + } + else + { + OutPaths[ScavengeIndex].clear(); + } + PathsScavenged++; + } + }); + } + { + ZEN_TRACE_CPU("ScavengeScan_Wait"); + + Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + std::string Details = fmt::format("{}/{} scanned. {} paths and {} chunks found for scavenging", + PathsScavenged.load(), + ScavengePathCount, + PathsFound.load(), + ChunksFound.load()); + ProgressBar->UpdateState({.Task = "Scavenging ", + .Details = Details, + .TotalCount = ScavengePathCount, + .RemainingCount = ScavengePathCount - PathsScavenged.load(), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + } + + ProgressBar->Finish(); +} + +BuildsOperationUpdateFolder::LocalPathCategorization +BuildsOperationUpdateFolder::CategorizeLocalPaths(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex) +{ + ZEN_TRACE_CPU("PrepareTarget"); + + LocalPathCategorization Result; + tsl::robin_set<IoHash, IoHash::Hasher> CachedRemoteSequences; + + Result.RemotePathIndexToLocalPathIndex.reserve(m_RemoteContent.Paths.size()); + + for (uint32_t LocalPathIndex = 0; LocalPathIndex < m_LocalContent.Paths.size(); LocalPathIndex++) + { + if (m_AbortFlag) + { + break; + } + const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex]; + const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex]; + + 
ZEN_ASSERT_SLOW(IsFile((m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred())); + + if (m_Options.EnableTargetFolderScavenging) + { + if (!m_Options.WipeTargetFolder) + { + // Check if it is already in the correct place + if (auto RemotePathIt = RemotePathToRemoteIndex.find(LocalPath.generic_string()); + RemotePathIt != RemotePathToRemoteIndex.end()) + { + const uint32_t RemotePathIndex = RemotePathIt->second; + if (m_RemoteContent.RawHashes[RemotePathIndex] == RawHash) + { + // It is already in it's correct place + Result.RemotePathIndexToLocalPathIndex[RemotePathIndex] = LocalPathIndex; + Result.SequenceHashToLocalPathIndex.insert({RawHash, LocalPathIndex}); + Result.MatchCount++; + continue; + } + else + { + Result.HashMismatchCount++; + } + } + else + { + Result.PathMismatchCount++; + } + } + + // Do we need it? + if (m_RemoteLookup.RawHashToSequenceIndex.contains(RawHash)) + { + if (!CachedRemoteSequences.contains(RawHash)) + { + // We need it, make sure we move it to the cache + Result.FilesToCache.push_back(LocalPathIndex); + CachedRemoteSequences.insert(RawHash); + continue; + } + else + { + Result.SkippedCount++; + } + } + } + + if (!m_Options.WipeTargetFolder) + { + // Explicitly delete the unneeded local file + Result.RemoveLocalPathIndexes.push_back(LocalPathIndex); + Result.DeleteCount++; + } + } + + return Result; +} + +void +BuildsOperationUpdateFolder::ScheduleLocalFileCaching(std::span<const uint32_t> FilesToCache, + std::atomic<uint64_t>& OutCachedCount, + std::atomic<uint64_t>& OutCachedByteCount) +{ + ZEN_TRACE_CPU("CopyToCache"); + + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Cache Local Data"); + ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + for (uint32_t LocalPathIndex : FilesToCache) + { + if (m_AbortFlag) + { + break; + } + Work.ScheduleWork(m_IOWorkerPool, [this, &OutCachedCount, &OutCachedByteCount, LocalPathIndex](std::atomic<bool>&) { + if 
(!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_CopyToCache"); + + const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex]; + const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex]; + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RawHash); + ZEN_ASSERT_SLOW(!IsFileWithRetry(CacheFilePath)); + const std::filesystem::path LocalFilePath = (m_Path / LocalPath).make_preferred(); + + std::error_code Ec = RenameFileWithRetry(LocalFilePath, CacheFilePath); + if (Ec) + { + ZEN_WARN("Failed to move file from '{}' to '{}', reason: ({}) {}, retrying...", + LocalFilePath, + CacheFilePath, + Ec.value(), + Ec.message()); + Ec = RenameFileWithRetry(LocalFilePath, CacheFilePath); + if (Ec) + { + throw std::system_error(std::error_code(Ec.value(), std::system_category()), + fmt::format("Failed to file from '{}' to '{}', reason: ({}) {}", + LocalFilePath, + CacheFilePath, + Ec.value(), + Ec.message())); + } + } + + OutCachedCount++; + OutCachedByteCount += m_LocalContent.RawSizes[LocalPathIndex]; + } + }); + } + + { + ZEN_TRACE_CPU("CopyToCache_Wait"); + + Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + const uint64_t WorkTotal = FilesToCache.size(); + const uint64_t WorkComplete = OutCachedCount.load(); + std::string Details = fmt::format("{}/{} ({}) files", WorkComplete, WorkTotal, NiceBytes(OutCachedByteCount)); + ProgressBar->UpdateState({.Task = "Caching local ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(WorkTotal), + .RemainingCount = gsl::narrow<uint64_t>(WorkTotal - WorkComplete), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + } + + ProgressBar->Finish(); +} + +void +BuildsOperationUpdateFolder::ScheduleScavengedSequenceWrites(WriteChunksContext& Context, + std::span<const ScavengedSequenceCopyOperation> CopyOperations, + const 
std::vector<ChunkedFolderContent>& ScavengedContents, + const std::vector<std::filesystem::path>& ScavengedPaths) +{ + for (uint32_t ScavengeOpIndex = 0; ScavengeOpIndex < CopyOperations.size(); ScavengeOpIndex++) + { + if (m_AbortFlag) + { + break; + } + Context.Work.ScheduleWork( + m_IOWorkerPool, + [this, &Context, CopyOperations, &ScavengedContents, &ScavengedPaths, ScavengeOpIndex](std::atomic<bool>&) { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_WriteScavenged"); + + Context.FilteredWrittenBytesPerSecond.Start(); + + const ScavengedSequenceCopyOperation& ScavengeOp = CopyOperations[ScavengeOpIndex]; + const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengeOp.ScavengedContentIndex]; + const std::filesystem::path& ScavengeRootPath = ScavengedPaths[ScavengeOp.ScavengedContentIndex]; + + WriteScavengedSequenceToCache(ScavengeRootPath, ScavengedContent, ScavengeOp); + + if (Context.WritePartsComplete.fetch_add(1) + 1 == Context.TotalPartWriteCount) + { + Context.FilteredWrittenBytesPerSecond.Stop(); + } + } + }); + } +} + +void +BuildsOperationUpdateFolder::ScheduleLooseChunkWrites(WriteChunksContext& Context, std::vector<LooseChunkHashWorkData>& LooseChunkHashWorks) +{ + for (uint32_t LooseChunkHashWorkIndex = 0; LooseChunkHashWorkIndex < LooseChunkHashWorks.size(); LooseChunkHashWorkIndex++) + { + if (m_AbortFlag) + { + break; + } + + Context.Work.ScheduleWork( + m_IOWorkerPool, + [this, &Context, &LooseChunkHashWorks, LooseChunkHashWorkIndex](std::atomic<bool>&) { + ZEN_TRACE_CPU("Async_ReadPreDownloadedChunk"); + if (!m_AbortFlag) + { + LooseChunkHashWorkData& LooseChunkHashWork = LooseChunkHashWorks[LooseChunkHashWorkIndex]; + const uint32_t RemoteChunkIndex = LooseChunkHashWork.RemoteChunkIndex; + WriteLooseChunk(RemoteChunkIndex, + Context.ExistsResult, + Context.SequenceIndexChunksLeftToWriteCounters, + Context.WritePartsComplete, + std::move(LooseChunkHashWork.ChunkTargetPtrs), + Context.WriteCache, + Context.Work, + 
Context.TotalRequestCount, + Context.TotalPartWriteCount, + Context.FilteredDownloadedBytesPerSecond, + Context.FilteredWrittenBytesPerSecond); + } + }, + WorkerThreadPool::EMode::EnableBacklog); + } +} + +void +BuildsOperationUpdateFolder::ScheduleLocalChunkCopies(WriteChunksContext& Context, + std::span<const CopyChunkData> CopyChunkDatas, + CloneQueryInterface* CloneQuery, + const std::vector<ChunkedFolderContent>& ScavengedContents, + const std::vector<ChunkedContentLookup>& ScavengedLookups, + const std::vector<std::filesystem::path>& ScavengedPaths) +{ + for (size_t CopyDataIndex = 0; CopyDataIndex < CopyChunkDatas.size(); CopyDataIndex++) + { + if (m_AbortFlag) + { + break; + } + + Context.Work.ScheduleWork( + m_IOWorkerPool, + [this, &Context, CloneQuery, CopyChunkDatas, &ScavengedContents, &ScavengedLookups, &ScavengedPaths, CopyDataIndex]( + std::atomic<bool>&) { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_CopyLocal"); + + Context.FilteredWrittenBytesPerSecond.Start(); + const CopyChunkData& CopyData = CopyChunkDatas[CopyDataIndex]; + + std::vector<uint32_t> WrittenSequenceIndexes = WriteLocalChunkToCache(CloneQuery, + CopyData, + ScavengedContents, + ScavengedLookups, + ScavengedPaths, + Context.WriteCache); + bool WritePartsDone = Context.WritePartsComplete.fetch_add(1) + 1 == Context.TotalPartWriteCount; + if (!m_AbortFlag) + { + if (WritePartsDone) + { + Context.FilteredWrittenBytesPerSecond.Stop(); + } + + // Write tracking, updating this must be done without any files open + std::vector<uint32_t> CompletedChunkSequences; + for (uint32_t RemoteSequenceIndex : WrittenSequenceIndexes) + { + if (CompleteSequenceChunk(RemoteSequenceIndex, Context.SequenceIndexChunksLeftToWriteCounters)) + { + CompletedChunkSequences.push_back(RemoteSequenceIndex); + } + } + Context.WriteCache.Close(CompletedChunkSequences); + VerifyAndCompleteChunkSequencesAsync(CompletedChunkSequences, Context.Work); + } + } + }); + } +} + +void 
+BuildsOperationUpdateFolder::ScheduleCachedBlockWrites(WriteChunksContext& Context, std::span<const uint32_t> CachedBlockIndexes) +{ + for (uint32_t BlockIndex : CachedBlockIndexes) + { + if (m_AbortFlag) + { + break; + } + + Context.Work.ScheduleWork(m_IOWorkerPool, [this, &Context, BlockIndex](std::atomic<bool>&) { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_WriteCachedBlock"); + + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; + Context.FilteredWrittenBytesPerSecond.Start(); + + std::filesystem::path BlockChunkPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(); + IoBuffer BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Can not read block {} at {}", BlockDescription.BlockHash, BlockChunkPath)); + } + + if (!m_AbortFlag) + { + if (!WriteChunksBlockToCache(BlockDescription, + Context.SequenceIndexChunksLeftToWriteCounters, + Context.Work, + CompositeBuffer(std::move(BlockBuffer)), + Context.RemoteChunkIndexNeedsCopyFromSourceFlags, + Context.WriteCache)) + { + std::error_code DummyEc; + RemoveFile(BlockChunkPath, DummyEc); + throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash)); + } + + std::error_code Ec = TryRemoveFile(BlockChunkPath); + if (Ec) + { + ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", BlockChunkPath, Ec.value(), Ec.message()); + } + + if (Context.WritePartsComplete.fetch_add(1) + 1 == Context.TotalPartWriteCount) + { + Context.FilteredWrittenBytesPerSecond.Stop(); + } + } + } + }); + } +} + +void +BuildsOperationUpdateFolder::SchedulePartialBlockDownloads(WriteChunksContext& Context, + const ChunkBlockAnalyser::BlockResult& PartialBlocks) +{ + for (size_t BlockRangeIndex = 0; BlockRangeIndex < PartialBlocks.BlockRanges.size();) + { + if (m_AbortFlag) + { + break; + } + + size_t RangeCount = 1; + size_t RangesLeft = PartialBlocks.BlockRanges.size() - BlockRangeIndex; + 
const ChunkBlockAnalyser::BlockRangeDescriptor& CurrentBlockRange = PartialBlocks.BlockRanges[BlockRangeIndex]; + while (RangeCount < RangesLeft && + CurrentBlockRange.BlockIndex == PartialBlocks.BlockRanges[BlockRangeIndex + RangeCount].BlockIndex) + { + RangeCount++; + } + + Context.Work.ScheduleWork( + m_NetworkPool, + [this, &Context, &PartialBlocks, BlockRangeStartIndex = BlockRangeIndex, RangeCount = RangeCount](std::atomic<bool>&) { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_GetPartialBlockRanges"); + + Context.FilteredDownloadedBytesPerSecond.Start(); + + DownloadPartialBlock( + PartialBlocks.BlockRanges, + BlockRangeStartIndex, + RangeCount, + Context.ExistsResult, + Context.TotalRequestCount, + Context.FilteredDownloadedBytesPerSecond, + [this, &Context, &PartialBlocks](IoBuffer&& InMemoryBuffer, + const std::filesystem::path& OnDiskPath, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths) { + if (!m_AbortFlag) + { + Context.Work.ScheduleWork( + m_IOWorkerPool, + [this, + &Context, + &PartialBlocks, + BlockRangeStartIndex, + BlockChunkPath = std::filesystem::path(OnDiskPath), + BlockPartialBuffer = std::move(InMemoryBuffer), + OffsetAndLengths = + std::vector<std::pair<uint64_t, uint64_t>>(OffsetAndLengths.begin(), OffsetAndLengths.end())]( + std::atomic<bool>&) mutable { + if (!m_AbortFlag) + { + WritePartialBlockToCache(Context, + BlockRangeStartIndex, + std::move(BlockPartialBuffer), + BlockChunkPath, + OffsetAndLengths, + PartialBlocks); + } + }, + OnDiskPath.empty() ? 
WorkerThreadPool::EMode::DisableBacklog : WorkerThreadPool::EMode::EnableBacklog); + } + }); + } + }); + BlockRangeIndex += RangeCount; + } +} + +void +BuildsOperationUpdateFolder::WritePartialBlockToCache(WriteChunksContext& Context, + size_t BlockRangeStartIndex, + IoBuffer BlockPartialBuffer, + const std::filesystem::path& BlockChunkPath, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths, + const ChunkBlockAnalyser::BlockResult& PartialBlocks) +{ + ZEN_TRACE_CPU("Async_WritePartialBlock"); + + const uint32_t BlockIndex = PartialBlocks.BlockRanges[BlockRangeStartIndex].BlockIndex; + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; + + if (BlockChunkPath.empty()) + { + ZEN_ASSERT(BlockPartialBuffer); + } + else + { + ZEN_ASSERT(!BlockPartialBuffer); + BlockPartialBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockPartialBuffer) + { + throw std::runtime_error(fmt::format("Could not open downloaded block {} from {}", BlockDescription.BlockHash, BlockChunkPath)); + } + } + + Context.FilteredWrittenBytesPerSecond.Start(); + + const size_t RangeCount = OffsetAndLengths.size(); + + for (size_t PartialRangeIndex = 0; PartialRangeIndex < RangeCount; PartialRangeIndex++) + { + const std::pair<uint64_t, uint64_t>& OffsetAndLength = OffsetAndLengths[PartialRangeIndex]; + IoBuffer BlockRangeBuffer(BlockPartialBuffer, OffsetAndLength.first, OffsetAndLength.second); + + const ChunkBlockAnalyser::BlockRangeDescriptor& RangeDescriptor = + PartialBlocks.BlockRanges[BlockRangeStartIndex + PartialRangeIndex]; + + if (!WritePartialBlockChunksToCache(BlockDescription, + Context.SequenceIndexChunksLeftToWriteCounters, + Context.Work, + CompositeBuffer(std::move(BlockRangeBuffer)), + RangeDescriptor.ChunkBlockIndexStart, + RangeDescriptor.ChunkBlockIndexStart + RangeDescriptor.ChunkBlockIndexCount - 1, + Context.RemoteChunkIndexNeedsCopyFromSourceFlags, + Context.WriteCache)) + { + std::error_code DummyEc; + 
RemoveFile(BlockChunkPath, DummyEc); + throw std::runtime_error(fmt::format("Partial block {} is malformed", BlockDescription.BlockHash)); + } + + if (Context.WritePartsComplete.fetch_add(1) + 1 == Context.TotalPartWriteCount) + { + Context.FilteredWrittenBytesPerSecond.Stop(); + } + } + std::error_code Ec = TryRemoveFile(BlockChunkPath); + if (Ec) + { + ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", BlockChunkPath, Ec.value(), Ec.message()); + } +} + +void +BuildsOperationUpdateFolder::ScheduleFullBlockDownloads(WriteChunksContext& Context, std::span<const uint32_t> FullBlockIndexes) +{ + for (uint32_t BlockIndex : FullBlockIndexes) + { + if (m_AbortFlag) + { + break; + } + + Context.Work.ScheduleWork(m_NetworkPool, [this, &Context, BlockIndex](std::atomic<bool>&) { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_GetFullBlock"); + + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; + + Context.FilteredDownloadedBytesPerSecond.Start(); + + IoBuffer BlockBuffer; + const bool ExistsInCache = + m_Storage.CacheStorage && Context.ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash); + if (ExistsInCache) + { + BlockBuffer = m_Storage.CacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash); + } + if (!BlockBuffer) + { + try + { + BlockBuffer = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash); + } + catch (const std::exception&) + { + // Silence http errors due to abort + if (!m_AbortFlag) + { + throw; + } + } + } + if (!m_AbortFlag) + { + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); + } + + uint64_t BlockSize = BlockBuffer.GetSize(); + m_DownloadStats.DownloadedBlockCount++; + m_DownloadStats.DownloadedBlockByteCount += BlockSize; + if (m_DownloadStats.RequestsCompleteCount.fetch_add(1) + 1 == Context.TotalRequestCount) + { + Context.FilteredDownloadedBytesPerSecond.Stop(); + } + + const bool PutInCache = 
!ExistsInCache && m_Storage.CacheStorage && m_Options.PopulateCache; + + std::filesystem::path BlockChunkPath = + TryMoveDownloadedChunk(BlockBuffer, + m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(), + /* ForceDiskBased */ PutInCache || (BlockSize > m_Options.MaximumInMemoryPayloadSize)); + + if (PutInCache) + { + ZEN_ASSERT(!BlockChunkPath.empty()); + IoBuffer CacheBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (CacheBuffer) + { + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + BlockDescription.BlockHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(SharedBuffer(CacheBuffer))); + } + } + + if (!m_AbortFlag) + { + Context.Work.ScheduleWork( + m_IOWorkerPool, + [this, &Context, BlockIndex, BlockChunkPath, BlockBuffer = std::move(BlockBuffer)](std::atomic<bool>&) mutable { + if (!m_AbortFlag) + { + WriteFullBlockToCache(Context, BlockIndex, std::move(BlockBuffer), BlockChunkPath); + } + }, + BlockChunkPath.empty() ? WorkerThreadPool::EMode::DisableBacklog : WorkerThreadPool::EMode::EnableBacklog); + } + } + } + }); + } +} + +void +BuildsOperationUpdateFolder::WriteFullBlockToCache(WriteChunksContext& Context, + uint32_t BlockIndex, + IoBuffer BlockBuffer, + const std::filesystem::path& BlockChunkPath) +{ + ZEN_TRACE_CPU("Async_WriteFullBlock"); + + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; + + if (BlockChunkPath.empty()) + { + ZEN_ASSERT(BlockBuffer); + } + else + { + ZEN_ASSERT(!BlockBuffer); + BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Could not open dowloaded block {} from {}", BlockDescription.BlockHash, BlockChunkPath)); + } + } + + Context.FilteredWrittenBytesPerSecond.Start(); + if (!WriteChunksBlockToCache(BlockDescription, + Context.SequenceIndexChunksLeftToWriteCounters, + Context.Work, + CompositeBuffer(std::move(BlockBuffer)), + Context.RemoteChunkIndexNeedsCopyFromSourceFlags, + 
Context.WriteCache)) + { + std::error_code DummyEc; + RemoveFile(BlockChunkPath, DummyEc); + throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash)); + } + + if (!BlockChunkPath.empty()) + { + std::error_code Ec = TryRemoveFile(BlockChunkPath); + if (Ec) + { + ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", BlockChunkPath, Ec.value(), Ec.message()); + } + } + + if (Context.WritePartsComplete.fetch_add(1) + 1 == Context.TotalPartWriteCount) + { + Context.FilteredWrittenBytesPerSecond.Stop(); + } +} + +void +BuildsOperationUpdateFolder::ScheduleLocalFileRemovals(ParallelWork& Work, + std::span<const uint32_t> RemoveLocalPathIndexes, + std::atomic<uint64_t>& DeletedCount) +{ + for (uint32_t LocalPathIndex : RemoveLocalPathIndexes) + { + if (m_AbortFlag) + { + break; + } + Work.ScheduleWork(m_IOWorkerPool, [this, &DeletedCount, LocalPathIndex](std::atomic<bool>&) { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_RemoveFile"); + + const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred(); + SetFileReadOnlyWithRetry(LocalFilePath, false); + RemoveFileWithRetry(LocalFilePath); + DeletedCount++; + } + }); + } +} + +void +BuildsOperationUpdateFolder::ScheduleTargetFinalization( + ParallelWork& Work, + std::span<const FinalizeTarget> Targets, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& SequenceHashToLocalPathIndex, + const tsl::robin_map<uint32_t, uint32_t>& RemotePathIndexToLocalPathIndex, + FolderContent& OutLocalFolderState, + std::atomic<uint64_t>& TargetsComplete) +{ + size_t TargetOffset = 0; + while (TargetOffset < Targets.size()) + { + if (m_AbortFlag) + { + break; + } + + size_t TargetCount = 1; + while ((TargetOffset + TargetCount) < Targets.size() && + (Targets[TargetOffset + TargetCount].RawHash == Targets[TargetOffset].RawHash)) + { + TargetCount++; + } + + Work.ScheduleWork(m_IOWorkerPool, + [this, + &SequenceHashToLocalPathIndex, + Targets, + 
&RemotePathIndexToLocalPathIndex, + &OutLocalFolderState, + BaseTargetOffset = TargetOffset, + TargetCount, + &TargetsComplete](std::atomic<bool>&) { + if (!m_AbortFlag) + { + FinalizeTargetGroup(BaseTargetOffset, + TargetCount, + Targets, + SequenceHashToLocalPathIndex, + RemotePathIndexToLocalPathIndex, + OutLocalFolderState, + TargetsComplete); + } + }); + + TargetOffset += TargetCount; + } +} + +void +BuildsOperationUpdateFolder::FinalizeTargetGroup(size_t BaseOffset, + size_t Count, + std::span<const FinalizeTarget> Targets, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& SequenceHashToLocalPathIndex, + const tsl::robin_map<uint32_t, uint32_t>& RemotePathIndexToLocalPathIndex, + FolderContent& OutLocalFolderState, + std::atomic<uint64_t>& TargetsComplete) +{ + ZEN_TRACE_CPU("Async_FinalizeChunkSequence"); + + size_t TargetOffset = BaseOffset; + const IoHash& RawHash = Targets[TargetOffset].RawHash; + + if (RawHash == IoHash::Zero) + { + ZEN_TRACE_CPU("CreateEmptyFiles"); + while (TargetOffset < (BaseOffset + Count)) + { + const uint32_t RemotePathIndex = Targets[TargetOffset].RemotePathIndex; + ZEN_ASSERT(Targets[TargetOffset].RawHash == RawHash); + const std::filesystem::path& TargetPath = m_RemoteContent.Paths[RemotePathIndex]; + std::filesystem::path TargetFilePath = (m_Path / TargetPath).make_preferred(); + auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(RemotePathIndex); + if (InPlaceIt == RemotePathIndexToLocalPathIndex.end() || InPlaceIt->second == 0) + { + if (IsFileWithRetry(TargetFilePath)) + { + SetFileReadOnlyWithRetry(TargetFilePath, false); + } + else + { + CreateDirectories(TargetFilePath.parent_path()); + } + BasicFile OutputFile; + OutputFile.Open(TargetFilePath, BasicFile::Mode::kTruncate); + } + OutLocalFolderState.Paths[RemotePathIndex] = TargetPath; + OutLocalFolderState.RawSizes[RemotePathIndex] = m_RemoteContent.RawSizes[RemotePathIndex]; + + OutLocalFolderState.Attributes[RemotePathIndex] = + 
m_RemoteContent.Attributes.empty() + ? GetNativeFileAttributes(TargetFilePath) + : SetNativeFileAttributes(TargetFilePath, m_RemoteContent.Platform, m_RemoteContent.Attributes[RemotePathIndex]); + OutLocalFolderState.ModificationTicks[RemotePathIndex] = GetModificationTickFromPath(TargetFilePath); + + TargetOffset++; + TargetsComplete++; + } + } + else + { + ZEN_TRACE_CPU("FinalizeFile"); + ZEN_ASSERT(m_RemoteLookup.RawHashToSequenceIndex.contains(RawHash)); + const uint32_t FirstRemotePathIndex = Targets[TargetOffset].RemotePathIndex; + const std::filesystem::path& FirstTargetPath = m_RemoteContent.Paths[FirstRemotePathIndex]; + std::filesystem::path FirstTargetFilePath = (m_Path / FirstTargetPath).make_preferred(); + + if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(FirstRemotePathIndex); InPlaceIt != RemotePathIndexToLocalPathIndex.end()) + { + ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath)); + } + else + { + if (IsFileWithRetry(FirstTargetFilePath)) + { + SetFileReadOnlyWithRetry(FirstTargetFilePath, false); + } + else + { + CreateDirectories(FirstTargetFilePath.parent_path()); + } + + if (auto InplaceIt = SequenceHashToLocalPathIndex.find(RawHash); InplaceIt != SequenceHashToLocalPathIndex.end()) + { + ZEN_TRACE_CPU("Copy"); + const uint32_t LocalPathIndex = InplaceIt->second; + const std::filesystem::path& SourcePath = m_LocalContent.Paths[LocalPathIndex]; + std::filesystem::path SourceFilePath = (m_Path / SourcePath).make_preferred(); + ZEN_ASSERT_SLOW(IsFileWithRetry(SourceFilePath)); + + ZEN_DEBUG("Copying from '{}' -> '{}'", SourceFilePath, FirstTargetFilePath); + const uint64_t RawSize = m_LocalContent.RawSizes[LocalPathIndex]; + FastCopyFile(m_Options.AllowFileClone, + m_Options.UseSparseFiles, + SourceFilePath, + FirstTargetFilePath, + RawSize, + m_DiskStats.WriteCount, + m_DiskStats.WriteByteCount, + m_DiskStats.CloneCount, + m_DiskStats.CloneByteCount); + + m_RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++; + } + else + { + 
ZEN_TRACE_CPU("Rename"); + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RawHash); + ZEN_ASSERT_SLOW(IsFileWithRetry(CacheFilePath)); + + std::error_code Ec = RenameFileWithRetry(CacheFilePath, FirstTargetFilePath); + if (Ec) + { + ZEN_WARN("Failed to move file from '{}' to '{}', reason: ({}) {}, retrying...", + CacheFilePath, + FirstTargetFilePath, + Ec.value(), + Ec.message()); + Ec = RenameFileWithRetry(CacheFilePath, FirstTargetFilePath); + if (Ec) + { + throw std::system_error(std::error_code(Ec.value(), std::system_category()), + fmt::format("Failed to move file from '{}' to '{}', reason: ({}) {}", + CacheFilePath, + FirstTargetFilePath, + Ec.value(), + Ec.message())); + } + } + + m_RebuildFolderStateStats.FinalizeTreeFilesMovedCount++; + } + } + + OutLocalFolderState.Paths[FirstRemotePathIndex] = FirstTargetPath; + OutLocalFolderState.RawSizes[FirstRemotePathIndex] = m_RemoteContent.RawSizes[FirstRemotePathIndex]; + + OutLocalFolderState.Attributes[FirstRemotePathIndex] = + m_RemoteContent.Attributes.empty() + ? 
GetNativeFileAttributes(FirstTargetFilePath) + : SetNativeFileAttributes(FirstTargetFilePath, m_RemoteContent.Platform, m_RemoteContent.Attributes[FirstRemotePathIndex]); + OutLocalFolderState.ModificationTicks[FirstRemotePathIndex] = GetModificationTickFromPath(FirstTargetFilePath); + + TargetOffset++; + TargetsComplete++; + + while (TargetOffset < (BaseOffset + Count)) + { + const uint32_t RemotePathIndex = Targets[TargetOffset].RemotePathIndex; + ZEN_ASSERT(Targets[TargetOffset].RawHash == RawHash); + const std::filesystem::path& TargetPath = m_RemoteContent.Paths[RemotePathIndex]; + std::filesystem::path TargetFilePath = (m_Path / TargetPath).make_preferred(); + + if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(RemotePathIndex); InPlaceIt != RemotePathIndexToLocalPathIndex.end()) + { + ZEN_ASSERT_SLOW(IsFileWithRetry(TargetFilePath)); + } + else + { + ZEN_TRACE_CPU("Copy"); + if (IsFileWithRetry(TargetFilePath)) + { + SetFileReadOnlyWithRetry(TargetFilePath, false); + } + else + { + CreateDirectories(TargetFilePath.parent_path()); + } + + ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath)); + ZEN_DEBUG("Copying from '{}' -> '{}'", FirstTargetFilePath, TargetFilePath); + const uint64_t RawSize = m_RemoteContent.RawSizes[RemotePathIndex]; + FastCopyFile(m_Options.AllowFileClone, + m_Options.UseSparseFiles, + FirstTargetFilePath, + TargetFilePath, + RawSize, + m_DiskStats.WriteCount, + m_DiskStats.WriteByteCount, + m_DiskStats.CloneCount, + m_DiskStats.CloneByteCount); + + m_RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++; + } + + OutLocalFolderState.Paths[RemotePathIndex] = TargetPath; + OutLocalFolderState.RawSizes[RemotePathIndex] = m_RemoteContent.RawSizes[RemotePathIndex]; + + OutLocalFolderState.Attributes[RemotePathIndex] = + m_RemoteContent.Attributes.empty() + ? 
// Enumerates previously downloaded build folders (recorded as "downloaded state"
// files under the system root) that can be scavenged for reusable content.
// Entries whose recorded folder or state file no longer exist, that fail to
// parse, or that point at the current target folder are deleted (best-effort)
// so they are not considered again.
//
// Returns one ScavengeSource (state file path + local folder path) per valid,
// distinct candidate folder.
std::vector<BuildsOperationUpdateFolder::ScavengeSource>
BuildsOperationUpdateFolder::FindScavengeSources()
{
    ZEN_TRACE_CPU("FindScavengeSources");

    const bool TargetPathExists = IsDir(m_Path);

    std::vector<std::filesystem::path> StatePaths = GetDownloadedStatePaths(m_Options.SystemRootDir);

    std::vector<ScavengeSource> Result;
    for (const std::filesystem::path& EntryPath : StatePaths)
    {
        if (IsFile(EntryPath))
        {
            bool DeleteEntry = false;

            try
            {
                BuildsDownloadInfo Info = ReadDownloadedInfoFile(EntryPath);
                const bool LocalPathExists = !Info.LocalPath.empty() && IsDir(Info.LocalPath);
                const bool LocalStateFileExists = IsFile(Info.StateFilePath);
                if (LocalPathExists && LocalStateFileExists)
                {
                    // The current target folder is never a scavenge source for itself;
                    // drop its stale entry instead.
                    if (TargetPathExists && std::filesystem::equivalent(Info.LocalPath, m_Path))
                    {
                        DeleteEntry = true;
                    }
                    else
                    {
                        Result.push_back({.StateFilePath = std::move(Info.StateFilePath), .Path = std::move(Info.LocalPath)});
                    }
                }
                else
                {
                    // Folder or state file has disappeared -> entry is stale.
                    DeleteEntry = true;
                }
            }
            catch (const std::exception& Ex)
            {
                // Unreadable/corrupt info file: warn and clean it up.
                ZEN_WARN("{}", Ex.what());
                DeleteEntry = true;
            }

            if (DeleteEntry)
            {
                // Best-effort removal; failures are intentionally ignored.
                std::error_code DummyEc;
                std::filesystem::remove(EntryPath, DummyEc);
            }
        }
    }
    return Result;
}

// Scans for remote sequences that are already present locally, either as a
// finalized/chunked file in the cache folder or as a byte-identical file in
// the target folder. Sequences found nowhere are returned so they can be
// downloaded or scavenged.
//
// CachedChunkHashesFound / CachedSequenceHashesFound: hashes already found in
// the cache folder (chunk- and sequence-granularity respectively), mapped by
// raw hash. Returns the indexes of remote sequences with no local source.
std::vector<uint32_t>
BuildsOperationUpdateFolder::ScanTargetFolder(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedChunkHashesFound,
                                              const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound)
{
    ZEN_TRACE_CPU("ScanTargetFolder");

    Stopwatch LocalTimer;

    std::vector<uint32_t> MissingSequenceIndexes;

    for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < m_RemoteContent.ChunkedContent.SequenceRawHashes.size();
         RemoteSequenceIndex++)
    {
        const IoHash& RemoteSequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
        const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(m_RemoteLookup, RemoteSequenceIndex);
        const uint64_t RemoteRawSize = m_RemoteContent.RawSizes[RemotePathIndex];
        // 1) Already finalized in the cache as a full sequence?
        if (auto CacheSequenceIt = CachedSequenceHashesFound.find(RemoteSequenceRawHash);
            CacheSequenceIt != CachedSequenceHashesFound.end())
        {
            const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash);
            ZEN_ASSERT_SLOW(IsFile(CacheFilePath));
            if (m_Options.IsVerbose)
            {
                ZEN_INFO("Found sequence {} at {} ({})", RemoteSequenceRawHash, CacheFilePath, NiceBytes(RemoteRawSize));
            }
        }
        // 2) Already present in the cache as a single chunk with the same raw hash?
        else if (auto CacheChunkIt = CachedChunkHashesFound.find(RemoteSequenceRawHash); CacheChunkIt != CachedChunkHashesFound.end())
        {
            const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash);
            ZEN_ASSERT_SLOW(IsFile(CacheFilePath));
            if (m_Options.IsVerbose)
            {
                ZEN_INFO("Found chunk {} at {} ({})", RemoteSequenceRawHash, CacheFilePath, NiceBytes(RemoteRawSize));
            }
        }
        // 3) A byte-identical file already exists in the local target folder?
        else if (auto It = m_LocalLookup.RawHashToSequenceIndex.find(RemoteSequenceRawHash);
                 It != m_LocalLookup.RawHashToSequenceIndex.end())
        {
            const uint32_t LocalSequenceIndex = It->second;
            const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(m_LocalLookup, LocalSequenceIndex);
            const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred();
            ZEN_ASSERT_SLOW(IsFile(LocalFilePath));
            m_CacheMappingStats.LocalPathsMatchingSequencesCount++;
            m_CacheMappingStats.LocalPathsMatchingSequencesByteCount += RemoteRawSize;
            if (m_Options.IsVerbose)
            {
                ZEN_INFO("Found sequence {} at {} ({})", RemoteSequenceRawHash, LocalFilePath, NiceBytes(RemoteRawSize));
            }
        }
        // 4) Not found anywhere locally: needs download/scavenge.
        else
        {
            MissingSequenceIndexes.push_back(RemoteSequenceIndex);
        }
    }

    m_CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs();
    return MissingSequenceIndexes;
}
// Loads the saved build state of a candidate scavenge folder and reduces it to
// the subset of files that could contribute content (whole sequences or
// individual chunks) to the remote build being materialized.
//
// On success, OutScavengedLocalContent holds the (possibly pruned) chunked
// content description of the scavenge folder and OutScavengedLookup its
// lookup structures; returns true. Returns false (and clears the out content)
// when the state file is missing/old/invalid, when nothing in the folder
// overlaps the remote build, or when pruning removed everything.
bool
BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source,
                                                 ChunkedFolderContent& OutScavengedLocalContent,
                                                 ChunkedContentLookup& OutScavengedLookup)
{
    ZEN_TRACE_CPU("FindScavengeContent");

    FolderContent LocalFolderState;
    try
    {
        BuildSaveState SavedState = ReadBuildSaveStateFile(Source.StateFilePath);
        // Unversioned (pre-kVersion1) state files cannot be trusted for scavenging.
        if (SavedState.Version == BuildSaveState::NoVersion)
        {
            ZEN_DEBUG("Skipping old build state at '{}', state files before version {} can not be trusted during scavenge",
                      Source.StateFilePath,
                      BuildSaveState::kVersion1);
            return false;
        }
        OutScavengedLocalContent = std::move(SavedState.State.ChunkedContent);
        LocalFolderState = std::move(SavedState.FolderState);
    }
    catch (const std::exception& Ex)
    {
        ZEN_DEBUG("Skipping invalid build state at '{}', reason: {}", Source.StateFilePath, Ex.what());
        return false;
    }

    tsl::robin_set<uint32_t> PathIndexesToScavenge;
    PathIndexesToScavenge.reserve(OutScavengedLocalContent.Paths.size());
    std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(OutScavengedLocalContent.ChunkedContent.ChunkCounts);

    {
        // Map each raw hash to the first path that carries it; duplicates share one source.
        tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToPathIndex;

        RawHashToPathIndex.reserve(OutScavengedLocalContent.Paths.size());
        for (uint32_t ScavengedPathIndex = 0; ScavengedPathIndex < OutScavengedLocalContent.RawHashes.size(); ScavengedPathIndex++)
        {
            if (!RawHashToPathIndex.contains(OutScavengedLocalContent.RawHashes[ScavengedPathIndex]))
            {
                RawHashToPathIndex.insert_or_assign(OutScavengedLocalContent.RawHashes[ScavengedPathIndex], ScavengedPathIndex);
            }
        }

        // A path is worth scavenging when its whole sequence matches a remote
        // sequence, or when at least one of its chunks matches a remote chunk.
        for (uint32_t ScavengeSequenceIndex = 0; ScavengeSequenceIndex < OutScavengedLocalContent.ChunkedContent.SequenceRawHashes.size();
             ScavengeSequenceIndex++)
        {
            const IoHash& SequenceHash = OutScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengeSequenceIndex];
            if (auto It = RawHashToPathIndex.find(SequenceHash); It != RawHashToPathIndex.end())
            {
                uint32_t PathIndex = It->second;
                if (!PathIndexesToScavenge.contains(PathIndex))
                {
                    if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash))
                    {
                        // Whole-sequence match against the remote build.
                        PathIndexesToScavenge.insert(PathIndex);
                    }
                    else
                    {
                        // Chunk-level match: stop at the first chunk the remote build needs.
                        uint32_t ChunkOrderIndexStart = ChunkOrderOffsets[ScavengeSequenceIndex];
                        const uint32_t ChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex];
                        for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < ChunkCount; ChunkOrderIndex++)
                        {
                            const uint32_t ChunkIndex =
                                OutScavengedLocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndexStart + ChunkOrderIndex];
                            const IoHash& ChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ChunkIndex];
                            if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ChunkHash))
                            {
                                PathIndexesToScavenge.insert(PathIndex);
                                break;
                            }
                        }
                    }
                }
            }
            else
            {
                // A sequence hash without a matching path indicates an inconsistent state file.
                ZEN_WARN("Scavenged state file at '{}' for '{}' is invalid, skipping scavenging for sequence {}",
                         Source.StateFilePath,
                         Source.Path,
                         SequenceHash);
            }
        }
    }

    if (PathIndexesToScavenge.empty())
    {
        OutScavengedLocalContent = {};
        return false;
    }

    std::vector<std::filesystem::path> PathsToScavenge;
    PathsToScavenge.reserve(PathIndexesToScavenge.size());
    for (uint32_t ScavengedStatePathIndex : PathIndexesToScavenge)
    {
        PathsToScavenge.push_back(OutScavengedLocalContent.Paths[ScavengedStatePathIndex]);
    }

    // Re-scan only the interesting files on disk to confirm the saved state still matches reality.
    FolderContent ValidFolderContent =
        GetValidFolderContent(m_IOWorkerPool, m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}, 0, m_AbortFlag, m_PauseFlag);

    if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent))
    {
        std::vector<std::filesystem::path> DeletedPaths;
        FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths);

        // If the files are modified since the state was saved we ignore the files since we don't
        // want to incur the cost of scanning/hashing scavenged files
        DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end());
        if (!DeletedPaths.empty())
        {
            OutScavengedLocalContent =
                DeletePathsFromChunkedContent(OutScavengedLocalContent,
                                              BuildHashLookup(OutScavengedLocalContent.ChunkedContent.SequenceRawHashes),
                                              ChunkOrderOffsets,
                                              DeletedPaths);
        }
    }

    if (OutScavengedLocalContent.Paths.empty())
    {
        OutScavengedLocalContent = {};
        return false;
    }

    OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent);

    return true;
}
// Matches still-needed remote chunks against one scavenged content source and
// records copy operations for every match.
//
// For each remote chunk not yet satisfied, if the scavenged source contains
// the chunk, a CopyChunkData entry is created (or extended) keyed by the raw
// hash of the scavenged sequence the chunk lives in, so multiple chunks copied
// from the same source file are batched into one operation. Entries are capped
// at 1024 accumulated target locations, after which a fresh entry is started
// for the same source (the map is repointed to the newest entry).
//
// InOutRemainingChunkCount / the flags vector track which remote chunks are
// still unsatisfied; the matching counters feed the operation statistics.
void
BuildsOperationUpdateFolder::ScavengeSourceForChunks(uint32_t& InOutRemainingChunkCount,
                                                     std::vector<bool>& InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags,
                                                     tsl::robin_map<IoHash, size_t, IoHash::Hasher>& InOutRawHashToCopyChunkDataIndex,
                                                     const std::vector<std::atomic<uint32_t>>& SequenceIndexChunksLeftToWriteCounters,
                                                     const ChunkedFolderContent& ScavengedContent,
                                                     const ChunkedContentLookup& ScavengedLookup,
                                                     std::vector<CopyChunkData>& InOutCopyChunkDatas,
                                                     uint32_t ScavengedContentIndex,
                                                     uint64_t& InOutChunkMatchingRemoteCount,
                                                     uint64_t& InOutChunkMatchingRemoteByteCount)
{
    for (uint32_t RemoteChunkIndex = 0;
         RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size() && (InOutRemainingChunkCount > 0);
         RemoteChunkIndex++)
    {
        if (!InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex])
        {
            const IoHash& RemoteChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
            if (auto It = ScavengedLookup.ChunkHashToChunkIndex.find(RemoteChunkHash); It != ScavengedLookup.ChunkHashToChunkIndex.end())
            {
                // Only sequences that still have chunks left to write are valid targets.
                std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs =
                    GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex);

                if (!ChunkTargetPtrs.empty())
                {
                    const uint32_t ScavengedChunkIndex = It->second;
                    const uint64_t ScavengedChunkRawSize = ScavengedContent.ChunkedContent.ChunkRawSizes[ScavengedChunkIndex];
                    // Use the first location of the chunk inside the scavenged source as the read position.
                    const size_t ChunkSequenceLocationOffset = ScavengedLookup.ChunkSequenceLocationOffset[ScavengedChunkIndex];
                    const ChunkedContentLookup::ChunkSequenceLocation& ScavengeLocation =
                        ScavengedLookup.ChunkSequenceLocations[ChunkSequenceLocationOffset];
                    const IoHash& ScavengedSequenceRawHash =
                        ScavengedContent.ChunkedContent.SequenceRawHashes[ScavengeLocation.SequenceIndex];

                    CopyChunkData::ChunkTarget Target = {.TargetChunkLocationCount = gsl::narrow<uint32_t>(ChunkTargetPtrs.size()),
                                                         .RemoteChunkIndex = RemoteChunkIndex,
                                                         .CacheFileOffset = ScavengeLocation.Offset};
                    if (auto CopySourceIt = InOutRawHashToCopyChunkDataIndex.find(ScavengedSequenceRawHash);
                        CopySourceIt != InOutRawHashToCopyChunkDataIndex.end())
                    {
                        CopyChunkData& Data = InOutCopyChunkDatas[CopySourceIt->second];
                        if (Data.TargetChunkLocationPtrs.size() > 1024)
                        {
                            // Existing batch is full: start a new batch for this source and
                            // make subsequent matches append to the new one.
                            InOutRawHashToCopyChunkDataIndex.insert_or_assign(ScavengedSequenceRawHash, InOutCopyChunkDatas.size());
                            InOutCopyChunkDatas.push_back(CopyChunkData{.ScavengeSourceIndex = ScavengedContentIndex,
                                                                        .SourceSequenceIndex = ScavengeLocation.SequenceIndex,
                                                                        .TargetChunkLocationPtrs = ChunkTargetPtrs,
                                                                        .ChunkTargets = std::vector<CopyChunkData::ChunkTarget>{Target}});
                        }
                        else
                        {
                            // Append to the existing batch for this source sequence.
                            Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(),
                                                                ChunkTargetPtrs.begin(),
                                                                ChunkTargetPtrs.end());
                            Data.ChunkTargets.push_back(Target);
                        }
                    }
                    else
                    {
                        // First chunk found in this source sequence: create the batch.
                        InOutRawHashToCopyChunkDataIndex.insert_or_assign(ScavengedSequenceRawHash, InOutCopyChunkDatas.size());
                        InOutCopyChunkDatas.push_back(CopyChunkData{.ScavengeSourceIndex = ScavengedContentIndex,
                                                                    .SourceSequenceIndex = ScavengeLocation.SequenceIndex,
                                                                    .TargetChunkLocationPtrs = ChunkTargetPtrs,
                                                                    .ChunkTargets = std::vector<CopyChunkData::ChunkTarget>{Target}});
                    }
                    InOutChunkMatchingRemoteCount++;
                    InOutChunkMatchingRemoteByteCount += ScavengedChunkRawSize;
                    InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true;
                    InOutRemainingChunkCount--;
                }
            }
        }
    }
}
InOutChunkMatchingRemoteByteCount += ScavengedChunkRawSize; + InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true; + InOutRemainingChunkCount--; + } + } + } + } +} + +std::filesystem::path +BuildsOperationUpdateFolder::FindDownloadedChunk(const IoHash& ChunkHash) +{ + ZEN_TRACE_CPU("FindDownloadedChunk"); + + std::filesystem::path CompressedChunkPath = m_TempDownloadFolderPath / ChunkHash.ToHexString(); + if (IsFile(CompressedChunkPath)) + { + IoBuffer ExistingCompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (ExistingCompressedPart) + { + IoHash RawHash; + uint64_t RawSize; + if (CompressedBuffer::ValidateCompressedHeader(ExistingCompressedPart, + RawHash, + RawSize, + /*OutOptionalTotalCompressedSize*/ nullptr)) + { + return CompressedChunkPath; + } + else + { + std::error_code DummyEc; + RemoveFile(CompressedChunkPath, DummyEc); + } + } + } + return {}; +} + +std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> +BuildsOperationUpdateFolder::GetRemainingChunkTargets(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + uint32_t ChunkIndex) +{ + ZEN_TRACE_CPU("GetRemainingChunkTargets"); + + std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(m_RemoteLookup, ChunkIndex); + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs; + if (!ChunkSources.empty()) + { + ChunkTargetPtrs.reserve(ChunkSources.size()); + for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources) + { + if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0) + { + ChunkTargetPtrs.push_back(&Source); + } + } + } + return ChunkTargetPtrs; +}; + +uint64_t +BuildsOperationUpdateFolder::GetChunkWriteCount(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + uint32_t ChunkIndex) +{ + ZEN_TRACE_CPU("GetChunkWriteCount"); + + uint64_t WriteCount = 0; + std::span<const 
ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(m_RemoteLookup, ChunkIndex); + for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources) + { + if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0) + { + WriteCount++; + } + } + return WriteCount; +}; + +void +BuildsOperationUpdateFolder::CheckRequiredDiskSpace(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex) +{ + tsl::robin_set<uint32_t> ExistingRemotePaths; + + if (m_Options.EnableTargetFolderScavenging) + { + for (uint32_t LocalPathIndex = 0; LocalPathIndex < m_LocalContent.Paths.size(); LocalPathIndex++) + { + const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex]; + const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex]; + + if (auto RemotePathIt = RemotePathToRemoteIndex.find(LocalPath.generic_string()); RemotePathIt != RemotePathToRemoteIndex.end()) + { + const uint32_t RemotePathIndex = RemotePathIt->second; + if (m_RemoteContent.RawHashes[RemotePathIndex] == RawHash) + { + ExistingRemotePaths.insert(RemotePathIndex); + } + } + } + } + + uint64_t RequiredSpace = 0; + for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++) + { + if (!ExistingRemotePaths.contains(RemotePathIndex)) + { + RequiredSpace += m_RemoteContent.RawSizes[RemotePathIndex]; + } + } + + std::error_code Ec; + DiskSpace Space = DiskSpaceInfo(m_Path, Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Get free disk space for target path '{}' FAILED, reason: {}", m_Path, Ec.message())); + } + if (Space.Free < (RequiredSpace + 16u * 1024u * 1024u)) + { + throw std::runtime_error( + fmt::format("Not enough free space for target path '{}', {} of free space is needed but only {} is available", + m_Path, + NiceBytes(RequiredSpace), + NiceBytes(Space.Free))); + } +} + +void +BuildsOperationUpdateFolder::WriteScavengedSequenceToCache(const std::filesystem::path& 
ScavengeRootPath, + const ChunkedFolderContent& ScavengedContent, + const ScavengedSequenceCopyOperation& ScavengeOp) +{ + ZEN_TRACE_CPU("WriteScavengedSequenceToCache"); + + const std::filesystem::path ScavengedPath = ScavengedContent.Paths[ScavengeOp.ScavengedPathIndex]; + const std::filesystem::path ScavengedFilePath = (ScavengeRootPath / ScavengedPath).make_preferred(); + ZEN_ASSERT_SLOW(FileSizeFromPath(ScavengedFilePath) == ScavengeOp.RawSize); + + const IoHash& RemoteSequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[ScavengeOp.RemoteSequenceIndex]; + const std::filesystem::path TempFilePath = GetTempChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash); + + const uint64_t RawSize = ScavengedContent.RawSizes[ScavengeOp.ScavengedPathIndex]; + FastCopyFile(m_Options.AllowFileClone, + m_Options.UseSparseFiles, + ScavengedFilePath, + TempFilePath, + RawSize, + m_DiskStats.WriteCount, + m_DiskStats.WriteByteCount, + m_DiskStats.CloneCount, + m_DiskStats.CloneByteCount); + + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash); + RenameFile(TempFilePath, CacheFilePath); + + m_WrittenChunkByteCount += RawSize; + if (m_Options.ValidateCompletedSequences) + { + m_ValidatedChunkByteCount += RawSize; + } +} + +void +BuildsOperationUpdateFolder::WriteLooseChunk(const uint32_t RemoteChunkIndex, + const BlobsExistsResult& ExistsResult, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + std::atomic<uint64_t>& WritePartsComplete, + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs, + BufferedWriteFileCache& WriteCache, + ParallelWork& Work, + uint64_t TotalRequestCount, + uint64_t TotalPartWriteCount, + FilteredRate& FilteredDownloadedBytesPerSecond, + FilteredRate& FilteredWrittenBytesPerSecond) +{ + const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + std::filesystem::path 
ExistingCompressedChunkPath = FindDownloadedChunk(ChunkHash); + if (!ExistingCompressedChunkPath.empty()) + { + if (m_DownloadStats.RequestsCompleteCount.fetch_add(1) + 1 == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + } + if (!m_AbortFlag) + { + if (!ExistingCompressedChunkPath.empty()) + { + Work.ScheduleWork( + m_IOWorkerPool, + [this, + SequenceIndexChunksLeftToWriteCounters, + &WriteCache, + &Work, + &WritePartsComplete, + TotalPartWriteCount, + &FilteredWrittenBytesPerSecond, + RemoteChunkIndex, + ChunkTargetPtrs = std::move(ChunkTargetPtrs), + CompressedChunkPath = std::move(ExistingCompressedChunkPath)](std::atomic<bool>& AbortFlag) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("Async_WritePreDownloadedChunk"); + + FilteredWrittenBytesPerSecond.Start(); + + const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + + IoBuffer CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (!CompressedPart) + { + throw std::runtime_error( + fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath)); + } + + bool NeedHashVerify = + WriteCompressedChunkToCache(ChunkHash, ChunkTargetPtrs, WriteCache, std::move(CompressedPart)); + bool WritePartsDone = WritePartsComplete.fetch_add(1) + 1 == TotalPartWriteCount; + + if (!AbortFlag) + { + if (WritePartsDone) + { + FilteredWrittenBytesPerSecond.Stop(); + } + + std::error_code Ec = TryRemoveFile(CompressedChunkPath); + if (Ec) + { + ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", CompressedChunkPath, Ec.value(), Ec.message()); + } + + std::vector<uint32_t> CompletedSequences = + CompleteChunkTargets(ChunkTargetPtrs, SequenceIndexChunksLeftToWriteCounters); + WriteCache.Close(CompletedSequences); + if (NeedHashVerify) + { + VerifyAndCompleteChunkSequencesAsync(CompletedSequences, Work); + } + else + { + FinalizeChunkSequences(CompletedSequences); + } + } + } + }); + } + else + { + 
Work.ScheduleWork(m_NetworkPool, + [this, + &ExistsResult, + SequenceIndexChunksLeftToWriteCounters, + &WriteCache, + &Work, + &WritePartsComplete, + TotalPartWriteCount, + TotalRequestCount, + &FilteredDownloadedBytesPerSecond, + &FilteredWrittenBytesPerSecond, + RemoteChunkIndex, + ChunkTargetPtrs = std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>( + std::move(ChunkTargetPtrs))](std::atomic<bool>&) mutable { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_DownloadChunk"); + + FilteredDownloadedBytesPerSecond.Start(); + DownloadBuildBlob(RemoteChunkIndex, + ExistsResult, + Work, + TotalRequestCount, + FilteredDownloadedBytesPerSecond, + [this, + &ExistsResult, + SequenceIndexChunksLeftToWriteCounters, + &WriteCache, + &Work, + &WritePartsComplete, + TotalPartWriteCount, + RemoteChunkIndex, + &FilteredWrittenBytesPerSecond, + ChunkTargetPtrs = std::move(ChunkTargetPtrs)](IoBuffer&& Payload) mutable { + AsyncWriteDownloadedChunk(RemoteChunkIndex, + ExistsResult, + std::move(ChunkTargetPtrs), + WriteCache, + Work, + std::move(Payload), + SequenceIndexChunksLeftToWriteCounters, + WritePartsComplete, + TotalPartWriteCount, + FilteredWrittenBytesPerSecond); + }); + } + }); + } + } +} + +void +BuildsOperationUpdateFolder::DownloadBuildBlob(uint32_t RemoteChunkIndex, + const BlobsExistsResult& ExistsResult, + ParallelWork& Work, + uint64_t TotalRequestCount, + FilteredRate& FilteredDownloadedBytesPerSecond, + std::function<void(IoBuffer&& Payload)>&& OnDownloaded) +{ + const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + // FilteredDownloadedBytesPerSecond.Start(); + IoBuffer BuildBlob; + const bool ExistsInCache = m_Storage.CacheStorage && ExistsResult.ExistingBlobs.contains(ChunkHash); + if (ExistsInCache) + { + BuildBlob = m_Storage.CacheStorage->GetBuildBlob(m_BuildId, ChunkHash); + } + if (BuildBlob) + { + uint64_t BlobSize = BuildBlob.GetSize(); + m_DownloadStats.DownloadedChunkCount++; + 
// Fetches one remote chunk blob, preferring the cache storage when the exists
// query already confirmed the blob is cached. Large blobs (>=
// LargeAttachmentSize) go through the multipart DownloadLargeBlob path; small
// blobs are fetched with a single request. HTTP exceptions are swallowed while
// aborting so a cancelled operation does not surface spurious errors.
// OnDownloaded is invoked with the payload once it is available; request/byte
// counters and the download-rate tracker are updated on completion.
void
BuildsOperationUpdateFolder::DownloadBuildBlob(uint32_t RemoteChunkIndex,
                                               const BlobsExistsResult& ExistsResult,
                                               ParallelWork& Work,
                                               uint64_t TotalRequestCount,
                                               FilteredRate& FilteredDownloadedBytesPerSecond,
                                               std::function<void(IoBuffer&& Payload)>&& OnDownloaded)
{
    const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
    // FilteredDownloadedBytesPerSecond.Start();
    IoBuffer BuildBlob;
    const bool ExistsInCache = m_Storage.CacheStorage && ExistsResult.ExistingBlobs.contains(ChunkHash);
    if (ExistsInCache)
    {
        BuildBlob = m_Storage.CacheStorage->GetBuildBlob(m_BuildId, ChunkHash);
    }
    if (BuildBlob)
    {
        // Cache hit: account and deliver immediately.
        uint64_t BlobSize = BuildBlob.GetSize();
        m_DownloadStats.DownloadedChunkCount++;
        m_DownloadStats.DownloadedChunkByteCount += BlobSize;
        if (m_DownloadStats.RequestsCompleteCount.fetch_add(1) + 1 == TotalRequestCount)
        {
            FilteredDownloadedBytesPerSecond.Stop();
        }
        OnDownloaded(std::move(BuildBlob));
    }
    else
    {
        if (m_RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= m_Options.LargeAttachmentSize)
        {
            // Large blob: download in multiple parts; delivery happens in the callback.
            DownloadLargeBlob(
                *m_Storage.BuildStorage,
                m_TempDownloadFolderPath,
                m_BuildId,
                ChunkHash,
                m_Options.PreferredMultipartChunkSize,
                Work,
                m_NetworkPool,
                m_DownloadStats.DownloadedChunkByteCount,
                m_DownloadStats.MultipartAttachmentCount,
                [this, &FilteredDownloadedBytesPerSecond, TotalRequestCount, OnDownloaded = std::move(OnDownloaded)](IoBuffer&& Payload) {
                    m_DownloadStats.DownloadedChunkCount++;
                    if (m_DownloadStats.RequestsCompleteCount.fetch_add(1) + 1 == TotalRequestCount)
                    {
                        FilteredDownloadedBytesPerSecond.Stop();
                    }

                    OnDownloaded(std::move(Payload));
                });
        }
        else
        {
            try
            {
                BuildBlob = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, ChunkHash);
            }
            catch (const std::exception&)
            {
                // Silence http errors due to abort
                if (!m_AbortFlag)
                {
                    throw;
                }
            }
            if (!m_AbortFlag)
            {
                if (!BuildBlob)
                {
                    throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash));
                }

                if (!m_AbortFlag)
                {
                    uint64_t BlobSize = BuildBlob.GetSize();
                    m_DownloadStats.DownloadedChunkCount++;
                    m_DownloadStats.DownloadedChunkByteCount += BlobSize;
                    if (m_DownloadStats.RequestsCompleteCount.fetch_add(1) + 1 == TotalRequestCount)
                    {
                        FilteredDownloadedBytesPerSecond.Stop();
                    }

                    OnDownloaded(std::move(BuildBlob));
                }
            }
        }
    }
}

// Downloads a run of byte ranges from one block blob. Ranges are served in
// batches capped by the storage host's MaxRangeCountPerRequest; the cache
// storage is tried first when the block is known to be cached, falling back to
// the build storage. A backend may ignore the range request and return the
// whole payload (empty Ranges in the result) — in that case the remainder of
// the requested ranges is satisfied from the full payload, which is spilled to
// disk and optionally uploaded to the cache. OnDownloaded is invoked once per
// completed batch with either an in-memory buffer or an on-disk path.
void
BuildsOperationUpdateFolder::DownloadPartialBlock(
    std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRanges,
    size_t BlockRangeStartIndex,
    size_t BlockRangeCount,
    const BlobsExistsResult& ExistsResult,
    uint64_t TotalRequestCount,
    FilteredRate& FilteredDownloadedBytesPerSecond,
    std::function<void(IoBuffer&& InMemoryBuffer,
                       const std::filesystem::path& OnDiskPath,
                       size_t BlockRangeStartIndex,
                       std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>&& OnDownloaded)
{
    // All ranges in [BlockRangeStartIndex, +BlockRangeCount) belong to the same block.
    const uint32_t BlockIndex = BlockRanges[BlockRangeStartIndex].BlockIndex;

    const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];

    // Shared post-processing: account the download, derive a unique temp name
    // from the range set, spill to disk when the buffer is too large, and
    // forward to the caller.
    auto ProcessDownload = [this](
                               const ChunkBlockDescription& BlockDescription,
                               IoBuffer&& BlockRangeBuffer,
                               size_t BlockRangeStartIndex,
                               std::span<const std::pair<uint64_t, uint64_t>> BlockOffsetAndLengths,
                               uint64_t TotalRequestCount,
                               FilteredRate& FilteredDownloadedBytesPerSecond,
                               const std::function<void(IoBuffer && InMemoryBuffer,
                                                        const std::filesystem::path& OnDiskPath,
                                                        size_t BlockRangeStartIndex,
                                                        std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>& OnDownloaded) {
        uint64_t BlockRangeBufferSize = BlockRangeBuffer.GetSize();
        m_DownloadStats.DownloadedBlockCount++;
        m_DownloadStats.DownloadedBlockByteCount += BlockRangeBufferSize;
        // One "request" per range; stop the rate tracker when the last one lands.
        if (m_DownloadStats.RequestsCompleteCount.fetch_add(BlockOffsetAndLengths.size()) + BlockOffsetAndLengths.size() ==
            TotalRequestCount)
        {
            FilteredDownloadedBytesPerSecond.Stop();
        }

        // Hash the (offset,length) pairs so different range sets of the same
        // block get distinct temp file names.
        IoHashStream RangeId;
        for (const std::pair<uint64_t, uint64_t>& Range : BlockOffsetAndLengths)
        {
            RangeId.Append(&Range.first, sizeof(uint64_t));
            RangeId.Append(&Range.second, sizeof(uint64_t));
        }
        std::filesystem::path BlockChunkPath =
            TryMoveDownloadedChunk(BlockRangeBuffer,
                                   m_TempBlockFolderPath / fmt::format("{}_{}", BlockDescription.BlockHash, RangeId.GetHash()),
                                   /* ForceDiskBased */ BlockRangeBufferSize > m_Options.MaximumInMemoryPayloadSize);

        if (!m_AbortFlag)
        {
            OnDownloaded(std::move(BlockRangeBuffer), std::move(BlockChunkPath), BlockRangeStartIndex, BlockOffsetAndLengths);
        }
    };

    // Flatten the descriptors into (offset, length) pairs for the storage APIs.
    std::vector<std::pair<uint64_t, uint64_t>> Ranges;
    Ranges.reserve(BlockRangeCount);
    for (size_t BlockRangeIndex = BlockRangeStartIndex; BlockRangeIndex < BlockRangeStartIndex + BlockRangeCount; BlockRangeIndex++)
    {
        const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = BlockRanges[BlockRangeIndex];
        Ranges.push_back(std::make_pair(BlockRange.RangeStart, BlockRange.RangeLength));
    }

    const bool ExistsInCache = m_Storage.CacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash);

    size_t SubBlockRangeCount = BlockRangeCount;
    size_t SubRangeCountComplete = 0;
    std::span<const std::pair<uint64_t, uint64_t>> RangesSpan(Ranges);
    while (SubRangeCountComplete < SubBlockRangeCount)
    {
        if (m_AbortFlag)
        {
            break;
        }

        // First try to get subrange from cache.
        // If not successful, try to get the ranges from the build store and adapt SubRangeCount...

        size_t SubRangeStartIndex = BlockRangeStartIndex + SubRangeCountComplete;
        if (ExistsInCache)
        {
            size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, m_Storage.CacheHost.Caps.MaxRangeCountPerRequest);

            if (SubRangeCount == 1)
            {
                // Legacy single-range path, prefer that for max compatibility

                const std::pair<uint64_t, uint64_t> SubRange = RangesSpan[SubRangeCountComplete];
                IoBuffer PayloadBuffer =
                    m_Storage.CacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash, SubRange.first, SubRange.second);
                if (m_AbortFlag)
                {
                    break;
                }
                if (PayloadBuffer)
                {
                    // Single-range payload starts at offset 0 within the returned buffer.
                    ProcessDownload(BlockDescription,
                                    std::move(PayloadBuffer),
                                    SubRangeStartIndex,
                                    std::vector<std::pair<uint64_t, uint64_t>>{std::make_pair(0u, SubRange.second)},
                                    TotalRequestCount,
                                    FilteredDownloadedBytesPerSecond,
                                    OnDownloaded);
                    SubRangeCountComplete += SubRangeCount;
                    continue;
                }
            }
            else
            {
                auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount);

                BuildStorageCache::BuildBlobRanges RangeBuffers =
                    m_Storage.CacheStorage->GetBuildBlobRanges(m_BuildId, BlockDescription.BlockHash, SubRanges);
                if (m_AbortFlag)
                {
                    break;
                }
                if (RangeBuffers.PayloadBuffer)
                {
                    if (RangeBuffers.Ranges.empty())
                    {
                        // Backend returned the whole payload: it covers all remaining ranges.
                        SubRangeCount = Ranges.size() - SubRangeCountComplete;
                        ProcessDownload(BlockDescription,
                                        std::move(RangeBuffers.PayloadBuffer),
                                        SubRangeStartIndex,
                                        RangesSpan.subspan(SubRangeCountComplete, SubRangeCount),
                                        TotalRequestCount,
                                        FilteredDownloadedBytesPerSecond,
                                        OnDownloaded);
                        SubRangeCountComplete += SubRangeCount;
                        continue;
                    }
                    else if (RangeBuffers.Ranges.size() == SubRangeCount)
                    {
                        ProcessDownload(BlockDescription,
                                        std::move(RangeBuffers.PayloadBuffer),
                                        SubRangeStartIndex,
                                        RangeBuffers.Ranges,
                                        TotalRequestCount,
                                        FilteredDownloadedBytesPerSecond,
                                        OnDownloaded);
                        SubRangeCountComplete += SubRangeCount;
                        continue;
                    }
                    // Mismatched range count from the cache: fall through to build storage.
                }
            }
        }

        // Cache miss (or cache returned nothing usable): fetch from build storage.
        size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, m_Storage.BuildStorageHost.Caps.MaxRangeCountPerRequest);

        auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount);

        BuildStorageBase::BuildBlobRanges RangeBuffers;

        try
        {
            RangeBuffers = m_Storage.BuildStorage->GetBuildBlobRanges(m_BuildId, BlockDescription.BlockHash, SubRanges);
        }
        catch (const std::exception&)
        {
            // Silence http errors due to abort
            if (!m_AbortFlag)
            {
                throw;
            }
        }

        if (!m_AbortFlag)
        {
            if (RangeBuffers.PayloadBuffer)
            {
                if (RangeBuffers.Ranges.empty())
                {
                    // Jupiter will ignore the ranges and send the whole payload if it fetches the payload from S3
                    // Upload to cache (if enabled) and use the whole payload for the remaining ranges

                    const uint64_t Size = RangeBuffers.PayloadBuffer.GetSize();

                    const bool PopulateCache = !ExistsInCache && m_Storage.CacheStorage && m_Options.PopulateCache;

                    std::filesystem::path BlockPath =
                        TryMoveDownloadedChunk(RangeBuffers.PayloadBuffer,
                                               m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(),
                                               /* ForceDiskBased */ PopulateCache || Size > m_Options.MaximumInMemoryPayloadSize);
                    if (!BlockPath.empty())
                    {
                        // Re-open from disk so the buffer is file-backed (and cleaned up on close).
                        RangeBuffers.PayloadBuffer = IoBufferBuilder::MakeFromFile(BlockPath);
                        if (!RangeBuffers.PayloadBuffer)
                        {
                            throw std::runtime_error(
                                fmt::format("Failed to read block {} from temporary path '{}'", BlockDescription.BlockHash, BlockPath));
                        }
                        RangeBuffers.PayloadBuffer.SetDeleteOnClose(true);
                    }

                    if (PopulateCache)
                    {
                        m_Storage.CacheStorage->PutBuildBlob(m_BuildId,
                                                             BlockDescription.BlockHash,
                                                             ZenContentType::kCompressedBinary,
                                                             CompositeBuffer(SharedBuffer(RangeBuffers.PayloadBuffer)));
                    }

                    if (m_AbortFlag)
                    {
                        break;
                    }

                    // Whole payload satisfies every remaining range.
                    SubRangeCount = Ranges.size() - SubRangeCountComplete;
                    ProcessDownload(BlockDescription,
                                    std::move(RangeBuffers.PayloadBuffer),
                                    SubRangeStartIndex,
                                    RangesSpan.subspan(SubRangeCountComplete, SubRangeCount),
                                    TotalRequestCount,
                                    FilteredDownloadedBytesPerSecond,
                                    OnDownloaded);
                }
                else
                {
                    if (RangeBuffers.Ranges.size() != SubRanges.size())
                    {
                        throw std::runtime_error(fmt::format("Fetching {} ranges from {} resulted in {} ranges",
                                                             SubRanges.size(),
                                                             BlockDescription.BlockHash,
                                                             RangeBuffers.Ranges.size()));
                    }
                    ProcessDownload(BlockDescription,
                                    std::move(RangeBuffers.PayloadBuffer),
                                    SubRangeStartIndex,
                                    RangeBuffers.Ranges,
                                    TotalRequestCount,
                                    FilteredDownloadedBytesPerSecond,
                                    OnDownloaded);
                }
            }
            else
            {
                throw std::runtime_error(
                    fmt::format("Block {} is missing when fetching {} ranges", BlockDescription.BlockHash, SubRangeCount));
            }

            SubRangeCountComplete += SubRangeCount;
        }
    }
}
IoBufferBuilder::MakeFromFile(BlockPath); + if (!RangeBuffers.PayloadBuffer) + { + throw std::runtime_error( + fmt::format("Failed to read block {} from temporary path '{}'", BlockDescription.BlockHash, BlockPath)); + } + RangeBuffers.PayloadBuffer.SetDeleteOnClose(true); + } + + if (PopulateCache) + { + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + BlockDescription.BlockHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(SharedBuffer(RangeBuffers.PayloadBuffer))); + } + + if (m_AbortFlag) + { + break; + } + + SubRangeCount = Ranges.size() - SubRangeCountComplete; + ProcessDownload(BlockDescription, + std::move(RangeBuffers.PayloadBuffer), + SubRangeStartIndex, + RangesSpan.subspan(SubRangeCountComplete, SubRangeCount), + TotalRequestCount, + FilteredDownloadedBytesPerSecond, + OnDownloaded); + } + else + { + if (RangeBuffers.Ranges.size() != SubRanges.size()) + { + throw std::runtime_error(fmt::format("Fetching {} ranges from {} resulted in {} ranges", + SubRanges.size(), + BlockDescription.BlockHash, + RangeBuffers.Ranges.size())); + } + ProcessDownload(BlockDescription, + std::move(RangeBuffers.PayloadBuffer), + SubRangeStartIndex, + RangeBuffers.Ranges, + TotalRequestCount, + FilteredDownloadedBytesPerSecond, + OnDownloaded); + } + } + else + { + throw std::runtime_error( + fmt::format("Block {} is missing when fetching {} ranges", BlockDescription.BlockHash, SubRangeCount)); + } + + SubRangeCountComplete += SubRangeCount; + } + } +} + +std::vector<uint32_t> +BuildsOperationUpdateFolder::WriteLocalChunkToCache(CloneQueryInterface* CloneQuery, + const CopyChunkData& CopyData, + const std::vector<ChunkedFolderContent>& ScavengedContents, + const std::vector<ChunkedContentLookup>& ScavengedLookups, + const std::vector<std::filesystem::path>& ScavengedPaths, + BufferedWriteFileCache& WriteCache) +{ + ZEN_TRACE_CPU("WriteLocalChunkToCache"); + + std::filesystem::path SourceFilePath; + + if (CopyData.ScavengeSourceIndex == (uint32_t)-1) + { + const 
uint32_t LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex]; + SourceFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred(); + } + else + { + const ChunkedFolderContent& ScavengedContent = ScavengedContents[CopyData.ScavengeSourceIndex]; + const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[CopyData.ScavengeSourceIndex]; + const std::filesystem::path ScavengedPath = ScavengedPaths[CopyData.ScavengeSourceIndex]; + const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex]; + SourceFilePath = (ScavengedPath / ScavengedContent.Paths[ScavengedPathIndex]).make_preferred(); + } + ZEN_ASSERT_SLOW(IsFile(SourceFilePath)); + ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty()); + + uint64_t CacheLocalFileBytesRead = 0; + + size_t TargetStart = 0; + const std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> AllTargets(CopyData.TargetChunkLocationPtrs); + + struct WriteOp + { + const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; + uint64_t CacheFileOffset = (uint64_t)-1; + uint32_t ChunkIndex = (uint32_t)-1; + }; + + std::vector<WriteOp> WriteOps; + + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Sort"); + WriteOps.reserve(AllTargets.size()); + for (const CopyChunkData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) + { + std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> TargetRange = + AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) + { + WriteOps.push_back( + WriteOp{.Target = Target, .CacheFileOffset = ChunkTarget.CacheFileOffset, .ChunkIndex = ChunkTarget.RemoteChunkIndex}); + } + TargetStart += ChunkTarget.TargetChunkLocationCount; + } + + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; 
+ } + else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { + return false; + } + if (Lhs.Target->Offset < Rhs.Target->Offset) + { + return true; + } + return false; + }); + } + + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Write"); + + tsl::robin_set<uint32_t> ChunkIndexesWritten; + + BufferedOpenFile SourceFile(SourceFilePath, + m_DiskStats.OpenReadCount, + m_DiskStats.CurrentOpenFileCount, + m_DiskStats.ReadCount, + m_DiskStats.ReadByteCount); + + bool CanCloneSource = CloneQuery && CloneQuery->CanClone(SourceFile.Handle()); + + BufferedWriteFileCache::Local LocalWriter(WriteCache); + + for (size_t WriteOpIndex = 0; WriteOpIndex < WriteOps.size();) + { + if (m_AbortFlag) + { + break; + } + const WriteOp& Op = WriteOps[WriteOpIndex]; + + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + const uint32_t RemotePathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; + const uint64_t TargetSize = m_RemoteContent.RawSizes[RemotePathIndex]; + const uint64_t ChunkSize = m_RemoteContent.ChunkedContent.ChunkRawSizes[Op.ChunkIndex]; + + uint64_t ReadLength = ChunkSize; + size_t WriteCount = 1; + uint64_t OpSourceEnd = Op.CacheFileOffset + ChunkSize; + uint64_t OpTargetEnd = Op.Target->Offset + ChunkSize; + while ((WriteOpIndex + WriteCount) < WriteOps.size()) + { + const WriteOp& NextOp = WriteOps[WriteOpIndex + WriteCount]; + if (NextOp.Target->SequenceIndex != Op.Target->SequenceIndex) + { + break; + } + if (NextOp.Target->Offset != OpTargetEnd) + { + break; + } + if (NextOp.CacheFileOffset != OpSourceEnd) + { + break; + } + const uint64_t NextChunkLength = m_RemoteContent.ChunkedContent.ChunkRawSizes[NextOp.ChunkIndex]; + if (ReadLength + NextChunkLength > BufferedOpenFile::BlockSize) + { + break; + } + ReadLength += NextChunkLength; + OpSourceEnd += NextChunkLength; + OpTargetEnd += NextChunkLength; + WriteCount++; + } + + { + bool DidClone = false; + + if (CanCloneSource) + { + uint64_t PreBytes = 0; + uint64_t PostBytes = 
0; + uint64_t ClonableBytes = + CloneQuery->GetClonableRange(Op.CacheFileOffset, Op.Target->Offset, ReadLength, PreBytes, PostBytes); + if (ClonableBytes > 0) + { + // We need to open the file... + BufferedWriteFileCache::Local::Writer* Writer = LocalWriter.GetWriter(RemoteSequenceIndex); + if (!Writer) + { + Writer = LocalWriter.PutWriter(RemoteSequenceIndex, std::make_unique<BufferedWriteFileCache::Local::Writer>()); + + Writer->File = std::make_unique<BasicFile>(); + + const std::filesystem::path FileName = + GetTempChunkedSequenceFileName(m_CacheFolderPath, + m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]); + Writer->File->Open(FileName, BasicFile::Mode::kWrite); + if (m_Options.UseSparseFiles) + { + PrepareFileForScatteredWrite(Writer->File->Handle(), TargetSize); + } + } + DidClone = CloneQuery->TryClone(SourceFile.Handle(), + Writer->File->Handle(), + Op.CacheFileOffset + PreBytes, + Op.Target->Offset + PreBytes, + ClonableBytes, + TargetSize); + if (DidClone) + { + m_DiskStats.WriteCount++; + m_DiskStats.WriteByteCount += ClonableBytes; + + m_DiskStats.CloneCount++; + m_DiskStats.CloneByteCount += ClonableBytes; + + m_WrittenChunkByteCount += ClonableBytes; + + if (PreBytes > 0) + { + CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, PreBytes); + const uint64_t FileOffset = Op.Target->Offset; + + WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex); + } + if (PostBytes > 0) + { + CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset + ReadLength - PostBytes, PostBytes); + const uint64_t FileOffset = Op.Target->Offset + ReadLength - PostBytes; + + WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex); + } + } + } + } + + if (!DidClone) + { + CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ReadLength); + + const uint64_t FileOffset = Op.Target->Offset; + + 
WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex); + } + } + + CacheLocalFileBytesRead += ReadLength; // TODO: This should be the sum of unique chunk sizes? + + WriteOpIndex += WriteCount; + } + } + + if (m_Options.IsVerbose) + { + ZEN_INFO("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), SourceFilePath); + } + + std::vector<uint32_t> Result; + Result.reserve(WriteOps.size()); + + for (const WriteOp& Op : WriteOps) + { + Result.push_back(Op.Target->SequenceIndex); + } + return Result; +} + +bool +BuildsOperationUpdateFolder::WriteCompressedChunkToCache( + const IoHash& ChunkHash, + const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, + BufferedWriteFileCache& WriteCache, + IoBuffer&& CompressedPart) +{ + ZEN_TRACE_CPU("WriteCompressedChunkToCache"); + + auto ChunkHashToChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(ChunkHashToChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end()); + if (IsSingleFileChunk(m_RemoteContent, ChunkTargetPtrs)) + { + const std::uint32_t SequenceIndex = ChunkTargetPtrs.front()->SequenceIndex; + const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]; + StreamDecompress(SequenceRawHash, CompositeBuffer(std::move(CompressedPart))); + return false; + } + else + { + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompositeBuffer(std::move(CompressedPart)), RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", ChunkHash)); + } + if (RawHash != ChunkHash) + { + throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, ChunkHash)); + } + + BufferedWriteFileCache::Local LocalWriter(WriteCache); + + IoHashStream Hash; + bool CouldDecompress = Compressed.DecompressToStream( + 0, + (uint64_t)-1, + 
[&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset); + ZEN_TRACE_CPU("Async_StreamDecompress_Write"); + m_DiskStats.ReadByteCount += SourceSize; + if (!m_AbortFlag) + { + for (const ChunkedContentLookup::ChunkSequenceLocation* TargetPtr : ChunkTargetPtrs) + { + const auto& Target = *TargetPtr; + const uint64_t FileOffset = Target.Offset + Offset; + const uint32_t SequenceIndex = Target.SequenceIndex; + const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + + WriteSequenceChunkToCache(LocalWriter, RangeBuffer, SequenceIndex, FileOffset, PathIndex); + } + + return true; + } + return false; + }); + + if (m_AbortFlag) + { + return false; + } + + if (!CouldDecompress) + { + throw std::runtime_error(fmt::format("Failed to decompress large chunk {}", ChunkHash)); + } + + return true; + } +} + +void +BuildsOperationUpdateFolder::StreamDecompress(const IoHash& SequenceRawHash, CompositeBuffer&& CompressedPart) +{ + ZEN_TRACE_CPU("StreamDecompress"); + const std::filesystem::path TempChunkSequenceFileName = GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash); + TemporaryFile DecompressedTemp; + std::error_code Ec; + DecompressedTemp.CreateTemporary(TempChunkSequenceFileName.parent_path(), Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Failed creating temporary file for decompressing large blob {}, reason: ({}) {}", + SequenceRawHash, + Ec.value(), + Ec.message())); + } + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompressedPart, RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", SequenceRawHash)); + } + if (RawHash != SequenceRawHash) + { + throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, SequenceRawHash)); + } + 
PrepareFileForScatteredWrite(DecompressedTemp.Handle(), RawSize); + + IoHashStream Hash; + bool CouldDecompress = + Compressed.DecompressToStream(0, + (uint64_t)-1, + [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset); + ZEN_TRACE_CPU("StreamDecompress_Write"); + m_DiskStats.ReadCount++; + m_DiskStats.ReadByteCount += SourceSize; + if (!m_AbortFlag) + { + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + if (m_Options.ValidateCompletedSequences) + { + Hash.Append(Segment.GetView()); + m_ValidatedChunkByteCount += Segment.GetSize(); + } + DecompressedTemp.Write(Segment, Offset); + Offset += Segment.GetSize(); + m_DiskStats.WriteByteCount += Segment.GetSize(); + m_DiskStats.WriteCount++; + m_WrittenChunkByteCount += Segment.GetSize(); + } + return true; + } + return false; + }); + + if (m_AbortFlag) + { + return; + } + + if (!CouldDecompress) + { + throw std::runtime_error(fmt::format("Failed to decompress large blob {}", SequenceRawHash)); + } + if (m_Options.ValidateCompletedSequences) + { + const IoHash VerifyHash = Hash.GetHash(); + if (VerifyHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Decompressed blob payload hash {} does not match expected hash {}", VerifyHash, SequenceRawHash)); + } + } + DecompressedTemp.MoveTemporaryIntoPlace(TempChunkSequenceFileName, Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Failed moving temporary file for decompressing large blob {}, reason: ({}) {}", + SequenceRawHash, + Ec.value(), + Ec.message())); + } + // WriteChunkStats.ChunkCountWritten++; +} + +void +BuildsOperationUpdateFolder::WriteSequenceChunkToCache(BufferedWriteFileCache::Local& LocalWriter, + const CompositeBuffer& Chunk, + const uint32_t SequenceIndex, + const uint64_t FileOffset, + const uint32_t PathIndex) +{ + ZEN_TRACE_CPU("WriteSequenceChunkToCache"); + + const uint64_t SequenceSize = m_RemoteContent.RawSizes[PathIndex]; + + 
auto OpenFile = [&](BasicFile& File) { + const std::filesystem::path FileName = + GetTempChunkedSequenceFileName(m_CacheFolderPath, m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + File.Open(FileName, BasicFile::Mode::kWrite); + if (m_Options.UseSparseFiles) + { + PrepareFileForScatteredWrite(File.Handle(), SequenceSize); + } + }; + + const uint64_t ChunkSize = Chunk.GetSize(); + ZEN_ASSERT(FileOffset + ChunkSize <= SequenceSize); + if (ChunkSize == SequenceSize) + { + BasicFile SingleChunkFile; + OpenFile(SingleChunkFile); + + m_DiskStats.CurrentOpenFileCount++; + auto _ = MakeGuard([this]() { m_DiskStats.CurrentOpenFileCount--; }); + SingleChunkFile.Write(Chunk, FileOffset); + } + else + { + const uint64_t MaxWriterBufferSize = 256u * 1025u; + + BufferedWriteFileCache::Local::Writer* Writer = LocalWriter.GetWriter(SequenceIndex); + if (Writer) + { + if ((!Writer->Writer) && (ChunkSize < MaxWriterBufferSize)) + { + Writer->Writer = std::make_unique<BasicFileWriter>(*Writer->File, Min(SequenceSize, MaxWriterBufferSize)); + } + Writer->Write(Chunk, FileOffset); + } + else + { + Writer = LocalWriter.PutWriter(SequenceIndex, std::make_unique<BufferedWriteFileCache::Local::Writer>()); + + Writer->File = std::make_unique<BasicFile>(); + OpenFile(*Writer->File); + if (ChunkSize < MaxWriterBufferSize) + { + Writer->Writer = std::make_unique<BasicFileWriter>(*Writer->File, Min(SequenceSize, MaxWriterBufferSize)); + } + Writer->Write(Chunk, FileOffset); + } + } + m_DiskStats.WriteCount++; + m_DiskStats.WriteByteCount += ChunkSize; + m_WrittenChunkByteCount += ChunkSize; +} + +bool +BuildsOperationUpdateFolder::GetBlockWriteOps(const IoHash& BlockRawHash, + std::span<const IoHash> ChunkRawHashes, + std::span<const uint32_t> ChunkCompressedLengths, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + const MemoryView BlockView, + uint32_t 
                                              FirstIncludedBlockChunkIndex,
                                              uint32_t LastIncludedBlockChunkIndex,
                                              BlockWriteOps& OutOps)
{
    ZEN_TRACE_CPU("GetBlockWriteOps");

    // Walk the requested chunk range of the block, decompressing and validating each
    // chunk that is both known to the remote lookup and still needed by at least one
    // sequence, and collect the resulting (target, buffer) write ops in OutOps.
    uint32_t OffsetInBlock = 0;
    for (uint32_t ChunkBlockIndex = FirstIncludedBlockChunkIndex; ChunkBlockIndex <= LastIncludedBlockChunkIndex; ChunkBlockIndex++)
    {
        const uint32_t ChunkCompressedSize = ChunkCompressedLengths[ChunkBlockIndex];
        const IoHash& ChunkHash = ChunkRawHashes[ChunkBlockIndex];
        if (auto It = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != m_RemoteLookup.ChunkHashToChunkIndex.end())
        {
            const uint32_t ChunkIndex = It->second;
            std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs =
                GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, ChunkIndex);

            if (!ChunkTargetPtrs.empty())
            {
                // Atomically claim this chunk so only one block/worker decompresses it.
                bool NeedsWrite = true;
                if (RemoteChunkIndexNeedsCopyFromSourceFlags[ChunkIndex].compare_exchange_strong(NeedsWrite, false))
                {
                    MemoryView ChunkMemoryView = BlockView.Mid(OffsetInBlock, ChunkCompressedSize);
                    IoHash VerifyChunkHash;
                    uint64_t VerifyChunkSize;
                    CompressedBuffer CompressedChunk =
                        CompressedBuffer::FromCompressed(SharedBuffer::MakeView(ChunkMemoryView), VerifyChunkHash, VerifyChunkSize);
                    if (!CompressedChunk)
                    {
                        throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} is not a valid compressed buffer",
                                                             ChunkHash,
                                                             OffsetInBlock,
                                                             ChunkCompressedSize,
                                                             BlockRawHash));
                    }
                    if (VerifyChunkHash != ChunkHash)
                    {
                        throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} has a mismatching content hash {}",
                                                             ChunkHash,
                                                             OffsetInBlock,
                                                             ChunkCompressedSize,
                                                             BlockRawHash,
                                                             VerifyChunkHash));
                    }
                    if (VerifyChunkSize != m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex])
                    {
                        throw std::runtime_error(
                            fmt::format("Chunk {} at {}, size {} in block {} has a mismatching raw size {}, expected {}",
                                        ChunkHash,
                                        OffsetInBlock,
                                        ChunkCompressedSize,
                                        BlockRawHash,
                                        VerifyChunkSize,
                                        m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]));
                    }

                    OodleCompressor ChunkCompressor;
                    OodleCompressionLevel ChunkCompressionLevel;
                    uint64_t ChunkBlockSize;

                    bool GetCompressParametersSuccess =
                        CompressedChunk.TryGetCompressParameters(ChunkCompressor, ChunkCompressionLevel, ChunkBlockSize);
                    ZEN_ASSERT(GetCompressParametersSuccess);

                    IoBuffer Decompressed;
                    if (ChunkCompressionLevel == OodleCompressionLevel::None)
                    {
                        // Uncompressed payload: wrap the block memory directly instead of copying.
                        MemoryView ChunkDecompressedMemoryView = ChunkMemoryView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder());
                        Decompressed =
                            IoBuffer(IoBuffer::Wrap, ChunkDecompressedMemoryView.GetData(), ChunkDecompressedMemoryView.GetSize());
                    }
                    else
                    {
                        Decompressed = CompressedChunk.Decompress().AsIoBuffer();
                    }

                    if (Decompressed.GetSize() != m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex])
                    {
                        throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} decompressed to size {}, expected {}",
                                                             ChunkHash,
                                                             OffsetInBlock,
                                                             ChunkCompressedSize,
                                                             BlockRawHash,
                                                             Decompressed.GetSize(),
                                                             m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]));
                    }

                    ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed));
                    // One write op per target location; all of them share the same buffer.
                    for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs)
                    {
                        OutOps.WriteOps.push_back(
                            BlockWriteOps::WriteOpData{.Target = Target, .ChunkBufferIndex = OutOps.ChunkBuffers.size()});
                    }
                    OutOps.ChunkBuffers.emplace_back(std::move(Decompressed));
                }
            }
        }

        OffsetInBlock += ChunkCompressedSize;
    }
    {
        ZEN_TRACE_CPU("Sort");
        // Order ops by (sequence, offset) so downstream writes are grouped per file
        // and sequential within it.
        std::sort(OutOps.WriteOps.begin(),
                  OutOps.WriteOps.end(),
                  [](const BlockWriteOps::WriteOpData& Lhs, const BlockWriteOps::WriteOpData& Rhs) {
                      if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex)
                      {
                          return true;
                      }
                      if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex)
                      {
                          return false;
                      }
                      return Lhs.Target->Offset < Rhs.Target->Offset;
                  });
    }
    return true;
}

void
+BuildsOperationUpdateFolder::WriteBlockChunkOpsToCache(std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + const BlockWriteOps& Ops, + BufferedWriteFileCache& WriteCache, + ParallelWork& Work) +{ + ZEN_TRACE_CPU("WriteBlockChunkOpsToCache"); + + { + BufferedWriteFileCache::Local LocalWriter(WriteCache); + for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps) + { + if (Work.IsAborted()) + { + break; + } + const CompositeBuffer& Chunk = Ops.ChunkBuffers[WriteOp.ChunkBufferIndex]; + const uint32_t SequenceIndex = WriteOp.Target->SequenceIndex; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() <= + m_RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() > 0); + const uint64_t FileOffset = WriteOp.Target->Offset; + const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + + WriteSequenceChunkToCache(LocalWriter, Chunk, SequenceIndex, FileOffset, PathIndex); + } + } + if (!Work.IsAborted()) + { + // Write tracking, updating this must be done without any files open (BufferedWriteFileCache::Local) + std::vector<uint32_t> CompletedChunkSequences; + for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps) + { + const uint32_t RemoteSequenceIndex = WriteOp.Target->SequenceIndex; + if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters)) + { + CompletedChunkSequences.push_back(RemoteSequenceIndex); + } + } + WriteCache.Close(CompletedChunkSequences); + VerifyAndCompleteChunkSequencesAsync(CompletedChunkSequences, Work); + } +} + +bool +BuildsOperationUpdateFolder::WriteChunksBlockToCache(const ChunkBlockDescription& BlockDescription, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + ParallelWork& Work, + CompositeBuffer&& BlockBuffer, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + BufferedWriteFileCache& WriteCache) +{ + 
ZEN_TRACE_CPU("WriteChunksBlockToCache"); + + IoBuffer BlockMemoryBuffer = MakeBufferMemoryBased(BlockBuffer); + const MemoryView BlockView = BlockMemoryBuffer.GetView(); + + BlockWriteOps Ops; + if ((BlockDescription.HeaderSize == 0) || BlockDescription.ChunkCompressedLengths.empty()) + { + ZEN_TRACE_CPU("WriteChunksBlockToCache_Legacy"); + + uint64_t HeaderSize; + const std::vector<uint32_t> ChunkCompressedLengths = + ReadChunkBlockHeader(BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder()), HeaderSize); + + if (GetBlockWriteOps(BlockDescription.BlockHash, + BlockDescription.ChunkRawHashes, + ChunkCompressedLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + HeaderSize), + 0, + gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1), + Ops)) + { + WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work); + return true; + } + return false; + } + + if (GetBlockWriteOps(BlockDescription.BlockHash, + BlockDescription.ChunkRawHashes, + BlockDescription.ChunkCompressedLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize), + 0, + gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1), + Ops)) + { + WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work); + return true; + } + return false; +} + +bool +BuildsOperationUpdateFolder::WritePartialBlockChunksToCache(const ChunkBlockDescription& BlockDescription, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + ParallelWork& Work, + CompositeBuffer&& PartialBlockBuffer, + uint32_t FirstIncludedBlockChunkIndex, + uint32_t LastIncludedBlockChunkIndex, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + BufferedWriteFileCache& WriteCache) +{ + 
ZEN_TRACE_CPU("WritePartialBlockChunksToCache"); + + IoBuffer BlockMemoryBuffer = MakeBufferMemoryBased(PartialBlockBuffer); + const MemoryView BlockView = BlockMemoryBuffer.GetView(); + + BlockWriteOps Ops; + if (GetBlockWriteOps(BlockDescription.BlockHash, + BlockDescription.ChunkRawHashes, + BlockDescription.ChunkCompressedLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + BlockView, + FirstIncludedBlockChunkIndex, + LastIncludedBlockChunkIndex, + Ops)) + { + WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work); + return true; + } + else + { + return false; + } +} + +void +BuildsOperationUpdateFolder::AsyncWriteDownloadedChunk(uint32_t RemoteChunkIndex, + const BlobsExistsResult& ExistsResult, + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs, + BufferedWriteFileCache& WriteCache, + ParallelWork& Work, + IoBuffer&& Payload, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + std::atomic<uint64_t>& WritePartsComplete, + const uint64_t TotalPartWriteCount, + FilteredRate& FilteredWrittenBytesPerSecond) +{ + ZEN_TRACE_CPU("AsyncWriteDownloadedChunk"); + + const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + + const uint64_t Size = Payload.GetSize(); + + const bool ExistsInCache = m_Storage.CacheStorage && ExistsResult.ExistingBlobs.contains(ChunkHash); + + const bool PopulateCache = !ExistsInCache && m_Storage.CacheStorage && m_Options.PopulateCache; + + std::filesystem::path CompressedChunkPath = + TryMoveDownloadedChunk(Payload, + m_TempDownloadFolderPath / ChunkHash.ToHexString(), + /* ForceDiskBased */ PopulateCache || Size > m_Options.MaximumInMemoryPayloadSize); + if (PopulateCache) + { + IoBuffer CacheBlob = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (CacheBlob) + { + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, + ChunkHash, + ZenContentType::kCompressedBinary, + 
CompositeBuffer(SharedBuffer(CacheBlob))); + } + } + + IoBufferFileReference FileRef; + bool EnableBacklog = !CompressedChunkPath.empty() || Payload.GetFileReference(FileRef); + + Work.ScheduleWork( + m_IOWorkerPool, + [this, + SequenceIndexChunksLeftToWriteCounters, + &Work, + CompressedChunkPath, + RemoteChunkIndex, + TotalPartWriteCount, + &WriteCache, + &WritePartsComplete, + &FilteredWrittenBytesPerSecond, + ChunkTargetPtrs = std::move(ChunkTargetPtrs), + CompressedPart = IoBuffer(std::move(Payload))](std::atomic<bool>&) mutable { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_WriteChunk"); + + FilteredWrittenBytesPerSecond.Start(); + + const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + if (CompressedChunkPath.empty()) + { + ZEN_ASSERT(CompressedPart); + } + else + { + ZEN_ASSERT(!CompressedPart); + CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (!CompressedPart) + { + throw std::runtime_error( + fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath)); + } + } + + bool NeedHashVerify = WriteCompressedChunkToCache(ChunkHash, ChunkTargetPtrs, WriteCache, std::move(CompressedPart)); + if (!m_AbortFlag) + { + if (WritePartsComplete.fetch_add(1) + 1 == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + + if (!CompressedChunkPath.empty()) + { + std::error_code Ec = TryRemoveFile(CompressedChunkPath); + if (Ec) + { + ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", CompressedChunkPath, Ec.value(), Ec.message()); + } + } + + std::vector<uint32_t> CompletedSequences = + CompleteChunkTargets(ChunkTargetPtrs, SequenceIndexChunksLeftToWriteCounters); + WriteCache.Close(CompletedSequences); + if (NeedHashVerify) + { + VerifyAndCompleteChunkSequencesAsync(CompletedSequences, Work); + } + else + { + FinalizeChunkSequences(CompletedSequences); + } + } + } + }, + EnableBacklog ? 
WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog); +} + +void +BuildsOperationUpdateFolder::VerifyAndCompleteChunkSequencesAsync(std::span<const uint32_t> RemoteSequenceIndexes, ParallelWork& Work) +{ + if (RemoteSequenceIndexes.empty()) + { + return; + } + ZEN_TRACE_CPU("VerifyAndCompleteChunkSequence"); + if (m_Options.ValidateCompletedSequences) + { + for (uint32_t RemoteSequenceIndexOffset = 1; RemoteSequenceIndexOffset < RemoteSequenceIndexes.size(); RemoteSequenceIndexOffset++) + { + const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[RemoteSequenceIndexOffset]; + Work.ScheduleWork(m_IOWorkerPool, [this, RemoteSequenceIndex](std::atomic<bool>&) { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("Async_VerifyAndFinalizeSequence"); + + VerifySequence(RemoteSequenceIndex); + if (!m_AbortFlag) + { + const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + FinalizeChunkSequence(SequenceRawHash); + } + } + }); + } + const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[0]; + + VerifySequence(RemoteSequenceIndex); + const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + FinalizeChunkSequence(SequenceRawHash); + } + else + { + for (uint32_t RemoteSequenceIndexOffset = 0; RemoteSequenceIndexOffset < RemoteSequenceIndexes.size(); RemoteSequenceIndexOffset++) + { + const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[RemoteSequenceIndexOffset]; + const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + FinalizeChunkSequence(SequenceRawHash); + } + } +} + +bool +BuildsOperationUpdateFolder::CompleteSequenceChunk(uint32_t RemoteSequenceIndex, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters) +{ + uint32_t PreviousValue = SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1); + ZEN_ASSERT(PreviousValue >= 1); + ZEN_ASSERT(PreviousValue != 
(uint32_t)-1); + return PreviousValue == 1; +} + +std::vector<uint32_t> +BuildsOperationUpdateFolder::CompleteChunkTargets(const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters) +{ + ZEN_TRACE_CPU("CompleteChunkTargets"); + + std::vector<uint32_t> CompletedSequenceIndexes; + for (const ChunkedContentLookup::ChunkSequenceLocation* Location : ChunkTargetPtrs) + { + const uint32_t RemoteSequenceIndex = Location->SequenceIndex; + if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters)) + { + CompletedSequenceIndexes.push_back(RemoteSequenceIndex); + } + } + return CompletedSequenceIndexes; +} + +void +BuildsOperationUpdateFolder::FinalizeChunkSequence(const IoHash& SequenceRawHash) +{ + ZEN_TRACE_CPU("FinalizeChunkSequence"); + + ZEN_ASSERT_SLOW(!IsFile(GetFinalChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash))); + std::error_code Ec; + RenameFile(GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash), + GetFinalChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash), + Ec); + if (Ec) + { + throw std::system_error(Ec); + } +} + +void +BuildsOperationUpdateFolder::FinalizeChunkSequences(std::span<const uint32_t> RemoteSequenceIndexes) +{ + ZEN_TRACE_CPU("FinalizeChunkSequences"); + + for (uint32_t SequenceIndex : RemoteSequenceIndexes) + { + FinalizeChunkSequence(m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + } +} + +void +BuildsOperationUpdateFolder::VerifySequence(uint32_t RemoteSequenceIndex) +{ + ZEN_TRACE_CPU("VerifySequence"); + + ZEN_ASSERT(m_Options.ValidateCompletedSequences); + + const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + { + ZEN_TRACE_CPU("HashSequence"); + const std::uint32_t RemotePathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; + const uint64_t ExpectedSize = 
            m_RemoteContent.RawSizes[RemotePathIndex];
        IoBuffer VerifyBuffer = IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash));
        const uint64_t VerifySize = VerifyBuffer.GetSize();
        if (VerifySize != ExpectedSize)
        {
            throw std::runtime_error(fmt::format("Written chunk sequence {} size {} does not match expected size {}",
                                                 SequenceRawHash,
                                                 VerifySize,
                                                 ExpectedSize));
        }

        const IoHash VerifyChunkHash = IoHash::HashBuffer(std::move(VerifyBuffer), &m_ValidatedChunkByteCount);
        if (VerifyChunkHash != SequenceRawHash)
        {
            throw std::runtime_error(
                fmt::format("Written chunk sequence {} hash does not match expected hash {}", VerifyChunkHash, SequenceRawHash));
        }
    }
}

// Verifies an on-disk folder against its chunked content manifest, in parallel on the
// I/O worker pool. Checks existence and exact size for every path (outside excluded
// folders) and, when VerifyFileHash is set, the full raw hash - drilling down to the
// first mismatching chunk for diagnostics. Collects all problems and throws a single
// std::runtime_error at the end if any were found.
void
VerifyFolder(ProgressBase& Progress,
             std::atomic<bool>& AbortFlag,
             std::atomic<bool>& PauseFlag,
             TransferThreadWorkers& Workers,
             const ChunkedFolderContent& Content,
             const ChunkedContentLookup& Lookup,
             const std::filesystem::path& Path,
             const std::vector<std::string>& ExcludeFolders,
             bool VerifyFileHash,
             VerifyFolderStatistics& VerifyFolderStats)
{
    ZEN_TRACE_CPU("VerifyFolder");

    Stopwatch Timer;

    std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = Progress.CreateProgressBar("Verify Files");

    WorkerThreadPool& VerifyPool = Workers.GetIOWorkerPool();

    ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);

    const uint32_t PathCount = gsl::narrow<uint32_t>(Content.Paths.size());

    // Errors is shared by all workers; guarded by ErrorLock.
    RwLock ErrorLock;
    std::vector<std::string> Errors;

    // True unless RelativePath is one of (or nested under) the excluded folders.
    auto IsAcceptedFolder = [ExcludeFolders = ExcludeFolders](const std::string_view& RelativePath) -> bool {
        for (const std::string& ExcludeFolder : ExcludeFolders)
        {
            if (RelativePath.starts_with(ExcludeFolder))
            {
                if (RelativePath.length() == ExcludeFolder.length())
                {
                    return false;
                }
                else if (RelativePath[ExcludeFolder.length()] == '/')
                {
                    return false;
                }
            }
        }
        return true;
    };

    for (uint32_t PathIndex = 0; PathIndex < PathCount; PathIndex++)
    {
        if (Work.IsAborted())
        {
            break;
        }

        Work.ScheduleWork(
            VerifyPool,
            [&Path, &Content, &Lookup, &ErrorLock, &Errors, &VerifyFolderStats, VerifyFileHash, &IsAcceptedFolder, PathIndex, &AbortFlag](
                std::atomic<bool>&) {
                if (!AbortFlag)
                {
                    ZEN_TRACE_CPU("VerifyFile_work");

                    // TODO: Convert ScheduleWork body to function

                    const std::filesystem::path TargetPath = (Path / Content.Paths[PathIndex]).make_preferred();
                    if (IsAcceptedFolder(TargetPath.parent_path().generic_string()))
                    {
                        const uint64_t ExpectedSize = Content.RawSizes[PathIndex];
                        if (!IsFile(TargetPath))
                        {
                            ErrorLock.WithExclusiveLock([&]() {
                                Errors.push_back(fmt::format("File {} with expected size {} does not exist", TargetPath, ExpectedSize));
                            });
                            VerifyFolderStats.FilesFailed++;
                        }
                        else
                        {
                            std::error_code Ec;
                            uint64_t SizeOnDisk = gsl::narrow<uint64_t>(FileSizeFromPath(TargetPath, Ec));
                            if (Ec)
                            {
                                ErrorLock.WithExclusiveLock([&]() {
                                    Errors.push_back(
                                        fmt::format("Failed to get size of file {}: {} ({})", TargetPath, Ec.message(), Ec.value()));
                                });
                                VerifyFolderStats.FilesFailed++;
                            }
                            else if (SizeOnDisk < ExpectedSize)
                            {
                                ErrorLock.WithExclusiveLock([&]() {
                                    Errors.push_back(fmt::format("Size of file {} is smaller than expected. Expected: {}, Found: {}",
                                                                 TargetPath,
                                                                 ExpectedSize,
                                                                 SizeOnDisk));
                                });
                                VerifyFolderStats.FilesFailed++;
                            }
                            else if (SizeOnDisk > ExpectedSize)
                            {
                                ErrorLock.WithExclusiveLock([&]() {
                                    Errors.push_back(fmt::format("Size of file {} is bigger than expected. Expected: {}, Found: {}",
                                                                 TargetPath,
                                                                 ExpectedSize,
                                                                 SizeOnDisk));
                                });
                                VerifyFolderStats.FilesFailed++;
                            }
                            else if (SizeOnDisk > 0 && VerifyFileHash)
                            {
                                const IoHash& ExpectedRawHash = Content.RawHashes[PathIndex];
                                IoBuffer Buffer = IoBufferBuilder::MakeFromFile(TargetPath);
                                IoHash RawHash = IoHash::HashBuffer(Buffer);
                                if (RawHash != ExpectedRawHash)
                                {
                                    // Whole-file hash mismatch: re-hash chunk by chunk to
                                    // report which chunk diverges first.
                                    uint64_t FileOffset = 0;
                                    const uint32_t SequenceIndex = Lookup.RawHashToSequenceIndex.at(ExpectedRawHash);
                                    const uint32_t OrderOffset = Lookup.SequenceIndexChunkOrderOffset[SequenceIndex];
                                    for (uint32_t OrderIndex = OrderOffset;
                                         OrderIndex < OrderOffset + Content.ChunkedContent.ChunkCounts[SequenceIndex];
                                         OrderIndex++)
                                    {
                                        uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex];
                                        uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
                                        IoHash ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex];
                                        IoBuffer FileChunk = IoBuffer(Buffer, FileOffset, ChunkSize);
                                        if (IoHash::HashBuffer(FileChunk) != ChunkHash)
                                        {
                                            ErrorLock.WithExclusiveLock([&]() {
                                                Errors.push_back(fmt::format(
                                                    "WARNING: Hash of file {} does not match expected hash. Expected: {}, Found: {}. "
                                                    "Mismatch at chunk {}",
                                                    TargetPath,
                                                    ExpectedRawHash,
                                                    RawHash,
                                                    OrderIndex - OrderOffset));
                                            });
                                            break;
                                        }
                                        FileOffset += ChunkSize;
                                    }
                                    VerifyFolderStats.FilesFailed++;
                                }
                                VerifyFolderStats.ReadBytes += SizeOnDisk;
                            }
                        }
                    }
                    VerifyFolderStats.FilesVerified++;
                }
            },
            [&, PathIndex](std::exception_ptr Ex, std::atomic<bool>&) {
                // Exception handler: record the failure instead of propagating.
                std::string Description;
                try
                {
                    std::rethrow_exception(Ex);
                }
                catch (const std::exception& Ex)
                {
                    Description = Ex.what();
                }
                ErrorLock.WithExclusiveLock([&]() {
                    Errors.push_back(fmt::format("Failed verifying file '{}'. Reason: {}",
                                                 (Path / Content.Paths[PathIndex]).make_preferred(),
                                                 Description));
                });
                VerifyFolderStats.FilesFailed++;
            });
    }

    Work.Wait(Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
        ZEN_UNUSED(PendingWork);
        std::string Details = fmt::format("Verified {}/{} ({}). Failed files: {}",
                                          VerifyFolderStats.FilesVerified.load(),
                                          PathCount,
                                          NiceBytes(VerifyFolderStats.ReadBytes.load()),
                                          VerifyFolderStats.FilesFailed.load());
        ProgressBar->UpdateState({.Task = "Verifying files ",
                                  .Details = Details,
                                  .TotalCount = gsl::narrow<uint64_t>(PathCount),
                                  .RemainingCount = gsl::narrow<uint64_t>(PathCount - VerifyFolderStats.FilesVerified.load()),
                                  .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
                                 false);
    });
    VerifyFolderStats.VerifyElapsedWallTimeUs = Timer.GetElapsedTimeUs();

    ProgressBar->Finish();
    if (AbortFlag)
    {
        return;
    }

    for (const std::string& Error : Errors)
    {
        ZEN_CONSOLE_ERROR("{}", Error);
    }
    if (!Errors.empty())
    {
        throw std::runtime_error(fmt::format("Verify failed with {} errors", Errors.size()));
    }
}

// Returns the subset of Paths not present in KnownPaths, comparing by generic
// (forward-slash) string form; order of Paths is preserved.
std::vector<std::filesystem::path>
GetNewPaths(const std::span<const std::filesystem::path> KnownPaths, const std::span<const std::filesystem::path> Paths)
{
    tsl::robin_set<std::string> KnownPathsSet;
    KnownPathsSet.reserve(KnownPaths.size());
    for (const std::filesystem::path& LocalPath : KnownPaths)
    {
        KnownPathsSet.insert(LocalPath.generic_string());
    }

    std::vector<std::filesystem::path> NewPaths;
    for (const std::filesystem::path& UntrackedPath : Paths)
    {
        if (!KnownPathsSet.contains(UntrackedPath.generic_string()))
        {
            NewPaths.push_back(UntrackedPath);
        }
    }
    return NewPaths;
}

BuildSaveState
GetLocalStateFromPaths(ProgressBase& Progress,
                       std::atomic<bool>& AbortFlag,
                       std::atomic<bool>& PauseFlag,
                       TransferThreadWorkers& Workers,
                       GetFolderContentStatistics& LocalFolderScanStats,
+ ChunkingStatistics& ChunkingStats, + const std::filesystem::path& Path, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + std::span<const std::filesystem::path> PathsToCheck) +{ + FolderContent FolderState = + CheckFolderFiles(Progress, AbortFlag, PauseFlag, "Check Files", Workers, LocalFolderScanStats, Path, PathsToCheck); + + ChunkedFolderContent ChunkedContent; + if (FolderState.Paths.size() > 0) + { + ChunkedContent = ScanFolderFiles(Progress, + AbortFlag, + PauseFlag, + "Scan Files", + Workers, + Path, + FolderState, + ChunkController, + ChunkCache, + ChunkingStats); + } + + return BuildSaveState{.State = BuildState{.ChunkedContent = std::move(ChunkedContent)}, .FolderState = FolderState, .LocalPath = Path}; +} + +BuildSaveState +GetLocalContent(ProgressBase& Progress, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + bool IsQuiet, + TransferThreadWorkers& Workers, + GetFolderContentStatistics& LocalFolderScanStats, + ChunkingStatistics& ChunkingStats, + const std::filesystem::path& Path, + const std::filesystem::path& StateFilePath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) +{ + Stopwatch ReadStateTimer; + bool FileExists = IsFile(StateFilePath); + if (!FileExists) + { + ZEN_CONSOLE("No known local state file in {}, falling back to scanning", Path); + return {}; + } + + BuildSaveState SavedLocalState; + try + { + SavedLocalState = ReadBuildSaveStateFile(StateFilePath); + if (!IsQuiet) + { + ZEN_CONSOLE("Read local state file {} in {}", StateFilePath, NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs())); + } + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE_WARN("Failed reading state file {}, falling back to scannning. 
Reason: {}", StateFilePath, Ex.what()); + return {}; + } + + FolderContent CurrentLocalFolderState = CheckFolderFiles(Progress, + AbortFlag, + PauseFlag, + "Check Known Files", + Workers, + LocalFolderScanStats, + Path, + SavedLocalState.FolderState.Paths); + if (AbortFlag) + { + return {}; + } + + if (!SavedLocalState.FolderState.AreKnownFilesEqual(CurrentLocalFolderState)) + { + const size_t LocalStatePathCount = SavedLocalState.FolderState.Paths.size(); + std::vector<std::filesystem::path> DeletedPaths; + FolderContent UpdatedContent = GetUpdatedContent(SavedLocalState.FolderState, CurrentLocalFolderState, DeletedPaths); + if (!DeletedPaths.empty()) + { + SavedLocalState.State.ChunkedContent = DeletePathsFromChunkedContent(SavedLocalState.State.ChunkedContent, DeletedPaths); + } + + if (!IsQuiet) + { + ZEN_CONSOLE("Updating state, {} local files deleted and {} local files updated out of {}", + DeletedPaths.size(), + UpdatedContent.Paths.size(), + LocalStatePathCount); + } + if (UpdatedContent.Paths.size() > 0) + { + ChunkedFolderContent UpdatedLocalContent = ScanFolderFiles(Progress, + AbortFlag, + PauseFlag, + "Scan Known Files", + Workers, + Path, + UpdatedContent, + ChunkController, + ChunkCache, + ChunkingStats); + if (AbortFlag) + { + return {}; + } + SavedLocalState.State.ChunkedContent = + MergeChunkedFolderContents(SavedLocalState.State.ChunkedContent, {{UpdatedLocalContent}}); + } + } + else + { + // Remove files from LocalContent no longer in LocalFolderState + tsl::robin_set<std::string> LocalFolderPaths; + LocalFolderPaths.reserve(SavedLocalState.FolderState.Paths.size()); + for (const std::filesystem::path& LocalFolderPath : SavedLocalState.FolderState.Paths) + { + LocalFolderPaths.insert(LocalFolderPath.generic_string()); + } + std::vector<std::filesystem::path> DeletedPaths; + for (const std::filesystem::path& LocalContentPath : SavedLocalState.State.ChunkedContent.Paths) + { + if (!LocalFolderPaths.contains(LocalContentPath.generic_string())) + { 
+ DeletedPaths.push_back(LocalContentPath); + } + } + if (!DeletedPaths.empty()) + { + SavedLocalState.State.ChunkedContent = DeletePathsFromChunkedContent(SavedLocalState.State.ChunkedContent, DeletedPaths); + } + } + + SavedLocalState.FolderState = CurrentLocalFolderState; + + return SavedLocalState; +} + +void +DownloadFolder(LoggerRef InLog, + ProgressBase& Progress, + TransferThreadWorkers& Workers, + StorageInstance& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + const BuildStorageCache::Statistics& StorageCacheStats, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + const std::filesystem::path& DownloadSpecPath, + const std::filesystem::path& Path, + const DownloadOptions& Options) +{ + ZEN_TRACE_CPU("DownloadFolder"); + ZEN_SCOPED_LOG(InLog); + + Progress.SetLogOperationName("Download Folder"); + + enum TaskSteps : uint32_t + { + CheckState, + CompareState, + Download, + Verify, + Cleanup, + StepCount + }; + + auto EndProgress = MakeGuard([&]() { Progress.SetLogOperationProgress(TaskSteps::StepCount, TaskSteps::StepCount); }); + + Stopwatch DownloadTimer; + + Progress.SetLogOperationProgress(TaskSteps::CheckState, TaskSteps::StepCount); + + const std::filesystem::path ZenTempFolder = ZenTempFolderPath(Options.ZenFolderPath); + CreateDirectories(ZenTempFolder); + + std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + + CbObject BuildObject = GetBuild(*Storage.BuildStorage, BuildId, Options.IsQuiet); + + std::vector<std::pair<Oid, std::string>> AllBuildParts = + ResolveBuildPartNames(BuildObject, BuildId, BuildPartIds, BuildPartNames, PreferredMultipartChunkSize); + + BuildManifest Manifest; + if (!DownloadSpecPath.empty()) + { + const std::filesystem::path AbsoluteDownloadSpecPath = + DownloadSpecPath.is_relative() ? 
MakeSafeAbsolutePath(Path / DownloadSpecPath) : MakeSafeAbsolutePath(DownloadSpecPath); + Manifest = ParseBuildManifest(AbsoluteDownloadSpecPath); + } + + std::vector<ChunkedFolderContent> PartContents; + + std::unique_ptr<ChunkingController> ChunkController; + + std::vector<ChunkBlockDescription> BlockDescriptions; + std::vector<IoHash> LooseChunkHashes; + + Progress.SetLogOperationProgress(TaskSteps::CompareState, TaskSteps::StepCount); + + ChunkedFolderContent RemoteContent = GetRemoteContent(InLog, + Storage, + BuildId, + AllBuildParts, + Manifest, + Options.IncludeWildcards, + Options.ExcludeWildcards, + ChunkController, + PartContents, + BlockDescriptions, + LooseChunkHashes, + Options.IsQuiet, + Options.IsVerbose, + Options.DoExtraContentVerify); + + const std::uint64_t LargeAttachmentSize = Options.AllowMultiparts ? PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + GetFolderContentStatistics LocalFolderScanStats; + ChunkingStatistics ChunkingStats; + + BuildSaveState LocalState; + + if (IsDir(Path)) + { + if (!ChunkController && !Options.IsQuiet) + { + ZEN_CONSOLE_INFO("Unspecified chunking algorithm, using default"); + ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{}); + } + std::unique_ptr<ChunkingCache> ChunkCache(CreateNullChunkingCache()); + + LocalState = GetLocalContent(Progress, + AbortFlag, + PauseFlag, + Options.IsQuiet, + Workers, + LocalFolderScanStats, + ChunkingStats, + Path, + ZenStateFilePath(Path / ZenFolderName), + *ChunkController, + *ChunkCache); + + std::vector<std::filesystem::path> UntrackedPaths = GetNewPaths(LocalState.State.ChunkedContent.Paths, RemoteContent.Paths); + + BuildSaveState UntrackedLocalContent = GetLocalStateFromPaths(Progress, + AbortFlag, + PauseFlag, + Workers, + LocalFolderScanStats, + ChunkingStats, + Path, + *ChunkController, + *ChunkCache, + UntrackedPaths); + + if (!UntrackedLocalContent.State.ChunkedContent.Paths.empty()) + { + LocalState.State.ChunkedContent = + 
MergeChunkedFolderContents(LocalState.State.ChunkedContent, + std::vector<ChunkedFolderContent>{UntrackedLocalContent.State.ChunkedContent}); + + // TODO: Helper + LocalState.FolderState.Paths.insert(LocalState.FolderState.Paths.begin(), + UntrackedLocalContent.FolderState.Paths.begin(), + UntrackedLocalContent.FolderState.Paths.end()); + LocalState.FolderState.RawSizes.insert(LocalState.FolderState.RawSizes.begin(), + UntrackedLocalContent.FolderState.RawSizes.begin(), + UntrackedLocalContent.FolderState.RawSizes.end()); + LocalState.FolderState.Attributes.insert(LocalState.FolderState.Attributes.begin(), + UntrackedLocalContent.FolderState.Attributes.begin(), + UntrackedLocalContent.FolderState.Attributes.end()); + LocalState.FolderState.ModificationTicks.insert(LocalState.FolderState.ModificationTicks.begin(), + UntrackedLocalContent.FolderState.ModificationTicks.begin(), + UntrackedLocalContent.FolderState.ModificationTicks.end()); + } + + if (Options.AppendNewContent) + { + RemoteContent = ApplyChunkedContentOverlay(LocalState.State.ChunkedContent, + RemoteContent, + Options.IncludeWildcards, + Options.ExcludeWildcards); + } +#if ZEN_BUILD_DEBUG + ValidateChunkedFolderContent(RemoteContent, + BlockDescriptions, + LooseChunkHashes, + Options.IncludeWildcards, + Options.ExcludeWildcards); +#endif // ZEN_BUILD_DEBUG + } + else + { + CreateDirectories(Path); + } + if (AbortFlag) + { + return; + } + + LocalState.LocalPath = Path; + + { + BuildsSelection::Build RemoteBuildState = {.Id = BuildId, + .IncludeWildcards = Options.IncludeWildcards, + .ExcludeWildcards = Options.ExcludeWildcards}; + RemoteBuildState.Parts.reserve(BuildPartIds.size()); + for (size_t PartIndex = 0; PartIndex < BuildPartIds.size(); PartIndex++) + { + RemoteBuildState.Parts.push_back( + {BuildsSelection::BuildPart{.Id = BuildPartIds[PartIndex], + .Name = PartIndex < BuildPartNames.size() ? 
BuildPartNames[PartIndex] : ""}}); + } + + if (Options.AppendNewContent) + { + LocalState.State.Selection.Builds.emplace_back(std::move(RemoteBuildState)); + } + else + { + LocalState.State.Selection.Builds = std::vector<BuildsSelection::Build>{std::move(RemoteBuildState)}; + } + } + + if ((Options.EnableTargetFolderScavenging || Options.AppendNewContent) && !Options.CleanTargetFolder && + CompareChunkedContent(RemoteContent, LocalState.State.ChunkedContent)) + { + if (!Options.IsQuiet) + { + ZEN_CONSOLE("Local state is identical to build to download. All done. Completed in {}.", + NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); + } + + Stopwatch WriteStateTimer; + + CbObject StateObject = CreateBuildSaveStateObject(LocalState); + CreateDirectories(ZenStateFilePath(Options.ZenFolderPath).parent_path()); + TemporaryFile::SafeWriteFile(ZenStateFilePath(Options.ZenFolderPath), StateObject.GetView()); + if (!Options.IsQuiet) + { + ZEN_CONSOLE("Wrote local state in {}", NiceTimeSpanMs(WriteStateTimer.GetElapsedTimeMs())); + } + + AddDownloadedPath(Options.SystemRootDir, + BuildsDownloadInfo{.Selection = LocalState.State.Selection, + .LocalPath = Path, + .StateFilePath = ZenStateFilePath(Options.ZenFolderPath), + .Iso8601Date = DateTime::Now().ToIso8601()}); + } + else + { + ExtendableStringBuilder<128> BuildPartString; + for (const std::pair<Oid, std::string>& BuildPart : AllBuildParts) + { + BuildPartString.Append(fmt::format(" {} ({})", BuildPart.second, BuildPart.first)); + } + + uint64_t RawSize = std::accumulate(RemoteContent.RawSizes.begin(), RemoteContent.RawSizes.end(), std::uint64_t(0)); + + if (!Options.IsQuiet) + { + ZEN_CONSOLE("Downloading build {}, parts:{} to '{}' ({})", BuildId, BuildPartString.ToView(), Path, NiceBytes(RawSize)); + } + + Stopwatch IndexTimer; + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalState.State.ChunkedContent); + const ChunkedContentLookup RemoteLookup = BuildChunkedContentLookup(RemoteContent); + + 
if (!Options.IsQuiet) + { + ZEN_INFO("Indexed local and remote content in {}", NiceTimeSpanMs(IndexTimer.GetElapsedTimeMs())); + } + + Progress.SetLogOperationProgress(TaskSteps::Download, TaskSteps::StepCount); + + BuildsOperationUpdateFolder Updater( + InLog, + Progress, + Storage, + AbortFlag, + PauseFlag, + Workers.GetIOWorkerPool(), + Workers.GetNetworkPool(), + BuildId, + Path, + LocalState.State.ChunkedContent, + LocalLookup, + RemoteContent, + RemoteLookup, + BlockDescriptions, + LooseChunkHashes, + BuildsOperationUpdateFolder::Options{ + .IsQuiet = Options.IsQuiet, + .IsVerbose = Options.IsVerbose, + .AllowFileClone = Options.AllowFileClone, + .UseSparseFiles = Options.UseSparseFiles, + .SystemRootDir = Options.SystemRootDir, + .ZenFolderPath = Options.ZenFolderPath, + .LargeAttachmentSize = LargeAttachmentSize, + .PreferredMultipartChunkSize = PreferredMultipartChunkSize, + .PartialBlockRequestMode = Options.PartialBlockRequestMode, + .WipeTargetFolder = Options.CleanTargetFolder, + .EnableOtherDownloadsScavenging = Options.EnableOtherDownloadsScavenging, + .EnableTargetFolderScavenging = Options.EnableTargetFolderScavenging || Options.AppendNewContent, + .ValidateCompletedSequences = Options.PostDownloadVerify, + .ExcludeFolders = Options.ExcludeFolders, + .MaximumInMemoryPayloadSize = Options.MaximumInMemoryPayloadSize, + .PopulateCache = Options.PopulateCache}); + { + Progress.PushLogOperation("Download"); + auto _ = MakeGuard([&Progress]() { Progress.PopLogOperation(); }); + FolderContent UpdatedLocalFolderState; + Updater.Execute(UpdatedLocalFolderState); + + LocalState.State.ChunkedContent = RemoteContent; + LocalState.FolderState = std::move(UpdatedLocalFolderState); + } + + VerifyFolderStatistics VerifyFolderStats; + if (!AbortFlag) + { + AddDownloadedPath(Options.SystemRootDir, + BuildsDownloadInfo{.Selection = LocalState.State.Selection, + .LocalPath = Path, + .StateFilePath = ZenStateFilePath(Options.ZenFolderPath), + .Iso8601Date = 
DateTime::Now().ToIso8601()}); + + Progress.SetLogOperationProgress(TaskSteps::Verify, TaskSteps::StepCount); + + VerifyFolder(Progress, + AbortFlag, + PauseFlag, + Workers, + RemoteContent, + RemoteLookup, + Path, + Options.ExcludeFolders, + Options.PostDownloadVerify, + VerifyFolderStats); + + Stopwatch WriteStateTimer; + CbObject StateObject = CreateBuildSaveStateObject(LocalState); + + CreateDirectories(ZenStateFilePath(Options.ZenFolderPath).parent_path()); + TemporaryFile::SafeWriteFile(ZenStateFilePath(Options.ZenFolderPath), StateObject.GetView()); + if (!Options.IsQuiet) + { + ZEN_CONSOLE("Wrote local state in {}", NiceTimeSpanMs(WriteStateTimer.GetElapsedTimeMs())); + } + +#if 0 + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(StateObject, SB); + WriteFile(ZenStateFileJsonPath(Options.ZenFolderPath), IoBuffer(IoBuffer::Wrap, SB.Data(), SB.Size())); +#endif // 0 + const uint64_t DownloadCount = Updater.m_DownloadStats.DownloadedChunkCount.load() + + Updater.m_DownloadStats.DownloadedBlockCount.load() + + Updater.m_DownloadStats.DownloadedPartialBlockCount.load(); + const uint64_t DownloadByteCount = Updater.m_DownloadStats.DownloadedChunkByteCount.load() + + Updater.m_DownloadStats.DownloadedBlockByteCount.load() + + Updater.m_DownloadStats.DownloadedPartialBlockByteCount.load(); + const uint64_t DownloadTimeMs = DownloadTimer.GetElapsedTimeMs(); + + if (!Options.IsQuiet) + { + std::string CloneInfo; + if (Updater.m_DiskStats.CloneByteCount > 0) + { + CloneInfo = fmt::format(" ({} cloned)", NiceBytes(Updater.m_DiskStats.CloneByteCount.load())); + } + + std::string DownloadDetails; + { + ExtendableStringBuilder<128> SB; + BuildStorageBase::ExtendedStatistics ExtendedDownloadStats; + if (Storage.BuildStorage->GetExtendedStatistics(ExtendedDownloadStats)) + { + if (!ExtendedDownloadStats.ReceivedBytesPerSource.empty()) + { + for (auto& It : ExtendedDownloadStats.ReceivedBytesPerSource) + { + if (SB.Size() > 0) + { + SB.Append(", "sv); + } + 
SB.Append(It.first); + SB.Append(": "sv); + SB.Append(NiceBytes(It.second)); + } + } + } + if (Storage.CacheStorage) + { + if (SB.Size() > 0) + { + SB.Append(", "sv); + } + SB.Append("Cache: "); + SB.Append(NiceBytes(StorageCacheStats.TotalBytesRead.load())); + } + if (SB.Size() > 0) + { + DownloadDetails = fmt::format(" ({})", SB.ToView()); + } + } + + ZEN_CONSOLE( + "Downloaded build {}, parts:{} in {}\n" + " Scavenge: {} (Target: {}, Cache: {}, Others: {})\n" + " Download: {} ({}) {}bits/s{}\n" + " Write: {} ({}) {}B/s{}\n" + " Clean: {}\n" + " Finalize: {}\n" + " Verify: {}", + BuildId, + BuildPartString.ToView(), + NiceTimeSpanMs(DownloadTimeMs), + + NiceTimeSpanMs((Updater.m_CacheMappingStats.CacheScanElapsedWallTimeUs + + Updater.m_CacheMappingStats.LocalScanElapsedWallTimeUs + + Updater.m_CacheMappingStats.ScavengeElapsedWallTimeUs) / + 1000), + NiceTimeSpanMs(Updater.m_CacheMappingStats.LocalScanElapsedWallTimeUs / 1000), + NiceTimeSpanMs(Updater.m_CacheMappingStats.CacheScanElapsedWallTimeUs / 1000), + NiceTimeSpanMs(Updater.m_CacheMappingStats.ScavengeElapsedWallTimeUs / 1000), + + DownloadCount, + NiceBytes(DownloadByteCount), + NiceNum(GetBytesPerSecond(Updater.m_WriteChunkStats.DownloadTimeUs, DownloadByteCount * 8)), + DownloadDetails, + + Updater.m_DiskStats.WriteCount.load(), + NiceBytes(Updater.m_WrittenChunkByteCount.load()), + NiceNum(GetBytesPerSecond(Updater.m_WriteChunkStats.WriteTimeUs, Updater.m_DiskStats.WriteByteCount.load())), + CloneInfo, + + NiceTimeSpanMs(Updater.m_RebuildFolderStateStats.CleanFolderElapsedWallTimeUs / 1000), + + NiceTimeSpanMs(Updater.m_RebuildFolderStateStats.FinalizeTreeElapsedWallTimeUs / 1000), + + NiceTimeSpanMs(VerifyFolderStats.VerifyElapsedWallTimeUs / 1000)); + } + } + } + + Progress.SetLogOperationProgress(TaskSteps::Cleanup, TaskSteps::StepCount); + + CleanAndRemoveDirectory(Workers.GetIOWorkerPool(), AbortFlag, PauseFlag, ZenTempFolder); +} +} // namespace zen diff --git 
a/src/zenremotestore/builds/builduploadfolder.cpp b/src/zenremotestore/builds/builduploadfolder.cpp new file mode 100644 index 000000000..b536ae464 --- /dev/null +++ b/src/zenremotestore/builds/builduploadfolder.cpp @@ -0,0 +1,2634 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenremotestore/builds/builduploadfolder.h> + +#include <zencore/basicfile.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/fmtutils.h> +#include <zencore/parallelwork.h> +#include <zencore/scopeguard.h> +#include <zencore/trace.h> +#include <zenremotestore/builds/buildcontent.h> +#include <zenremotestore/builds/buildmanifest.h> +#include <zenremotestore/builds/buildstoragecache.h> +#include <zenremotestore/chunking/chunkingcache.h> +#include <zenremotestore/chunking/chunkingcontroller.h> +#include <zenremotestore/transferthreadworkers.h> +#include <zenutil/filesystemutils.h> +#include <zenutil/filteredrate.h> +#include <zenutil/progress.h> + +#include <numeric> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_set.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +namespace { + bool IsExtensionHashCompressable(const tsl::robin_set<uint32_t>& NonCompressableExtensionHashes, const uint32_t PathHash) + { + return !NonCompressableExtensionHashes.contains(PathHash); + } + + bool IsChunkCompressable(const tsl::robin_set<uint32_t>& NonCompressableExtensionHashes, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex) + { + const uint32_t ChunkLocationCount = Lookup.ChunkSequenceLocationCounts[ChunkIndex]; + if (ChunkLocationCount == 0) + { + return false; + } + const size_t ChunkLocationOffset = Lookup.ChunkSequenceLocationOffset[ChunkIndex]; + const uint32_t SequenceIndex = Lookup.ChunkSequenceLocations[ChunkLocationOffset].SequenceIndex; + const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const uint32_t ExtensionHash = Lookup.PathExtensionHash[PathIndex]; + + const bool 
IsCompressable = IsExtensionHashCompressable(NonCompressableExtensionHashes, ExtensionHash); + return IsCompressable; + } + template<typename T> + std::string FormatArray(std::span<const T> Items, std::string_view Prefix) + { + ExtendableStringBuilder<512> SB; + for (const T& Item : Items) + { + SB.Append(fmt::format("{}{}", Prefix, Item)); + } + return SB.ToString(); + } +} // namespace + +class ReadFileCache +{ +public: + // A buffered file reader that provides CompositeBuffer where the buffers are owned and the memory never overwritten + ReadFileCache(std::atomic<uint64_t>& OpenReadCount, + std::atomic<uint64_t>& CurrentOpenFileCount, + std::atomic<uint64_t>& ReadCount, + std::atomic<uint64_t>& ReadByteCount, + const std::filesystem::path& Path, + const ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + size_t MaxOpenFileCount) + : m_Path(Path) + , m_LocalContent(LocalContent) + , m_LocalLookup(LocalLookup) + , m_OpenReadCount(OpenReadCount) + , m_CurrentOpenFileCount(CurrentOpenFileCount) + , m_ReadCount(ReadCount) + , m_ReadByteCount(ReadByteCount) + { + m_OpenFiles.reserve(MaxOpenFileCount); + } + ~ReadFileCache() { m_OpenFiles.clear(); } + + CompositeBuffer GetRange(uint32_t SequenceIndex, uint64_t Offset, uint64_t Size) + { + ZEN_TRACE_CPU("ReadFileCache::GetRange"); + + auto CacheIt = + std::find_if(m_OpenFiles.begin(), m_OpenFiles.end(), [SequenceIndex](const auto& Lhs) { return Lhs.first == SequenceIndex; }); + if (CacheIt != m_OpenFiles.end()) + { + if (CacheIt != m_OpenFiles.begin()) + { + auto CachedFile(std::move(CacheIt->second)); + m_OpenFiles.erase(CacheIt); + m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(SequenceIndex, std::move(CachedFile))); + } + CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); + return Result; + } + const uint32_t LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const std::filesystem::path LocalFilePath = (m_Path / 
m_LocalContent.Paths[LocalPathIndex]).make_preferred(); + if (Size == m_LocalContent.RawSizes[LocalPathIndex]) + { + IoBuffer Result = IoBufferBuilder::MakeFromFile(LocalFilePath); + return CompositeBuffer(SharedBuffer(Result)); + } + if (m_OpenFiles.size() == m_OpenFiles.capacity()) + { + m_OpenFiles.pop_back(); + } + m_OpenFiles.insert( + m_OpenFiles.begin(), + std::make_pair( + SequenceIndex, + std::make_unique<BufferedOpenFile>(LocalFilePath, m_OpenReadCount, m_CurrentOpenFileCount, m_ReadCount, m_ReadByteCount))); + CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); + return Result; + } + +private: + const std::filesystem::path m_Path; + const ChunkedFolderContent& m_LocalContent; + const ChunkedContentLookup& m_LocalLookup; + std::vector<std::pair<uint32_t, std::unique_ptr<BufferedOpenFile>>> m_OpenFiles; + std::atomic<uint64_t>& m_OpenReadCount; + std::atomic<uint64_t>& m_CurrentOpenFileCount; + std::atomic<uint64_t>& m_ReadCount; + std::atomic<uint64_t>& m_ReadByteCount; +}; + +BuildsOperationUploadFolder::BuildsOperationUploadFolder(LoggerRef Log, + ProgressBase& Progress, + StorageInstance& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + WorkerThreadPool& IOWorkerPool, + WorkerThreadPool& NetworkPool, + const Oid& BuildId, + const std::filesystem::path& Path, + bool CreateBuild, + const CbObject& MetaData, + const Options& Options) +: m_Log(Log) +, m_Progress(Progress) +, m_Storage(Storage) +, m_AbortFlag(AbortFlag) +, m_PauseFlag(PauseFlag) +, m_IOWorkerPool(IOWorkerPool) +, m_NetworkPool(NetworkPool) +, m_BuildId(BuildId) +, m_Path(Path) +, m_CreateBuild(CreateBuild) +, m_MetaData(MetaData) +, m_Options(Options) +{ + m_NonCompressableExtensionHashes.reserve(Options.NonCompressableExtensions.size()); + for (const std::string& Extension : Options.NonCompressableExtensions) + { + m_NonCompressableExtensionHashes.insert(HashStringAsLowerDjb2(Extension)); + } +} + 
+BuildsOperationUploadFolder::PrepareBuildResult +BuildsOperationUploadFolder::PrepareBuild() +{ + ZEN_TRACE_CPU("PrepareBuild"); + + PrepareBuildResult Result; + Result.PreferredMultipartChunkSize = m_Options.PreferredMultipartChunkSize; + Stopwatch Timer; + if (m_CreateBuild) + { + ZEN_TRACE_CPU("CreateBuild"); + + Stopwatch PutBuildTimer; + CbObject PutBuildResult = m_Storage.BuildStorage->PutBuild(m_BuildId, m_MetaData); + Result.PrepareBuildTimeMs = PutBuildTimer.GetElapsedTimeMs(); + if (auto ChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + Result.PreferredMultipartChunkSize = ChunkSize; + } + Result.PayloadSize = m_MetaData.GetSize(); + } + else + { + ZEN_TRACE_CPU("PutBuild"); + Stopwatch GetBuildTimer; + CbObject Build = m_Storage.BuildStorage->GetBuild(m_BuildId); + Result.PrepareBuildTimeMs = GetBuildTimer.GetElapsedTimeMs(); + Result.PayloadSize = Build.GetSize(); + if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + Result.PreferredMultipartChunkSize = ChunkSize; + } + else if (m_Options.AllowMultiparts) + { + ZEN_WARN("PreferredMultipartChunkSize is unknown. 
Defaulting to '{}'", NiceBytes(Result.PreferredMultipartChunkSize)); + } + } + + if (!m_Options.IgnoreExistingBlocks) + { + ZEN_TRACE_CPU("FindBlocks"); + Stopwatch KnownBlocksTimer; + CbObject BlockDescriptionList = m_Storage.BuildStorage->FindBlocks(m_BuildId, m_Options.FindBlockMaxCount); + if (BlockDescriptionList) + { + Result.KnownBlocks = ParseChunkBlockDescriptionList(BlockDescriptionList); + } + Result.FindBlocksTimeMs = KnownBlocksTimer.GetElapsedTimeMs(); + } + Result.ElapsedTimeMs = Timer.GetElapsedTimeMs(); + return Result; +} + +std::vector<BuildsOperationUploadFolder::UploadPart> +BuildsOperationUploadFolder::ReadFolder() +{ + std::vector<UploadPart> UploadParts; + std::filesystem::path ExcludeManifestPath = m_Path / m_Options.ZenExcludeManifestName; + tsl::robin_set<std::string> ExcludeAssetPaths; + if (IsFile(ExcludeManifestPath)) + { + std::filesystem::path AbsoluteExcludeManifestPath = + MakeSafeAbsolutePath(ExcludeManifestPath.is_absolute() ? ExcludeManifestPath : m_Path / ExcludeManifestPath); + BuildManifest Manifest = ParseBuildManifest(AbsoluteExcludeManifestPath); + const std::vector<std::filesystem::path>& AssetPaths = Manifest.Parts.front().Files; + ExcludeAssetPaths.reserve(AssetPaths.size()); + for (const std::filesystem::path& AssetPath : AssetPaths) + { + ExcludeAssetPaths.insert(AssetPath.generic_string()); + } + } + + UploadParts.resize(1); + + UploadPart& Part = UploadParts.front(); + GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats; + + Part.Content = GetFolderContent( + Part.LocalFolderScanStats, + m_Path, + [this](const std::string_view& RelativePath) { return IsAcceptedFolder(RelativePath); }, + [this, &ExcludeAssetPaths](const std::string_view& RelativePath, uint64_t Size, uint32_t Attributes) -> bool { + ZEN_UNUSED(Size, Attributes); + if (!IsAcceptedFile(RelativePath)) + { + return false; + } + if (ExcludeAssetPaths.contains(std::filesystem::path(RelativePath).generic_string())) + { + return 
false; + } + return true; + }, + m_IOWorkerPool, + m_Progress.GetProgressUpdateDelayMS(), + [&](bool, std::ptrdiff_t) { ZEN_INFO("Found {} files in '{}'...", LocalFolderScanStats.AcceptedFileCount.load(), m_Path); }, + m_AbortFlag); + Part.TotalRawSize = std::accumulate(Part.Content.RawSizes.begin(), Part.Content.RawSizes.end(), std::uint64_t(0)); + + return UploadParts; +} + +std::vector<BuildsOperationUploadFolder::UploadPart> +BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& ManifestPath) +{ + std::vector<UploadPart> UploadParts; + Stopwatch ManifestParseTimer; + std::filesystem::path AbsoluteManifestPath = MakeSafeAbsolutePath(ManifestPath.is_absolute() ? ManifestPath : m_Path / ManifestPath); + BuildManifest Manifest = ParseBuildManifest(AbsoluteManifestPath); + if (Manifest.Parts.empty()) + { + throw std::runtime_error(fmt::format("Manifest file at '{}' is invalid", ManifestPath)); + } + + UploadParts.resize(Manifest.Parts.size()); + for (size_t PartIndex = 0; PartIndex < Manifest.Parts.size(); PartIndex++) + { + BuildManifest::Part& PartManifest = Manifest.Parts[PartIndex]; + if (ManifestPath.is_relative()) + { + PartManifest.Files.push_back(ManifestPath); + } + + UploadPart& Part = UploadParts[PartIndex]; + FolderContent& Content = Part.Content; + + GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats; + + const std::vector<std::filesystem::path>& AssetPaths = PartManifest.Files; + Content = GetValidFolderContent( + m_IOWorkerPool, + LocalFolderScanStats, + m_Path, + AssetPaths, + [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); }, + 1000, + m_AbortFlag, + m_PauseFlag); + + if (Content.Paths.size() != AssetPaths.size()) + { + const tsl::robin_set<std::filesystem::path> FoundPaths(Content.Paths.begin(), Content.Paths.end()); + ExtendableStringBuilder<1024> SB; + for (const std::filesystem::path& AssetPath : AssetPaths) + { + if (!FoundPaths.contains(AssetPath)) 
+ { + SB << "\n " << AssetPath.generic_string(); + } + } + throw std::runtime_error( + fmt::format("Manifest file at '{}' references files that does not exist{}", ManifestPath, SB.ToView())); + } + + Part.PartId = PartManifest.PartId; + Part.PartName = PartManifest.PartName; + Part.TotalRawSize = std::accumulate(Part.Content.RawSizes.begin(), Part.Content.RawSizes.end(), std::uint64_t(0)); + } + + return UploadParts; +} + +std::vector<std::pair<Oid, std::string>> +BuildsOperationUploadFolder::Execute(const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& ManifestPath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) +{ + ZEN_TRACE_CPU("BuildsOperationUploadFolder::Execute"); + try + { + Stopwatch ReadPartsTimer; + std::vector<UploadPart> UploadParts = ManifestPath.empty() ? ReadFolder() : ReadManifestParts(ManifestPath); + + for (UploadPart& Part : UploadParts) + { + if (Part.PartId == Oid::Zero) + { + if (UploadParts.size() != 1) + { + throw std::runtime_error(fmt::format("Multi part upload manifest '{}' must contains build part id", ManifestPath)); + } + + if (BuildPartId == Oid::Zero) + { + Part.PartId = Oid::NewOid(); + } + else + { + Part.PartId = BuildPartId; + } + } + if (Part.PartName.empty()) + { + if (UploadParts.size() != 1) + { + throw std::runtime_error(fmt::format("Multi part upload manifest '{}' must contains build part name", ManifestPath)); + } + if (BuildPartName.empty()) + { + throw std::runtime_error("Build part name must be set"); + } + Part.PartName = std::string(BuildPartName); + } + } + + if (!m_Options.IsQuiet) + { + ZEN_INFO("Reading {} parts took {}", UploadParts.size(), NiceTimeSpanMs(ReadPartsTimer.GetElapsedTimeMs())); + } + + const uint32_t PartsUploadStepCount = gsl::narrow<uint32_t>(uint32_t(PartTaskSteps::StepCount) * UploadParts.size()); + + const uint32_t PrepareBuildStep = 0; + const uint32_t UploadPartsStep = 1; + const uint32_t FinalizeBuildStep = UploadPartsStep + 
PartsUploadStepCount; + const uint32_t CleanupStep = FinalizeBuildStep + 1; + const uint32_t StepCount = CleanupStep + 1; + + auto EndProgress = MakeGuard([&]() { m_Progress.SetLogOperationProgress(StepCount, StepCount); }); + + Stopwatch ProcessTimer; + + CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir); + CreateDirectories(m_Options.TempDir); + auto _ = MakeGuard([&]() { CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir); }); + + m_Progress.SetLogOperationProgress(PrepareBuildStep, StepCount); + + // Kick off PrepareBuild on the network pool; the result is consumed while uploading the + // first part (see ConsumePrepareBuildResult). + m_PrepBuildResultFuture = m_NetworkPool.EnqueueTask(std::packaged_task<PrepareBuildResult()>{[this] { return PrepareBuild(); }}, + WorkerThreadPool::EMode::EnableBacklog); + + for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++) + { + const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount)); + + const UploadPart& Part = UploadParts[PartIndex]; + UploadBuildPart(ChunkController, ChunkCache, PartIndex, Part, PartStepOffset, StepCount); + if (m_AbortFlag) + { + return {}; + } + } + + m_Progress.SetLogOperationProgress(FinalizeBuildStep, StepCount); + + if (m_CreateBuild && !m_AbortFlag) + { + Stopwatch FinalizeBuildTimer; + m_Storage.BuildStorage->FinalizeBuild(m_BuildId); + if (!m_Options.IsQuiet) + { + ZEN_INFO("FinalizeBuild took {}", NiceTimeSpanMs(FinalizeBuildTimer.GetElapsedTimeMs())); + } + } + + m_Progress.SetLogOperationProgress(CleanupStep, StepCount); + + std::vector<std::pair<Oid, std::string>> Result; + Result.reserve(UploadParts.size()); + for (UploadPart& Part : UploadParts) + { + Result.push_back(std::make_pair(Part.PartId, Part.PartName)); + } + return Result; + } + catch (const std::exception&) + { + m_AbortFlag = true; + throw; + } +} + +// Returns false when RelativePath equals, or is nested directly under, any configured exclude +// folder; the '/' check prevents "Foo" from excluding "FooBar". +bool +BuildsOperationUploadFolder::IsAcceptedFolder(const std::string_view& RelativePath) const +{ + for (const std::string& ExcludeFolder : m_Options.ExcludeFolders) + { + if 
(RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; +} + +// Returns false for the exclude-manifest file itself and for any path ending in one of the +// configured excluded extensions. +bool +BuildsOperationUploadFolder::IsAcceptedFile(const std::string_view& RelativePath) const +{ + if (RelativePath == m_Options.ZenExcludeManifestName) + { + return false; + } + for (const std::string& ExcludeExtension : m_Options.ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; +} + +// Sorts the given chunk indexes by their first on-disk location (sequence index, then offset) +// and greedily packs them into blocks bounded by MaxBlockSize/MaxChunksPerBlock, preferring to +// break a block where the source path changes. NOTE(review): ChunkIndexes is reordered in place. +void +BuildsOperationUploadFolder::ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::vector<uint32_t>& ChunkIndexes, + std::vector<std::vector<uint32_t>>& OutBlocks) +{ + ZEN_TRACE_CPU("ArrangeChunksIntoBlocks"); + std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&Content, &Lookup](uint32_t Lhs, uint32_t Rhs) { + const ChunkedContentLookup::ChunkSequenceLocation& LhsLocation = GetChunkSequenceLocations(Lookup, Lhs)[0]; + const ChunkedContentLookup::ChunkSequenceLocation& RhsLocation = GetChunkSequenceLocations(Lookup, Rhs)[0]; + if (LhsLocation.SequenceIndex < RhsLocation.SequenceIndex) + { + return true; + } + else if (LhsLocation.SequenceIndex > RhsLocation.SequenceIndex) + { + return false; + } + return LhsLocation.Offset < RhsLocation.Offset; + }); + + // A block may end up to 1/16th short of MaxBlockSize when searching for a nicer break point. + uint64_t MaxBlockSizeLowThreshold = m_Options.BlockParameters.MaxBlockSize - (m_Options.BlockParameters.MaxBlockSize / 16); + + uint64_t BlockSize = 0; + + uint32_t ChunkIndexStart = 0; + for (uint32_t ChunkIndexOffset = 0; ChunkIndexOffset < ChunkIndexes.size();) + { + const uint32_t ChunkIndex = ChunkIndexes[ChunkIndexOffset]; + const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + + if (((BlockSize + ChunkSize) > m_Options.BlockParameters.MaxBlockSize) || + (ChunkIndexOffset - ChunkIndexStart) > m_Options.BlockParameters.MaxChunksPerBlock) + { + // 
Within the span of MaxBlockSizeLowThreshold and MaxBlockSize, see if there is a break + // between source paths for chunks. Break the block at the last such break if any. + ZEN_ASSERT(ChunkIndexOffset > ChunkIndexStart); + + const uint32_t ChunkSequenceIndex = Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[ChunkIndex]].SequenceIndex; + + uint64_t ScanBlockSize = BlockSize; + + uint32_t ScanChunkIndexOffset = ChunkIndexOffset - 1; + while (ScanChunkIndexOffset > (ChunkIndexStart + 2)) + { + const uint32_t TestChunkIndex = ChunkIndexes[ScanChunkIndexOffset]; + const uint64_t TestChunkSize = Content.ChunkedContent.ChunkRawSizes[TestChunkIndex]; + if ((ScanBlockSize - TestChunkSize) < MaxBlockSizeLowThreshold) + { + break; + } + + const uint32_t TestSequenceIndex = + Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[TestChunkIndex]].SequenceIndex; + if (ChunkSequenceIndex != TestSequenceIndex) + { + ChunkIndexOffset = ScanChunkIndexOffset + 1; + break; + } + + ScanBlockSize -= TestChunkSize; + ScanChunkIndexOffset--; + } + + std::vector<uint32_t> ChunksInBlock; + ChunksInBlock.reserve(ChunkIndexOffset - ChunkIndexStart); + for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexOffset; AddIndexOffset++) + { + const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset]; + ChunksInBlock.push_back(AddChunkIndex); + } + OutBlocks.emplace_back(std::move(ChunksInBlock)); + BlockSize = 0; + ChunkIndexStart = ChunkIndexOffset; + } + else + { + ChunkIndexOffset++; + BlockSize += ChunkSize; + } + } + // Flush the final partially-filled block, if any. + if (ChunkIndexStart < ChunkIndexes.size()) + { + std::vector<uint32_t> ChunksInBlock; + ChunksInBlock.reserve(ChunkIndexes.size() - ChunkIndexStart); + for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexes.size(); AddIndexOffset++) + { + const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset]; + ChunksInBlock.push_back(AddChunkIndex); + } + OutBlocks.emplace_back(std::move(ChunksInBlock)); + } +} + +// Generates the compressed payload for every new block on the I/O pool and streams finished +// blocks to upload tasks on the network pool, driving a progress bar until all scheduled work +// completes. Per-block results land in OutBlocks; rates are captured into the stats structs. +void 
+BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& OutBlocks, + GenerateBlocksStatistics& GenerateBlocksStats, + UploadStatistics& UploadStats) +{ + ZEN_TRACE_CPU("GenerateBuildBlocks"); + const std::size_t NewBlockCount = NewBlockChunks.size(); + if (NewBlockCount == 0) + { + return; + } + + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Generate Blocks"); + + OutBlocks.BlockDescriptions.resize(NewBlockCount); + OutBlocks.BlockSizes.resize(NewBlockCount); + OutBlocks.BlockMetaDatas.resize(NewBlockCount); + OutBlocks.BlockHeaders.resize(NewBlockCount); + OutBlocks.MetaDataHasBeenUploaded.resize(NewBlockCount, 0); + OutBlocks.BlockHashToBlockIndex.reserve(NewBlockCount); + + // Lock guards BlockHashToBlockIndex; the statistics counters updated from worker threads are + // atomics and need no lock. + RwLock Lock; + FilteredRate FilteredGeneratedBytesPerSecond; + FilteredRate FilteredUploadedBytesPerSecond; + ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + std::atomic<uint64_t> QueuedPendingBlocksForUpload = 0; + + GenerateBuildBlocksContext Context{.Work = Work, + .GenerateBlobsPool = m_IOWorkerPool, + .UploadBlocksPool = m_NetworkPool, + .FilteredGeneratedBytesPerSecond = FilteredGeneratedBytesPerSecond, + .FilteredUploadedBytesPerSecond = FilteredUploadedBytesPerSecond, + .QueuedPendingBlocksForUpload = QueuedPendingBlocksForUpload, + .Lock = Lock, + .OutBlocks = OutBlocks, + .GenerateBlocksStats = GenerateBlocksStats, + .UploadStats = UploadStats, + .NewBlockCount = NewBlockCount}; + + ScheduleBlockGeneration(Context, Content, Lookup, NewBlockChunks); + + Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + + FilteredGeneratedBytesPerSecond.Update(GenerateBlocksStats.GeneratedBlockByteCount.load()); + FilteredUploadedBytesPerSecond.Update(UploadStats.BlocksBytes.load()); + 
+ std::string Details = fmt::format("Generated {}/{} ({}, {}B/s). Uploaded {}/{} ({}, {}bits/s)", + GenerateBlocksStats.GeneratedBlockCount.load(), + NewBlockCount, + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceNum(FilteredGeneratedBytesPerSecond.GetCurrent()), + UploadStats.BlockCount.load(), + NewBlockCount, + NiceBytes(UploadStats.BlocksBytes.load()), + NiceNum(FilteredUploadedBytesPerSecond.GetCurrent() * 8)); + + ProgressBar->UpdateState({.Task = "Generating blocks", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(NewBlockCount), + .RemainingCount = gsl::narrow<uint64_t>(NewBlockCount - GenerateBlocksStats.GeneratedBlockCount.load()), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + + ZEN_ASSERT(m_AbortFlag || QueuedPendingBlocksForUpload.load() == 0); + + ProgressBar->Finish(); + + GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTimeUS(); + UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); +} + +// Queues one generation task per new block on the blob-generation pool. Each task builds the +// compressed block, records its description/size/metadata, and, when at most 16 uploads are +// pending, chains an upload task on the network pool; otherwise only the block header is kept +// so the block can be re-created and uploaded later. +void +BuildsOperationUploadFolder::ScheduleBlockGeneration(GenerateBuildBlocksContext& Context, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const std::vector<std::vector<uint32_t>>& NewBlockChunks) +{ + for (size_t BlockIndex = 0; BlockIndex < Context.NewBlockCount; BlockIndex++) + { + if (Context.Work.IsAborted()) + { + break; + } + const std::vector<uint32_t>& ChunksInBlock = NewBlockChunks[BlockIndex]; + Context.Work.ScheduleWork( + Context.GenerateBlobsPool, + [this, &Context, &Content, &Lookup, ChunksInBlock, BlockIndex](std::atomic<bool>&) { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("GenerateBuildBlocks_Generate"); + + Context.FilteredGeneratedBytesPerSecond.Start(); + + Stopwatch GenerateTimer; + CompressedBuffer CompressedBlock = + GenerateBlock(Content, Lookup, ChunksInBlock, Context.OutBlocks.BlockDescriptions[BlockIndex]); + if (m_Options.IsVerbose) + { + 
ZEN_INFO("Generated block {} ({}) containing {} chunks in {}", + Context.OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(CompressedBlock.GetCompressedSize()), + Context.OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size(), + NiceTimeSpanMs(GenerateTimer.GetElapsedTimeMs())); + } + + Context.OutBlocks.BlockSizes[BlockIndex] = CompressedBlock.GetCompressedSize(); + { + CbObjectWriter Writer; + Writer.AddString("createdBy", "zen"); + Context.OutBlocks.BlockMetaDatas[BlockIndex] = Writer.Save(); + } + Context.GenerateBlocksStats.GeneratedBlockByteCount += Context.OutBlocks.BlockSizes[BlockIndex]; + Context.GenerateBlocksStats.GeneratedBlockCount++; + + Context.Lock.WithExclusiveLock([&]() { + Context.OutBlocks.BlockHashToBlockIndex.insert_or_assign(Context.OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + BlockIndex); + }); + + { + std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + Context.OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + + if (Context.GenerateBlocksStats.GeneratedBlockCount == Context.NewBlockCount) + { + Context.FilteredGeneratedBytesPerSecond.Stop(); + } + + // Upload back-pressure: with more than 16 uploads pending, drop the payload and keep + // only the header (set again here, redundantly with the assignment above) so the block + // can be rebuilt and uploaded later; otherwise hand the payload to the network pool now. + if (Context.QueuedPendingBlocksForUpload.load() > 16) + { + std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + Context.OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + else + { + if (!m_AbortFlag) + { + Context.QueuedPendingBlocksForUpload++; + Context.Work.ScheduleWork( + Context.UploadBlocksPool, + [this, &Context, BlockIndex, Payload = std::move(CompressedBlock)](std::atomic<bool>&) mutable { + UploadGeneratedBlock(Context, BlockIndex, std::move(Payload)); + }); + } + } + } + }); + } +} + +// Upload task for one generated block: writes the blob (plus optional cache copies and block +// metadata) to build storage and updates upload statistics. HTTP errors are swallowed while +// aborting so an abort does not surface as a failure. +void +BuildsOperationUploadFolder::UploadGeneratedBlock(GenerateBuildBlocksContext& Context, size_t BlockIndex, CompressedBuffer Payload) +{ + auto _ = 
MakeGuard([&Context] { Context.QueuedPendingBlocksForUpload--; }); + if (m_AbortFlag) + { + return; + } + + // NOTE(review): once every block has been generated this task only records the header and + // skips the network upload - presumably the remaining blocks are sent by the later + // attachment-upload path; confirm against UploadBuildPart's BlockHeaders handling. + if (Context.GenerateBlocksStats.GeneratedBlockCount == Context.NewBlockCount) + { + ZEN_TRACE_CPU("GenerateBuildBlocks_Save"); + + Context.FilteredUploadedBytesPerSecond.Stop(); + std::span<const SharedBuffer> Segments = Payload.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + Context.OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + return; + } + + ZEN_TRACE_CPU("GenerateBuildBlocks_Upload"); + + Context.FilteredUploadedBytesPerSecond.Start(); + + const CbObject BlockMetaData = + BuildChunkBlockDescription(Context.OutBlocks.BlockDescriptions[BlockIndex], Context.OutBlocks.BlockMetaDatas[BlockIndex]); + + const IoHash& BlockHash = Context.OutBlocks.BlockDescriptions[BlockIndex].BlockHash; + const uint64_t CompressedBlockSize = Payload.GetCompressedSize(); + + if (m_Storage.CacheStorage && m_Options.PopulateCache) + { + m_Storage.CacheStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload.GetCompressed()); + } + + try + { + m_Storage.BuildStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, std::move(Payload).GetCompressed()); + } + catch (const std::exception&) + { + // Silence http errors due to abort + if (!m_AbortFlag) + { + throw; + } + } + + if (m_AbortFlag) + { + return; + } + + Context.UploadStats.BlocksBytes += CompressedBlockSize; + + if (m_Options.IsVerbose) + { + ZEN_INFO("Uploaded block {} ({}) containing {} chunks", + BlockHash, + NiceBytes(CompressedBlockSize), + Context.OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + } + + if (m_Storage.CacheStorage && m_Options.PopulateCache) + { + m_Storage.CacheStorage->PutBlobMetadatas(m_BuildId, std::vector<IoHash>({BlockHash}), std::vector<CbObject>({BlockMetaData})); + } + + bool MetadataSucceeded = false; + try + { + MetadataSucceeded = 
m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData); + } + catch (const std::exception&) + { + // Silence http errors due to abort + if (!m_AbortFlag) + { + throw; + } + } + + if (m_AbortFlag) + { + return; + } + + if (MetadataSucceeded) + { + if (m_Options.IsVerbose) + { + ZEN_INFO("Uploaded block {} metadata ({})", BlockHash, NiceBytes(BlockMetaData.GetSize())); + } + + Context.OutBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + Context.UploadStats.BlocksBytes += BlockMetaData.GetSize(); + } + + Context.UploadStats.BlockCount++; + if (Context.UploadStats.BlockCount == Context.NewBlockCount) + { + Context.FilteredUploadedBytesPerSecond.Stop(); + } +} + +// Maps the local chunk order into "absolute" indices where loose chunks come first, followed by +// every chunk of every block in block order. With DoExtraContentValidation enabled the mapping +// is cross-checked hash-by-hash against a temporary absolute hash list. +std::vector<uint32_t> +BuildsOperationUploadFolder::CalculateAbsoluteChunkOrders( + const std::span<const IoHash> LocalChunkHashes, + const std::span<const uint32_t> LocalChunkOrder, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToLocalChunkIndex, + const std::span<const uint32_t>& LooseChunkIndexes, + const std::span<const ChunkBlockDescription>& BlockDescriptions) +{ + ZEN_TRACE_CPU("CalculateAbsoluteChunkOrders"); + + std::vector<IoHash> TmpAbsoluteChunkHashes; + if (m_Options.DoExtraContentValidation) + { + TmpAbsoluteChunkHashes.reserve(LocalChunkHashes.size()); + } + std::vector<uint32_t> LocalChunkIndexToAbsoluteChunkIndex; + LocalChunkIndexToAbsoluteChunkIndex.resize(LocalChunkHashes.size(), (uint32_t)-1); + std::uint32_t AbsoluteChunkCount = 0; + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + LocalChunkIndexToAbsoluteChunkIndex[ChunkIndex] = AbsoluteChunkCount; + if (m_Options.DoExtraContentValidation) + { + TmpAbsoluteChunkHashes.push_back(LocalChunkHashes[ChunkIndex]); + } + AbsoluteChunkCount++; + } + for (const ChunkBlockDescription& Block : BlockDescriptions) + { + for (const IoHash& ChunkHash : Block.ChunkRawHashes) + { + if (auto It = ChunkHashToLocalChunkIndex.find(ChunkHash); It != ChunkHashToLocalChunkIndex.end()) + { + const uint32_t 
LocalChunkIndex = It->second; + ZEN_ASSERT_SLOW(LocalChunkHashes[LocalChunkIndex] == ChunkHash); + LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex] = AbsoluteChunkCount; + } + if (m_Options.DoExtraContentValidation) + { + TmpAbsoluteChunkHashes.push_back(ChunkHash); + } + AbsoluteChunkCount++; + } + } + std::vector<uint32_t> AbsoluteChunkOrder; + AbsoluteChunkOrder.reserve(LocalChunkHashes.size()); + for (const uint32_t LocalChunkIndex : LocalChunkOrder) + { + const uint32_t AbsoluteChunkIndex = LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex]; + if (m_Options.DoExtraContentValidation) + { + ZEN_ASSERT(LocalChunkHashes[LocalChunkIndex] == TmpAbsoluteChunkHashes[AbsoluteChunkIndex]); + } + AbsoluteChunkOrder.push_back(AbsoluteChunkIndex); + } + // Optional second validation pass: every ordered entry must map back to the same hash. + if (m_Options.DoExtraContentValidation) + { + uint32_t OrderIndex = 0; + while (OrderIndex < LocalChunkOrder.size()) + { + const uint32_t LocalChunkIndex = LocalChunkOrder[OrderIndex]; + const IoHash& LocalChunkHash = LocalChunkHashes[LocalChunkIndex]; + const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrder[OrderIndex]; + const IoHash& AbsoluteChunkHash = TmpAbsoluteChunkHashes[AbsoluteChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + OrderIndex++; + } + } + return AbsoluteChunkOrder; +} + +// Reads one chunk's raw bytes from disk through the shared ReadFileCache using the chunk's +// first recorded (sequence, offset) location. Throws std::runtime_error if the range cannot +// be read. +CompositeBuffer +BuildsOperationUploadFolder::FetchChunk(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const IoHash& ChunkHash, + ReadFileCache& OpenFileCache) +{ + ZEN_TRACE_CPU("FetchChunk"); + auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(It != Lookup.ChunkHashToChunkIndex.end()); + uint32_t ChunkIndex = It->second; + std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex); + ZEN_ASSERT(!ChunkLocations.empty()); + CompositeBuffer Chunk = + OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, ChunkLocations[0].Offset, Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + if (!Chunk) + { + 
throw std::runtime_error(fmt::format("Unable to read chunk at {}, size {} from '{}'", + ChunkLocations[0].Offset, + Content.ChunkedContent.ChunkRawSizes[ChunkIndex], + Content.Paths[Lookup.SequenceIndexFirstPathIndex[ChunkLocations[0].SequenceIndex]])); + } + ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == ChunkHash); + return Chunk; +}; + +// Builds one compressed chunk block: lazily fetches each member chunk from disk and compresses +// it with Oodle Mermaid (or stores it uncompressed when it is below the minimum size or has a +// non-compressible extension), then assembles the block and fills in its description. +CompressedBuffer +BuildsOperationUploadFolder::GenerateBlock(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const std::vector<uint32_t>& ChunksInBlock, + ChunkBlockDescription& OutBlockDescription) +{ + ZEN_TRACE_CPU("GenerateBlock"); + ReadFileCache OpenFileCache(m_DiskStats.OpenReadCount, + m_DiskStats.CurrentOpenFileCount, + m_DiskStats.ReadCount, + m_DiskStats.ReadByteCount, + m_Path, + Content, + Lookup, + 4); + + std::vector<std::pair<IoHash, FetchChunkFunc>> BlockContent; + BlockContent.reserve(ChunksInBlock.size()); + for (uint32_t ChunkIndex : ChunksInBlock) + { + BlockContent.emplace_back(std::make_pair( + Content.ChunkedContent.ChunkHashes[ChunkIndex], + [this, &Content, &Lookup, &OpenFileCache, ChunkIndex](const IoHash& ChunkHash) -> std::pair<uint64_t, CompositeBuffer> { + CompositeBuffer Chunk = FetchChunk(Content, Lookup, ChunkHash, OpenFileCache); + ZEN_ASSERT(Chunk); + uint64_t RawSize = Chunk.GetSize(); + + const bool ShouldCompressChunk = RawSize >= m_Options.MinimumSizeForCompressInBlock && + IsChunkCompressable(m_NonCompressableExtensionHashes, Lookup, ChunkIndex); + + const OodleCompressionLevel CompressionLevel = + ShouldCompressChunk ? 
OodleCompressionLevel::VeryFast : OodleCompressionLevel::None; + return {RawSize, CompressedBuffer::Compress(Chunk, OodleCompressor::Mermaid, CompressionLevel).GetCompressed()}; + })); + } + + return GenerateChunkBlock(std::move(BlockContent), OutBlockDescription); +}; + +// Re-creates a previously generated block from its retained header plus freshly re-read and +// re-compressed chunk data. NOTE(review): assumes compression decisions/output are identical +// to GenerateBlock so the rebuilt payload matches the original block - confirm; the result is +// wrapped with FromCompressedNoValidate, so no revalidation happens here. +CompressedBuffer +BuildsOperationUploadFolder::RebuildBlock(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + CompositeBuffer&& HeaderBuffer, + const std::vector<uint32_t>& ChunksInBlock) +{ + ZEN_TRACE_CPU("RebuildBlock"); + ReadFileCache OpenFileCache(m_DiskStats.OpenReadCount, + m_DiskStats.CurrentOpenFileCount, + m_DiskStats.ReadCount, + m_DiskStats.ReadByteCount, + m_Path, + Content, + Lookup, + 4); + + std::vector<SharedBuffer> ResultBuffers; + ResultBuffers.reserve(HeaderBuffer.GetSegments().size() + ChunksInBlock.size()); + ResultBuffers.insert(ResultBuffers.end(), HeaderBuffer.GetSegments().begin(), HeaderBuffer.GetSegments().end()); + for (uint32_t ChunkIndex : ChunksInBlock) + { + std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex); + ZEN_ASSERT(!ChunkLocations.empty()); + CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, + ChunkLocations[0].Offset, + Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == Content.ChunkedContent.ChunkHashes[ChunkIndex]); + + const uint64_t RawSize = Chunk.GetSize(); + const bool ShouldCompressChunk = + RawSize >= m_Options.MinimumSizeForCompressInBlock && IsChunkCompressable(m_NonCompressableExtensionHashes, Lookup, ChunkIndex); + + const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? 
OodleCompressionLevel::VeryFast : OodleCompressionLevel::None; + + CompositeBuffer CompressedChunk = + CompressedBuffer::Compress(std::move(Chunk), OodleCompressor::Mermaid, CompressionLevel).GetCompressed(); + ResultBuffers.insert(ResultBuffers.end(), CompressedChunk.GetSegments().begin(), CompressedChunk.GetSegments().end()); + } + return CompressedBuffer::FromCompressedNoValidate(CompositeBuffer(std::move(ResultBuffers))); +}; + +// Uploads a single build part end to end: chunks the part's content, classifies chunks into +// loose / reused-block / new-block sets, generates and uploads new blocks, uploads the part +// manifest plus whatever attachments the server still needs, and finally records per-part +// statistics into the operation-wide totals. +void +BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + uint32_t PartIndex, + const UploadPart& Part, + uint32_t PartStepOffset, + uint32_t StepCount) +{ + Stopwatch UploadTimer; + + ChunkingStatistics ChunkingStats; + FindBlocksStatistics FindBlocksStats; + ReuseBlocksStatistics ReuseBlocksStats; + UploadStatistics UploadStats; + GenerateBlocksStatistics GenerateBlocksStats; + LooseChunksStatistics LooseChunksStats; + + m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::ChunkPartContent, StepCount); + + ChunkedFolderContent LocalContent = ScanPartContent(Part, ChunkController, ChunkCache, ChunkingStats); + if (m_AbortFlag) + { + return; + } + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); + + // The async PrepareBuild kicked off in Execute is joined while handling the first part. + if (PartIndex == 0) + { + ConsumePrepareBuildResult(); + } + + ZEN_ASSERT(m_PreferredMultipartChunkSize != 0); + ZEN_ASSERT(m_LargeAttachmentSize != 0); + + m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::CalculateDelta, StepCount); + + Stopwatch BlockArrangeTimer; + + std::vector<uint32_t> LooseChunkIndexes; + std::vector<uint32_t> NewBlockChunkIndexes; + std::vector<size_t> ReuseBlockIndexes; + ClassifyChunksByBlockEligibility(LocalContent, + LooseChunkIndexes, + NewBlockChunkIndexes, + ReuseBlockIndexes, + LooseChunksStats, + FindBlocksStats, + ReuseBlocksStats); + + std::vector<std::vector<uint32_t>> NewBlockChunks; + ArrangeChunksIntoBlocks(LocalContent, LocalLookup, NewBlockChunkIndexes, 
NewBlockChunks); + + FindBlocksStats.NewBlocksCount += NewBlockChunks.size(); + for (uint32_t ChunkIndex : NewBlockChunkIndexes) + { + FindBlocksStats.NewBlocksChunkByteCount += LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + FindBlocksStats.NewBlocksChunkCount += NewBlockChunkIndexes.size(); + + // Guard against divide-by-zero when no chunks were eligible for block reuse. + const double AcceptedByteCountPercent = FindBlocksStats.PotentialChunkByteCount > 0 + ? (100.0 * ReuseBlocksStats.AcceptedRawByteCount / FindBlocksStats.PotentialChunkByteCount) + : 0.0; + + const double AcceptedReduntantByteCountPercent = + ReuseBlocksStats.AcceptedByteCount > 0 ? (100.0 * ReuseBlocksStats.AcceptedReduntantByteCount) / + (ReuseBlocksStats.AcceptedByteCount + ReuseBlocksStats.AcceptedReduntantByteCount) + : 0.0; + if (!m_Options.IsQuiet) + { + ZEN_INFO( + "Found {} chunks in {} ({}) blocks eligible for reuse in {}\n" + " Reusing {} ({}) matching chunks in {} blocks ({:.1f}%)\n" + " Accepting {} ({}) redundant chunks ({:.1f}%)\n" + " Rejected {} ({}) chunks in {} blocks\n" + " Arranged {} ({}) chunks in {} new blocks\n" + " Keeping {} ({}) chunks as loose chunks\n" + " Discovery completed in {}", + FindBlocksStats.FoundBlockChunkCount, + FindBlocksStats.FoundBlockCount, + NiceBytes(FindBlocksStats.FoundBlockByteCount), + NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS), + + ReuseBlocksStats.AcceptedChunkCount, + NiceBytes(ReuseBlocksStats.AcceptedRawByteCount), + FindBlocksStats.AcceptedBlockCount, + AcceptedByteCountPercent, + + ReuseBlocksStats.AcceptedReduntantChunkCount, + NiceBytes(ReuseBlocksStats.AcceptedReduntantByteCount), + AcceptedReduntantByteCountPercent, + + ReuseBlocksStats.RejectedChunkCount, + NiceBytes(ReuseBlocksStats.RejectedByteCount), + ReuseBlocksStats.RejectedBlockCount, + + FindBlocksStats.NewBlocksChunkCount, + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount), + FindBlocksStats.NewBlocksCount, + + LooseChunksStats.ChunkCount, + NiceBytes(LooseChunksStats.ChunkByteCount), + + 
NiceTimeSpanMs(BlockArrangeTimer.GetElapsedTimeMs())); + } + + m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::GenerateBlocks, StepCount); + GeneratedBlocks NewBlocks; + + if (!NewBlockChunks.empty()) + { + Stopwatch GenerateBuildBlocksTimer; + auto __ = MakeGuard([&]() { + uint64_t BlockGenerateTimeUs = GenerateBuildBlocksTimer.GetElapsedTimeUs(); + if (!m_Options.IsQuiet) + { + ZEN_INFO("Generated {} ({}) and uploaded {} ({}) blocks in {}. Generate speed: {}B/sec. Transfer speed {}bits/sec.", + GenerateBlocksStats.GeneratedBlockCount.load(), + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount), + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes.load()), + NiceTimeSpanMs(BlockGenerateTimeUs / 1000), + NiceNum(GetBytesPerSecond(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, + GenerateBlocksStats.GeneratedBlockByteCount)), + NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.BlocksBytes * 8))); + } + }); + GenerateBuildBlocks(LocalContent, LocalLookup, NewBlockChunks, NewBlocks, GenerateBlocksStats, UploadStats); + } + + m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::BuildPartManifest, StepCount); + + BuiltPartManifest Manifest = + BuildPartManifestObject(LocalContent, LocalLookup, ChunkController, ReuseBlockIndexes, NewBlocks, LooseChunkIndexes); + + m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::UploadBuildPart, StepCount); + + Stopwatch PutBuildPartResultTimer; + std::pair<IoHash, std::vector<IoHash>> PutBuildPartResult = + m_Storage.BuildStorage->PutBuildPart(m_BuildId, Part.PartId, Part.PartName, Manifest.PartManifest); + if (!m_Options.IsQuiet) + { + ZEN_INFO("PutBuildPart took {}, payload size {}. 
{} attachments are needed.", + NiceTimeSpanMs(PutBuildPartResultTimer.GetElapsedTimeMs()), + NiceBytes(Manifest.PartManifest.GetSize()), + PutBuildPartResult.second.size()); + } + IoHash PartHash = PutBuildPartResult.first; + + m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::UploadAttachments, StepCount); + + // With IgnoreExistingBlocks everything (loose chunks and blocks not uploaded during + // generation) is force-uploaded; otherwise only the attachments the server reported missing + // are sent. + std::vector<IoHash> UnknownChunks; + if (m_Options.IgnoreExistingBlocks) + { + if (m_Options.IsVerbose) + { + ZEN_INFO("PutBuildPart uploading all attachments, needs are: {}", FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); + } + + std::vector<IoHash> ForceUploadChunkHashes; + ForceUploadChunkHashes.reserve(LooseChunkIndexes.size()); + + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + ForceUploadChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + } + + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockHeaders.size(); BlockIndex++) + { + if (NewBlocks.BlockHeaders[BlockIndex]) + { + // Block was not uploaded during generation + ForceUploadChunkHashes.push_back(NewBlocks.BlockDescriptions[BlockIndex].BlockHash); + } + } + UploadAttachmentBatch(ForceUploadChunkHashes, + UnknownChunks, + LocalContent, + LocalLookup, + NewBlockChunks, + NewBlocks, + LooseChunkIndexes, + UploadStats, + LooseChunksStats); + } + else if (!PutBuildPartResult.second.empty()) + { + if (m_Options.IsVerbose) + { + ZEN_INFO("PutBuildPart needs attachments: {}", FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); + } + UploadAttachmentBatch(PutBuildPartResult.second, + UnknownChunks, + LocalContent, + LocalLookup, + NewBlockChunks, + NewBlocks, + LooseChunkIndexes, + UploadStats, + LooseChunksStats); + } + + FinalizeBuildPartWithRetries(Part, + PartHash, + UnknownChunks, + LocalContent, + LocalLookup, + NewBlockChunks, + NewBlocks, + LooseChunkIndexes, + UploadStats, + LooseChunksStats); + + if (!NewBlocks.BlockDescriptions.empty() && !m_AbortFlag) + { + UploadMissingBlockMetadata(NewBlocks, UploadStats); + // 
The newly generated blocks are now known blocks so the next part upload can use those blocks as well + m_KnownBlocks.insert(m_KnownBlocks.end(), NewBlocks.BlockDescriptions.begin(), NewBlocks.BlockDescriptions.end()); + } + + m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::PutBuildPartStats, StepCount); + + m_Storage.BuildStorage->PutBuildPartStats( + m_BuildId, + Part.PartId, + {{"totalSize", double(Part.LocalFolderScanStats.FoundFileByteCount.load())}, + {"reusedRatio", AcceptedByteCountPercent / 100.0}, + {"reusedBlockCount", double(FindBlocksStats.AcceptedBlockCount)}, + {"reusedBlockByteCount", double(ReuseBlocksStats.AcceptedRawByteCount)}, + {"newBlockCount", double(FindBlocksStats.NewBlocksCount)}, + {"newBlockByteCount", double(FindBlocksStats.NewBlocksChunkByteCount)}, + {"uploadedCount", double(UploadStats.BlockCount.load() + UploadStats.ChunkCount.load())}, + {"uploadedByteCount", double(UploadStats.BlocksBytes.load() + UploadStats.ChunksBytes.load())}, + {"uploadedBytesPerSec", + double(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.ChunksBytes + UploadStats.BlocksBytes))}, + {"elapsedTimeSec", double(UploadTimer.GetElapsedTimeMs() / 1000.0)}}); + + m_LocalFolderScanStats += Part.LocalFolderScanStats; + m_ChunkingStats += ChunkingStats; + m_FindBlocksStats += FindBlocksStats; + m_ReuseBlocksStats += ReuseBlocksStats; + m_UploadStats += UploadStats; + m_GenerateBlocksStats += GenerateBlocksStats; + m_LooseChunksStats += LooseChunksStats; +} + +ChunkedFolderContent +BuildsOperationUploadFolder::ScanPartContent(const UploadPart& Part, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + ChunkingStatistics& ChunkingStats) +{ + Stopwatch ScanTimer; + + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Scan Folder"); + + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + ChunkedFolderContent LocalContent = ChunkFolderContent( + ChunkingStats, + 
m_IOWorkerPool, + m_Path, + Part.Content, + ChunkController, + ChunkCache, + m_Progress.GetProgressUpdateDelayMS(), + [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + Part.Content.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(Part.TotalRawSize), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + ProgressBar->UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = Part.TotalRawSize, + .RemainingCount = Part.TotalRawSize - ChunkingStats.BytesHashed.load(), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }, + m_AbortFlag, + m_PauseFlag); + FilteredBytesHashed.Stop(); + ProgressBar->Finish(); + if (m_AbortFlag) + { + return LocalContent; + } + + if (!m_Options.IsQuiet) + { + ZEN_INFO("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec", + Part.Content.Paths.size(), + NiceBytes(Part.TotalRawSize), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load()), + m_Path, + NiceTimeSpanMs(ScanTimer.GetElapsedTimeMs()), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); + } + + return LocalContent; +} + +void +BuildsOperationUploadFolder::ConsumePrepareBuildResult() +{ + const PrepareBuildResult PrepBuildResult = m_PrepBuildResultFuture.get(); + + m_FindBlocksStats.FindBlockTimeMS = PrepBuildResult.ElapsedTimeMs; + m_FindBlocksStats.FoundBlockCount = PrepBuildResult.KnownBlocks.size(); + + if (!m_Options.IsQuiet) + { + ZEN_INFO("Build prepare took {}. {} took {}, payload size {}{}", + NiceTimeSpanMs(PrepBuildResult.ElapsedTimeMs), + m_CreateBuild ? 
"PutBuild" : "GetBuild",
                 NiceTimeSpanMs(PrepBuildResult.PrepareBuildTimeMs),
                 NiceBytes(PrepBuildResult.PayloadSize),
                 m_Options.IgnoreExistingBlocks ? ""
                                                : fmt::format(". Found {} blocks in {}",
                                                              PrepBuildResult.KnownBlocks.size(),
                                                              NiceTimeSpanMs(PrepBuildResult.FindBlocksTimeMs)));
    }

    m_PreferredMultipartChunkSize = PrepBuildResult.PreferredMultipartChunkSize;
    // Attachments at least 4x the preferred multipart chunk size go through the
    // multipart upload path; disabled entirely by using the max uint64 sentinel.
    m_LargeAttachmentSize = m_Options.AllowMultiparts ? m_PreferredMultipartChunkSize * 4u : (std::uint64_t)-1;
    // NOTE(review): PrepBuildResult is declared const above, so this std::move
    // degrades to a copy — confirm whether the const should be dropped.
    m_KnownBlocks = std::move(PrepBuildResult.KnownBlocks);
}

// Partitions the part's chunks into three categories: loose chunks (empty, or too
// large to embed in a block), chunks reusable via already-known blocks, and chunks
// that need new blocks generated. Statistics for each category are accumulated
// into the supplied stats structs.
void
BuildsOperationUploadFolder::ClassifyChunksByBlockEligibility(const ChunkedFolderContent& LocalContent,
                                                              std::vector<uint32_t>& OutLooseChunkIndexes,
                                                              std::vector<uint32_t>& OutNewBlockChunkIndexes,
                                                              std::vector<size_t>& OutReuseBlockIndexes,
                                                              LooseChunksStatistics& LooseChunksStats,
                                                              FindBlocksStatistics& FindBlocksStats,
                                                              ReuseBlocksStatistics& ReuseBlocksStats)
{
    const bool EnableBlocks = true;
    std::vector<std::uint32_t> BlockChunkIndexes;
    for (uint32_t ChunkIndex = 0; ChunkIndex < LocalContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++)
    {
        const uint64_t ChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
        // Zero-size chunks and chunks above the embed limit are uploaded loose.
        if (!EnableBlocks || ChunkRawSize == 0 || ChunkRawSize > m_Options.BlockParameters.MaxChunkEmbedSize)
        {
            OutLooseChunkIndexes.push_back(ChunkIndex);
            LooseChunksStats.ChunkByteCount += ChunkRawSize;
        }
        else
        {
            BlockChunkIndexes.push_back(ChunkIndex);
            FindBlocksStats.PotentialChunkByteCount += ChunkRawSize;
        }
    }
    FindBlocksStats.PotentialChunkCount += BlockChunkIndexes.size();
    LooseChunksStats.ChunkCount = OutLooseChunkIndexes.size();

    // With IgnoreExistingBlocks every block-eligible chunk gets a new block.
    if (m_Options.IgnoreExistingBlocks)
    {
        if (!m_Options.IsQuiet)
        {
            ZEN_INFO("Ignoring any existing blocks in store");
        }
        OutNewBlockChunkIndexes = std::move(BlockChunkIndexes);
        return;
    }

    OutReuseBlockIndexes = FindReuseBlocks(Log(),
                                           m_Options.BlockReuseMinPercentLimit,
                                           m_Options.IsVerbose,
                                           ReuseBlocksStats,
                                           m_KnownBlocks,
                                           LocalContent.ChunkedContent.ChunkHashes,
                                           BlockChunkIndexes,
                                           OutNewBlockChunkIndexes);
    FindBlocksStats.AcceptedBlockCount += OutReuseBlockIndexes.size();

    // Account for every chunk carried by the known (server-side) blocks.
    for (const ChunkBlockDescription& Description : m_KnownBlocks)
    {
        for (uint32_t ChunkRawLength : Description.ChunkRawLengths)
        {
            FindBlocksStats.FoundBlockByteCount += ChunkRawLength;
        }
        FindBlocksStats.FoundBlockChunkCount += Description.ChunkRawHashes.size();
    }
}

// Builds the compact-binary manifest object for one part: chunker description,
// block hash lists (reused + newly generated) and the folder content layout.
// When DoExtraContentValidation is set, the manifest is read back and asserted
// against the local content.
BuildsOperationUploadFolder::BuiltPartManifest
BuildsOperationUploadFolder::BuildPartManifestObject(const ChunkedFolderContent& LocalContent,
                                                     const ChunkedContentLookup& LocalLookup,
                                                     ChunkingController& ChunkController,
                                                     std::span<const size_t> ReuseBlockIndexes,
                                                     const GeneratedBlocks& NewBlocks,
                                                     std::span<const uint32_t> LooseChunkIndexes)
{
    BuiltPartManifest Result;

    CbObjectWriter PartManifestWriter;
    Stopwatch ManifestGenerationTimer;
    // Scope guard: logs manifest generation time/size on every exit path.
    auto __ = MakeGuard([&]() {
        if (!m_Options.IsQuiet)
        {
            ZEN_INFO("Generated build part manifest in {} ({})",
                     NiceTimeSpanMs(ManifestGenerationTimer.GetElapsedTimeMs()),
                     NiceBytes(PartManifestWriter.GetSaveSize()));
        }
    });

    PartManifestWriter.BeginObject("chunker"sv);
    {
        PartManifestWriter.AddString("name"sv, ChunkController.GetName());
        PartManifestWriter.AddObject("parameters"sv, ChunkController.GetParameters());
    }
    PartManifestWriter.EndObject(); // chunker

    // Reused blocks first, then newly generated blocks — ordering matters for
    // the absolute chunk indexing computed below.
    Result.AllChunkBlockHashes.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size());
    Result.AllChunkBlockDescriptions.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size());
    for (size_t ReuseBlockIndex : ReuseBlockIndexes)
    {
        Result.AllChunkBlockDescriptions.push_back(m_KnownBlocks[ReuseBlockIndex]);
        Result.AllChunkBlockHashes.push_back(m_KnownBlocks[ReuseBlockIndex].BlockHash);
    }
    Result.AllChunkBlockDescriptions.insert(Result.AllChunkBlockDescriptions.end(),
                                            NewBlocks.BlockDescriptions.begin(),
NewBlocks.BlockDescriptions.end());
    for (const ChunkBlockDescription& BlockDescription : NewBlocks.BlockDescriptions)
    {
        Result.AllChunkBlockHashes.push_back(BlockDescription.BlockHash);
    }

    // Optional self-check: rebuild the absolute chunk index space (loose chunks
    // first, then block-embedded chunks) and assert it round-trips.
    std::vector<IoHash> AbsoluteChunkHashes;
    if (m_Options.DoExtraContentValidation)
    {
        tsl::robin_map<IoHash, size_t, IoHash::Hasher> ChunkHashToAbsoluteChunkIndex;
        AbsoluteChunkHashes.reserve(LocalContent.ChunkedContent.ChunkHashes.size());
        for (uint32_t ChunkIndex : LooseChunkIndexes)
        {
            ChunkHashToAbsoluteChunkIndex.insert({LocalContent.ChunkedContent.ChunkHashes[ChunkIndex], AbsoluteChunkHashes.size()});
            AbsoluteChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]);
        }
        for (const ChunkBlockDescription& Block : Result.AllChunkBlockDescriptions)
        {
            for (const IoHash& ChunkHash : Block.ChunkRawHashes)
            {
                ChunkHashToAbsoluteChunkIndex.insert({ChunkHash, AbsoluteChunkHashes.size()});
                AbsoluteChunkHashes.push_back(ChunkHash);
            }
        }
        for (const IoHash& ChunkHash : LocalContent.ChunkedContent.ChunkHashes)
        {
            ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(ChunkHash)] == ChunkHash);
            ZEN_ASSERT(LocalContent.ChunkedContent.ChunkHashes[LocalLookup.ChunkHashToChunkIndex.at(ChunkHash)] == ChunkHash);
        }
        for (const uint32_t ChunkIndex : LocalContent.ChunkedContent.ChunkOrders)
        {
            ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex])] ==
                       LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]);
            ZEN_ASSERT(LocalLookup.ChunkHashToChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]) == ChunkIndex);
        }
    }

    // Translate local chunk ordering into the absolute (manifest) index space.
    std::vector<uint32_t> AbsoluteChunkOrders = CalculateAbsoluteChunkOrders(LocalContent.ChunkedContent.ChunkHashes,
                                                                             LocalContent.ChunkedContent.ChunkOrders,
                                                                             LocalLookup.ChunkHashToChunkIndex,
                                                                             LooseChunkIndexes,
                                                                             Result.AllChunkBlockDescriptions);

    if (m_Options.DoExtraContentValidation)
    {
        for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); ChunkOrderIndex++)
        {
            uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndex];
            uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[ChunkOrderIndex];
            const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex];
            const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex];
            ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash);
        }
    }

    WriteBuildContentToCompactBinary(PartManifestWriter,
                                     LocalContent.Platform,
                                     LocalContent.Paths,
                                     LocalContent.RawHashes,
                                     LocalContent.RawSizes,
                                     LocalContent.Attributes,
                                     LocalContent.ChunkedContent.SequenceRawHashes,
                                     LocalContent.ChunkedContent.ChunkCounts,
                                     LocalContent.ChunkedContent.ChunkHashes,
                                     LocalContent.ChunkedContent.ChunkRawSizes,
                                     AbsoluteChunkOrders,
                                     LooseChunkIndexes,
                                     Result.AllChunkBlockHashes);

    // Optional round-trip validation: read the just-written manifest back and
    // assert the reconstructed content matches the local content exactly.
    if (m_Options.DoExtraContentValidation)
    {
        ChunkedFolderContent VerifyFolderContent;

        std::vector<uint32_t> OutAbsoluteChunkOrders;
        std::vector<IoHash> OutLooseChunkHashes;
        std::vector<uint64_t> OutLooseChunkRawSizes;
        std::vector<IoHash> OutBlockRawHashes;
        ReadBuildContentFromCompactBinary(PartManifestWriter.Save(),
                                          VerifyFolderContent.Platform,
                                          VerifyFolderContent.Paths,
                                          VerifyFolderContent.RawHashes,
                                          VerifyFolderContent.RawSizes,
                                          VerifyFolderContent.Attributes,
                                          VerifyFolderContent.ChunkedContent.SequenceRawHashes,
                                          VerifyFolderContent.ChunkedContent.ChunkCounts,
                                          OutAbsoluteChunkOrders,
                                          OutLooseChunkHashes,
                                          OutLooseChunkRawSizes,
                                          OutBlockRawHashes);
        ZEN_ASSERT(OutBlockRawHashes == Result.AllChunkBlockHashes);

        for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++)
        {
            uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex];
            const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex];

            uint32_t VerifyChunkIndex = OutAbsoluteChunkOrders[OrderIndex];
            const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex];

            ZEN_ASSERT(LocalChunkHash == VerifyChunkHash);
        }

        CalculateLocalChunkOrders(OutAbsoluteChunkOrders,
                                  OutLooseChunkHashes,
                                  OutLooseChunkRawSizes,
                                  Result.AllChunkBlockDescriptions,
                                  VerifyFolderContent.ChunkedContent.ChunkHashes,
                                  VerifyFolderContent.ChunkedContent.ChunkRawSizes,
                                  VerifyFolderContent.ChunkedContent.ChunkOrders,
                                  m_Options.DoExtraContentValidation);

        ZEN_ASSERT(LocalContent.Paths == VerifyFolderContent.Paths);
        ZEN_ASSERT(LocalContent.RawHashes == VerifyFolderContent.RawHashes);
        ZEN_ASSERT(LocalContent.RawSizes == VerifyFolderContent.RawSizes);
        ZEN_ASSERT(LocalContent.Attributes == VerifyFolderContent.Attributes);
        ZEN_ASSERT(LocalContent.ChunkedContent.SequenceRawHashes == VerifyFolderContent.ChunkedContent.SequenceRawHashes);
        ZEN_ASSERT(LocalContent.ChunkedContent.ChunkCounts == VerifyFolderContent.ChunkedContent.ChunkCounts);

        for (uint32_t OrderIndex = 0; OrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); OrderIndex++)
        {
            uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex];
            const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex];
            uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex];

            uint32_t VerifyChunkIndex = VerifyFolderContent.ChunkedContent.ChunkOrders[OrderIndex];
            const IoHash VerifyChunkHash = VerifyFolderContent.ChunkedContent.ChunkHashes[VerifyChunkIndex];
            uint64_t VerifyChunkRawSize = VerifyFolderContent.ChunkedContent.ChunkRawSizes[VerifyChunkIndex];

            ZEN_ASSERT(LocalChunkHash == VerifyChunkHash);
            ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize);
        }
    }

    Result.PartManifest = PartManifestWriter.Save();
    return Result;
}

// Uploads one batch of attachments (blocks + loose chunks) identified by raw
// hash, accumulating per-batch statistics into the caller's stats structs.
void
BuildsOperationUploadFolder::UploadAttachmentBatch(std::span<IoHash> RawHashes,
                                                   std::vector<IoHash>& OutUnknownChunks,
                                                   const
ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& NewBlocks, + std::span<const uint32_t> LooseChunkIndexes, + UploadStatistics& UploadStats, + LooseChunksStatistics& LooseChunksStats) +{ + if (m_AbortFlag) + { + return; + } + + UploadStatistics TempUploadStats; + LooseChunksStatistics TempLooseChunksStats; + + Stopwatch TempUploadTimer; + auto __ = MakeGuard([&]() { + if (!m_Options.IsQuiet) + { + uint64_t TempChunkUploadTimeUs = TempUploadTimer.GetElapsedTimeUs(); + ZEN_INFO( + "Uploaded {} ({}) blocks. " + "Compressed {} ({} {}B/s) and uploaded {} ({}) chunks. " + "Transferred {} ({}bits/s) in {}", + TempUploadStats.BlockCount.load(), + NiceBytes(TempUploadStats.BlocksBytes), + + TempLooseChunksStats.CompressedChunkCount.load(), + NiceBytes(TempLooseChunksStats.CompressedChunkBytes.load()), + NiceNum(GetBytesPerSecond(TempLooseChunksStats.CompressChunksElapsedWallTimeUS, TempLooseChunksStats.ChunkByteCount)), + TempUploadStats.ChunkCount.load(), + NiceBytes(TempUploadStats.ChunksBytes), + + NiceBytes(TempUploadStats.BlocksBytes + TempUploadStats.ChunksBytes), + NiceNum(GetBytesPerSecond(TempUploadStats.ElapsedWallTimeUS, TempUploadStats.ChunksBytes * 8)), + NiceTimeSpanMs(TempChunkUploadTimeUs / 1000)); + } + }); + UploadPartBlobs(LocalContent, + LocalLookup, + RawHashes, + NewBlockChunks, + NewBlocks, + LooseChunkIndexes, + m_LargeAttachmentSize, + TempUploadStats, + TempLooseChunksStats, + OutUnknownChunks); + UploadStats += TempUploadStats; + LooseChunksStats += TempLooseChunksStats; +} + +void +BuildsOperationUploadFolder::FinalizeBuildPartWithRetries(const UploadPart& Part, + const IoHash& PartHash, + std::vector<IoHash>& InOutUnknownChunks, + const ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& NewBlocks, + std::span<const uint32_t> 
LooseChunkIndexes, + UploadStatistics& UploadStats, + LooseChunksStatistics& LooseChunksStats) +{ + auto BuildUnkownChunksResponse = [](const std::vector<IoHash>& UnknownChunks, bool WillRetry) { + return fmt::format( + "The following build blobs was reported as needed for upload but was reported as existing at the start of the " + "operation.{}{}", + WillRetry ? " Treating this as a transient inconsistency issue and will attempt to retry finalization."sv : ""sv, + FormatArray<IoHash>(UnknownChunks, "\n "sv)); + }; + + if (!InOutUnknownChunks.empty()) + { + ZEN_WARN("{}", BuildUnkownChunksResponse(InOutUnknownChunks, /*WillRetry*/ true)); + } + + uint32_t FinalizeBuildPartRetryCount = 5; + while (!m_AbortFlag && (FinalizeBuildPartRetryCount--) > 0) + { + Stopwatch FinalizeBuildPartTimer; + std::vector<IoHash> Needs = m_Storage.BuildStorage->FinalizeBuildPart(m_BuildId, Part.PartId, PartHash); + if (!m_Options.IsQuiet) + { + ZEN_INFO("FinalizeBuildPart took {}. {} attachments are missing.", + NiceTimeSpanMs(FinalizeBuildPartTimer.GetElapsedTimeMs()), + Needs.size()); + } + if (Needs.empty()) + { + break; + } + if (m_Options.IsVerbose) + { + ZEN_INFO("FinalizeBuildPart needs attachments: {}", FormatArray<IoHash>(Needs, "\n "sv)); + } + + std::vector<IoHash> RetryUnknownChunks; + UploadAttachmentBatch(Needs, + RetryUnknownChunks, + LocalContent, + LocalLookup, + NewBlockChunks, + NewBlocks, + LooseChunkIndexes, + UploadStats, + LooseChunksStats); + if (RetryUnknownChunks == InOutUnknownChunks) + { + if (FinalizeBuildPartRetryCount > 0) + { + // Back off a bit + Sleep(1000); + } + } + else + { + InOutUnknownChunks = RetryUnknownChunks; + ZEN_WARN("{}", BuildUnkownChunksResponse(InOutUnknownChunks, /*WillRetry*/ FinalizeBuildPartRetryCount != 0)); + } + } + + if (!InOutUnknownChunks.empty()) + { + throw std::runtime_error(BuildUnkownChunksResponse(InOutUnknownChunks, /*WillRetry*/ false)); + } +} + +void 
+BuildsOperationUploadFolder::UploadMissingBlockMetadata(GeneratedBlocks& NewBlocks, UploadStatistics& UploadStats) +{ + uint64_t UploadBlockMetadataCount = 0; + Stopwatch UploadBlockMetadataTimer; + + uint32_t FailedMetadataUploadCount = 1; + int32_t MetadataUploadRetryCount = 3; + while ((MetadataUploadRetryCount-- > 0) && (FailedMetadataUploadCount > 0)) + { + FailedMetadataUploadCount = 0; + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockDescriptions.size(); BlockIndex++) + { + if (m_AbortFlag) + { + break; + } + const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; + if (!NewBlocks.MetaDataHasBeenUploaded[BlockIndex]) + { + const CbObject BlockMetaData = + BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); + if (m_Storage.CacheStorage && m_Options.PopulateCache) + { + m_Storage.CacheStorage->PutBlobMetadatas(m_BuildId, + std::vector<IoHash>({BlockHash}), + std::vector<CbObject>({BlockMetaData})); + } + bool MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData); + if (MetadataSucceeded) + { + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + UploadBlockMetadataCount++; + } + else + { + FailedMetadataUploadCount++; + } + } + } + } + if (UploadBlockMetadataCount > 0) + { + uint64_t ElapsedUS = UploadBlockMetadataTimer.GetElapsedTimeUs(); + UploadStats.ElapsedWallTimeUS += ElapsedUS; + if (!m_Options.IsQuiet) + { + ZEN_INFO("Uploaded metadata for {} blocks in {}", UploadBlockMetadataCount, NiceTimeSpanMs(ElapsedUS / 1000)); + } + } +} + +void +BuildsOperationUploadFolder::UploadPartBlobs(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::span<IoHash> RawHashes, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& NewBlocks, + std::span<const uint32_t> LooseChunkIndexes, + const std::uint64_t LargeAttachmentSize, + 
UploadStatistics& TempUploadStats,
                                             LooseChunksStatistics& TempLooseChunksStats,
                                             std::vector<IoHash>& OutUnknownChunks)
{
    ZEN_TRACE_CPU("UploadPartBlobs");

    // Split the requested hashes into block uploads, loose-chunk uploads, and
    // hashes we cannot account for (reported back via OutUnknownChunks).
    UploadPartClassification Classification =
        ClassifyUploadRawHashes(RawHashes, Content, Lookup, NewBlocks, LooseChunkIndexes, OutUnknownChunks);

    if (Classification.BlockIndexes.empty() && Classification.LooseChunkOrderIndexes.empty())
    {
        return;
    }

    std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Upload Blobs");

    FilteredRate FilteredGenerateBlockBytesPerSecond;
    FilteredRate FilteredCompressedBytesPerSecond;
    FilteredRate FilteredUploadedBytesPerSecond;

    ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);

    // Shared counters updated by the scheduled workers; Work.Wait below blocks
    // until all workers finish, so these locals outlive every task.
    std::atomic<size_t> UploadedBlockSize = 0;
    std::atomic<size_t> UploadedBlockCount = 0;
    std::atomic<size_t> UploadedRawChunkSize = 0;
    std::atomic<size_t> UploadedCompressedChunkSize = 0;
    std::atomic<uint32_t> UploadedChunkCount = 0;
    std::atomic<uint64_t> GeneratedBlockCount = 0;
    std::atomic<uint64_t> GeneratedBlockByteCount = 0;
    std::atomic<uint64_t> QueuedPendingInMemoryBlocksForUpload = 0;

    const size_t UploadBlockCount = Classification.BlockIndexes.size();
    const uint32_t UploadChunkCount = gsl::narrow<uint32_t>(Classification.LooseChunkOrderIndexes.size());
    const uint64_t TotalRawSize = Classification.TotalLooseChunksSize + Classification.TotalBlocksSize;

    UploadPartBlobsContext Context{.Work = Work,
                                   .ReadChunkPool = m_IOWorkerPool,
                                   .UploadChunkPool = m_NetworkPool,
                                   .FilteredGenerateBlockBytesPerSecond = FilteredGenerateBlockBytesPerSecond,
                                   .FilteredCompressedBytesPerSecond = FilteredCompressedBytesPerSecond,
                                   .FilteredUploadedBytesPerSecond = FilteredUploadedBytesPerSecond,
                                   .UploadedBlockSize = UploadedBlockSize,
                                   .UploadedBlockCount = UploadedBlockCount,
                                   .UploadedRawChunkSize = UploadedRawChunkSize,
                                   .UploadedCompressedChunkSize = UploadedCompressedChunkSize,
                                   .UploadedChunkCount = UploadedChunkCount,
                                   .GeneratedBlockCount = GeneratedBlockCount,
                                   .GeneratedBlockByteCount = GeneratedBlockByteCount,
                                   .QueuedPendingInMemoryBlocksForUpload = QueuedPendingInMemoryBlocksForUpload,
                                   .UploadBlockCount = UploadBlockCount,
                                   .UploadChunkCount = UploadChunkCount,
                                   .LargeAttachmentSize = LargeAttachmentSize,
                                   .NewBlocks = NewBlocks,
                                   .Content = Content,
                                   .Lookup = Lookup,
                                   .NewBlockChunks = NewBlockChunks,
                                   .LooseChunkIndexes = LooseChunkIndexes,
                                   .TempUploadStats = TempUploadStats,
                                   .TempLooseChunksStats = TempLooseChunksStats};

    ScheduleBlockGenerationAndUpload(Context, Classification.BlockIndexes);
    ScheduleLooseChunkCompressionAndUpload(Context, Classification.LooseChunkOrderIndexes);

    // Blocks until all scheduled work completes; the lambda runs periodically to
    // refresh the progress bar.
    Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
        ZEN_UNUSED(PendingWork);
        FilteredCompressedBytesPerSecond.Update(TempLooseChunksStats.CompressedChunkRawBytes.load());
        FilteredGenerateBlockBytesPerSecond.Update(GeneratedBlockByteCount.load());
        FilteredUploadedBytesPerSecond.Update(UploadedCompressedChunkSize.load() + UploadedBlockSize.load());
        uint64_t UploadedRawSize = UploadedRawChunkSize.load() + UploadedBlockSize.load();
        uint64_t UploadedCompressedSize = UploadedCompressedChunkSize.load() + UploadedBlockSize.load();

        std::string Details = fmt::format(
            "Compressed {}/{} ({}/{}{}) chunks. "
            "Uploaded {}/{} ({}/{}) blobs "
            "({}{})",
            TempLooseChunksStats.CompressedChunkCount.load(),
            Classification.LooseChunkOrderIndexes.size(),
            NiceBytes(TempLooseChunksStats.CompressedChunkRawBytes),
            NiceBytes(Classification.TotalLooseChunksSize),
            (TempLooseChunksStats.CompressedChunkCount == Classification.LooseChunkOrderIndexes.size())
                ? ""
                : fmt::format(" {}B/s", NiceNum(FilteredCompressedBytesPerSecond.GetCurrent())),

            UploadedBlockCount.load() + UploadedChunkCount.load(),
            UploadBlockCount + UploadChunkCount,
            NiceBytes(UploadedRawSize),
            NiceBytes(TotalRawSize),

            NiceBytes(UploadedCompressedSize),
            // NOTE(review): this filter is fed byte counts, yet the label says
            // "bits/s" without a *8 conversion — confirm the intended unit.
            (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount)
                ? ""
                : fmt::format(" {}bits/s", NiceNum(FilteredUploadedBytesPerSecond.GetCurrent())));

        ProgressBar->UpdateState({.Task = "Uploading blobs ",
                                  .Details = Details,
                                  .TotalCount = gsl::narrow<uint64_t>(TotalRawSize),
                                  .RemainingCount = gsl::narrow<uint64_t>(TotalRawSize - UploadedRawSize),
                                  .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
                                 false);
    });

    ZEN_ASSERT(m_AbortFlag || QueuedPendingInMemoryBlocksForUpload.load() == 0);

    ProgressBar->Finish();

    TempUploadStats.ElapsedWallTimeUS += FilteredUploadedBytesPerSecond.GetElapsedTimeUS();
    TempLooseChunksStats.CompressChunksElapsedWallTimeUS += FilteredCompressedBytesPerSecond.GetElapsedTimeUS();
}

// Classifies each requested raw hash as a new block, a loose chunk, or unknown.
// Hashes that resolve to a known chunk that is neither loose nor a new block are
// ignored (they are carried by reused blocks).
BuildsOperationUploadFolder::UploadPartClassification
BuildsOperationUploadFolder::ClassifyUploadRawHashes(std::span<IoHash> RawHashes,
                                                     const ChunkedFolderContent& Content,
                                                     const ChunkedContentLookup& Lookup,
                                                     const GeneratedBlocks& NewBlocks,
                                                     std::span<const uint32_t> LooseChunkIndexes,
                                                     std::vector<IoHash>& OutUnknownChunks)
{
    UploadPartClassification Result;

    tsl::robin_map<uint32_t, uint32_t> ChunkIndexToLooseChunkOrderIndex;
    ChunkIndexToLooseChunkOrderIndex.reserve(LooseChunkIndexes.size());
    for (uint32_t OrderIndex = 0; OrderIndex < LooseChunkIndexes.size(); OrderIndex++)
    {
        ChunkIndexToLooseChunkOrderIndex.insert_or_assign(LooseChunkIndexes[OrderIndex], OrderIndex);
    }

    for (const IoHash& RawHash : RawHashes)
    {
        if (auto It = NewBlocks.BlockHashToBlockIndex.find(RawHash); It != NewBlocks.BlockHashToBlockIndex.end())
        {
            Result.BlockIndexes.push_back(It->second);
Result.TotalBlocksSize += NewBlocks.BlockSizes[It->second];
        }
        else if (auto ChunkIndexIt = Lookup.ChunkHashToChunkIndex.find(RawHash); ChunkIndexIt != Lookup.ChunkHashToChunkIndex.end())
        {
            const uint32_t ChunkIndex = ChunkIndexIt->second;
            // Only loose chunks are uploaded individually; known chunks that are
            // not loose are embedded in (reused) blocks and need no upload here.
            if (auto LooseOrderIndexIt = ChunkIndexToLooseChunkOrderIndex.find(ChunkIndex);
                LooseOrderIndexIt != ChunkIndexToLooseChunkOrderIndex.end())
            {
                Result.LooseChunkOrderIndexes.push_back(LooseOrderIndexIt->second);
                Result.TotalLooseChunksSize += Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
            }
        }
        else
        {
            OutUnknownChunks.push_back(RawHash);
        }
    }
    return Result;
}

// Schedules one read-pool task per block that (re)generates the block payload and
// then hands it off to UploadBlockPayload for the network-pool upload.
void
BuildsOperationUploadFolder::ScheduleBlockGenerationAndUpload(UploadPartBlobsContext& Context, std::span<const size_t> BlockIndexes)
{
    for (const size_t BlockIndex : BlockIndexes)
    {
        const IoHash& BlockHash = Context.NewBlocks.BlockDescriptions[BlockIndex].BlockHash;
        if (m_AbortFlag)
        {
            break;
        }
        Context.Work.ScheduleWork(
            Context.ReadChunkPool,
            // BlockHash is captured by value — the worker may outlive this loop iteration.
            [this, &Context, BlockHash = IoHash(BlockHash), BlockIndex, GenerateBlockCount = BlockIndexes.size()](std::atomic<bool>&) {
                if (m_AbortFlag)
                {
                    return;
                }
                ZEN_TRACE_CPU("UploadPartBlobs_GenerateBlock");

                Context.FilteredGenerateBlockBytesPerSecond.Start();

                Stopwatch GenerateTimer;
                CompositeBuffer Payload;
                // A pre-existing header means the block can be rebuilt from local
                // chunk data; otherwise the block is generated from scratch.
                if (Context.NewBlocks.BlockHeaders[BlockIndex])
                {
                    Payload = RebuildBlock(Context.Content,
                                           Context.Lookup,
                                           std::move(Context.NewBlocks.BlockHeaders[BlockIndex]),
                                           Context.NewBlockChunks[BlockIndex])
                                  .GetCompressed();
                }
                else
                {
                    ChunkBlockDescription BlockDescription;
                    CompressedBuffer CompressedBlock =
                        GenerateBlock(Context.Content, Context.Lookup, Context.NewBlockChunks[BlockIndex], BlockDescription);
                    if (!CompressedBlock)
                    {
                        throw std::runtime_error(fmt::format("Failed generating block {}", BlockHash));
                    }
                    ZEN_ASSERT(BlockDescription.BlockHash == BlockHash);
                    Payload = std::move(CompressedBlock).GetCompressed();
                }

                Context.GeneratedBlockByteCount += Context.NewBlocks.BlockSizes[BlockIndex];
                // Last generator to finish stops the generation-rate filter.
                if (Context.GeneratedBlockCount.fetch_add(1) + 1 == GenerateBlockCount)
                {
                    Context.FilteredGenerateBlockBytesPerSecond.Stop();
                }
                if (m_Options.IsVerbose)
                {
                    ZEN_INFO("{} block {} ({}) containing {} chunks in {}",
                             Context.NewBlocks.BlockHeaders[BlockIndex] ? "Regenerated" : "Generated",
                             Context.NewBlocks.BlockDescriptions[BlockIndex].BlockHash,
                             NiceBytes(Context.NewBlocks.BlockSizes[BlockIndex]),
                             Context.NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size(),
                             NiceTimeSpanMs(GenerateTimer.GetElapsedTimeMs()));
                }
                if (!m_AbortFlag)
                {
                    UploadBlockPayload(Context, BlockIndex, BlockHash, std::move(Payload));
                }
            });
    }
}

// Schedules the network-pool upload of one generated block payload. At most 16
// payloads are kept queued in memory; beyond that the payload is spilled to a
// temp file first to bound memory usage.
void
BuildsOperationUploadFolder::UploadBlockPayload(UploadPartBlobsContext& Context,
                                                size_t BlockIndex,
                                                const IoHash& BlockHash,
                                                CompositeBuffer Payload)
{
    bool IsInMemoryBlock = true;
    if (Context.QueuedPendingInMemoryBlocksForUpload.load() > 16)
    {
        ZEN_TRACE_CPU("AsyncUploadBlock_WriteTempBlock");
        std::filesystem::path TempFilePath = m_Options.TempDir / (BlockHash.ToHexString());
        Payload = CompositeBuffer(WriteToTempFile(std::move(Payload), TempFilePath));
        IsInMemoryBlock = false;
    }
    else
    {
        Context.QueuedPendingInMemoryBlocksForUpload++;
    }

    Context.Work.ScheduleWork(
        Context.UploadChunkPool,
        [this, &Context, IsInMemoryBlock, BlockIndex, BlockHash = IoHash(BlockHash), Payload = CompositeBuffer(std::move(Payload))](
            std::atomic<bool>&) {
            // Guard releases the in-memory queue slot on every exit path.
            auto _ = MakeGuard([IsInMemoryBlock, &Context] {
                if (IsInMemoryBlock)
                {
                    Context.QueuedPendingInMemoryBlocksForUpload--;
                }
            });
            if (m_AbortFlag)
            {
                return;
            }
            ZEN_TRACE_CPU("AsyncUploadBlock");

            const uint64_t PayloadSize = Payload.GetSize();

            Context.FilteredUploadedBytesPerSecond.Start();
            const CbObject BlockMetaData =
                BuildChunkBlockDescription(Context.NewBlocks.BlockDescriptions[BlockIndex],
Context.NewBlocks.BlockMetaDatas[BlockIndex]);

            // Best-effort cache population before the authoritative upload.
            if (m_Storage.CacheStorage && m_Options.PopulateCache)
            {
                m_Storage.CacheStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload);
            }

            try
            {
                m_Storage.BuildStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload);
            }
            catch (const std::exception&)
            {
                // Silence http errors due to abort
                if (!m_AbortFlag)
                {
                    throw;
                }
            }

            if (m_AbortFlag)
            {
                return;
            }
            if (m_Options.IsVerbose)
            {
                ZEN_INFO("Uploaded block {} ({}) containing {} chunks",
                         BlockHash,
                         NiceBytes(PayloadSize),
                         Context.NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size());
            }
            Context.UploadedBlockSize += PayloadSize;
            Context.TempUploadStats.BlocksBytes += PayloadSize;

            if (m_Storage.CacheStorage && m_Options.PopulateCache)
            {
                m_Storage.CacheStorage->PutBlobMetadatas(m_BuildId,
                                                         std::vector<IoHash>({BlockHash}),
                                                         std::vector<CbObject>({BlockMetaData}));
            }

            // Metadata upload failure is tolerated here; UploadMissingBlockMetadata
            // retries any block whose MetaDataHasBeenUploaded flag stays false.
            bool MetadataSucceeded = false;
            try
            {
                MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData);
            }
            catch (const std::exception&)
            {
                // Silence http errors due to abort
                if (!m_AbortFlag)
                {
                    throw;
                }
            }
            if (m_AbortFlag)
            {
                return;
            }
            if (MetadataSucceeded)
            {
                if (m_Options.IsVerbose)
                {
                    ZEN_INFO("Uploaded block {} metadata ({})", BlockHash, NiceBytes(BlockMetaData.GetSize()));
                }
                Context.NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true;
                Context.TempUploadStats.BlocksBytes += BlockMetaData.GetSize();
            }

            Context.TempUploadStats.BlockCount++;

            // Last finished uploader (blocks and chunks both done) stops the rate filter.
            if (Context.UploadedBlockCount.fetch_add(1) + 1 == Context.UploadBlockCount &&
                Context.UploadedChunkCount == Context.UploadChunkCount)
            {
                Context.FilteredUploadedBytesPerSecond.Stop();
            }
        });
}

// Schedules one read-pool task per loose chunk that compresses the chunk and
// hands the payload to UploadLooseChunkPayload for the network-pool upload.
void
BuildsOperationUploadFolder::ScheduleLooseChunkCompressionAndUpload(UploadPartBlobsContext& Context,
                                                                    std::span<const uint32_t> LooseChunkOrderIndexes)
{
    for (const uint32_t LooseChunkOrderIndex : LooseChunkOrderIndexes)
    {
        const uint32_t ChunkIndex = Context.LooseChunkIndexes[LooseChunkOrderIndex];
        Context.Work.ScheduleWork(Context.ReadChunkPool,
                                  [this, &Context, LooseChunkOrderCount = LooseChunkOrderIndexes.size(), ChunkIndex](std::atomic<bool>&) {
                                      if (m_AbortFlag)
                                      {
                                          return;
                                      }
                                      ZEN_TRACE_CPU("UploadPartBlobs_CompressChunk");

                                      Context.FilteredCompressedBytesPerSecond.Start();
                                      Stopwatch CompressTimer;
                                      CompositeBuffer Payload =
                                          CompressChunk(Context.Content, Context.Lookup, ChunkIndex, Context.TempLooseChunksStats);
                                      if (m_Options.IsVerbose)
                                      {
                                          ZEN_INFO("Compressed chunk {} ({} -> {}) in {}",
                                                   Context.Content.ChunkedContent.ChunkHashes[ChunkIndex],
                                                   NiceBytes(Context.Content.ChunkedContent.ChunkRawSizes[ChunkIndex]),
                                                   NiceBytes(Payload.GetSize()),
                                                   NiceTimeSpanMs(CompressTimer.GetElapsedTimeMs()));
                                      }
                                      const uint64_t ChunkRawSize = Context.Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
                                      Context.TempUploadStats.ReadFromDiskBytes += ChunkRawSize;
                                      // Last compressor to finish stops the compression-rate filter.
                                      if (Context.TempLooseChunksStats.CompressedChunkCount == LooseChunkOrderCount)
                                      {
                                          Context.FilteredCompressedBytesPerSecond.Stop();
                                      }
                                      if (!m_AbortFlag)
                                      {
                                          UploadLooseChunkPayload(Context,
                                                                  Context.Content.ChunkedContent.ChunkHashes[ChunkIndex],
                                                                  ChunkRawSize,
                                                                  std::move(Payload));
                                      }
                                  });
    }
}

// Schedules the network-pool upload of one compressed loose chunk. Payloads at
// or above LargeAttachmentSize go through the multipart upload path.
void
BuildsOperationUploadFolder::UploadLooseChunkPayload(UploadPartBlobsContext& Context,
                                                     const IoHash& RawHash,
                                                     uint64_t RawSize,
                                                     CompositeBuffer Payload)
{
    Context.Work.ScheduleWork(
        Context.UploadChunkPool,
        [this, &Context, RawHash = IoHash(RawHash), RawSize, Payload = CompositeBuffer(std::move(Payload))](std::atomic<bool>&) mutable {
            if (m_AbortFlag)
            {
                return;
            }
            ZEN_TRACE_CPU("AsyncUploadLooseChunk");

            const uint64_t PayloadSize = Payload.GetSize();

            // Best-effort cache population before the authoritative upload.
            if (m_Storage.CacheStorage && m_Options.PopulateCache)
            {
                m_Storage.CacheStorage->PutBuildBlob(m_BuildId, RawHash,
ZenContentType::kCompressedBinary, Payload);
            }

            if (PayloadSize >= Context.LargeAttachmentSize)
            {
                // Multipart path: the storage backend returns the per-part work
                // items, which are scheduled back onto the upload pool.
                ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart");
                Context.TempUploadStats.MultipartAttachmentCount++;
                try
                {
                    std::vector<std::function<void()>> MultipartWork = m_Storage.BuildStorage->PutLargeBuildBlob(
                        m_BuildId,
                        RawHash,
                        ZenContentType::kCompressedBinary,
                        PayloadSize,
                        // Part provider: slices the payload for a given range.
                        [Payload = std::move(Payload), &Context](uint64_t Offset, uint64_t Size) -> IoBuffer {
                            Context.FilteredUploadedBytesPerSecond.Start();

                            IoBuffer PartPayload = Payload.Mid(Offset, Size).Flatten().AsIoBuffer();
                            PartPayload.SetContentType(ZenContentType::kBinary);
                            return PartPayload;
                        },
                        // Progress callback: accumulates sent bytes; on completion
                        // updates chunk counters and possibly stops the rate filter.
                        [&Context, RawSize](uint64_t SentBytes, bool IsComplete) {
                            Context.TempUploadStats.ChunksBytes += SentBytes;
                            Context.UploadedCompressedChunkSize += SentBytes;
                            if (IsComplete)
                            {
                                Context.TempUploadStats.ChunkCount++;
                                if (Context.UploadedChunkCount.fetch_add(1) + 1 == Context.UploadChunkCount &&
                                    Context.UploadedBlockCount == Context.UploadBlockCount)
                                {
                                    Context.FilteredUploadedBytesPerSecond.Stop();
                                }
                                Context.UploadedRawChunkSize += RawSize;
                            }
                        });
                    for (auto& WorkPart : MultipartWork)
                    {
                        Context.Work.ScheduleWork(Context.UploadChunkPool, [Work = std::move(WorkPart)](std::atomic<bool>& AbortFlag) {
                            ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart_Work");
                            if (!AbortFlag)
                            {
                                Work();
                            }
                        });
                    }
                    // NOTE(review): this logs before the scheduled multipart work
                    // items have actually executed — confirm intended timing.
                    if (m_Options.IsVerbose)
                    {
                        ZEN_INFO("Uploaded multipart chunk {} ({})", RawHash, NiceBytes(PayloadSize));
                    }
                }
                catch (const std::exception&)
                {
                    // Silence http errors due to abort
                    if (!m_AbortFlag)
                    {
                        throw;
                    }
                }
                return;
            }

            ZEN_TRACE_CPU("AsyncUploadLooseChunk_Singlepart");
            try
            {
                m_Storage.BuildStorage->PutBuildBlob(m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload);
            }
            catch (const std::exception&)
            {
                // Silence http errors due to abort
                if (!m_AbortFlag)
                {
                    throw;
                }
            }
            if (m_AbortFlag)
            {
                return;
            }
            if (m_Options.IsVerbose)
            {
                ZEN_INFO("Uploaded chunk {} ({})", RawHash, NiceBytes(PayloadSize));
            }
            Context.TempUploadStats.ChunksBytes += Payload.GetSize();
            Context.TempUploadStats.ChunkCount++;
            Context.UploadedCompressedChunkSize += Payload.GetSize();
            Context.UploadedRawChunkSize += RawSize;
            // Last finished uploader (chunks and blocks both done) stops the rate filter.
            if (Context.UploadedChunkCount.fetch_add(1) + 1 == Context.UploadChunkCount &&
                Context.UploadedBlockCount == Context.UploadBlockCount)
            {
                Context.FilteredUploadedBytesPerSecond.Stop();
            }
        });
}

// Reads one loose chunk from its source file and compresses it (Oodle Mermaid,
// VeryFast) via a temp file unless the file extension is flagged incompressible.
// Throws std::runtime_error on read failure or size mismatch.
// NOTE(review): this function continues past the end of the visible range; the
// trailing uncompressed-path code is not shown here.
CompositeBuffer
BuildsOperationUploadFolder::CompressChunk(const ChunkedFolderContent& Content,
                                           const ChunkedContentLookup& Lookup,
                                           uint32_t ChunkIndex,
                                           LooseChunksStatistics& TempLooseChunksStats)
{
    ZEN_TRACE_CPU("CompressChunk");
    ZEN_ASSERT(!m_Options.TempDir.empty());
    const IoHash& ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex];
    const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];

    // A chunk may occur in several files; any occurrence has identical bytes, so
    // read from the first sequence location.
    const ChunkedContentLookup::ChunkSequenceLocation& Source = GetChunkSequenceLocations(Lookup, ChunkIndex)[0];
    const std::uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[Source.SequenceIndex];
    IoBuffer RawSource = IoBufferBuilder::MakeFromFile((m_Path / Content.Paths[PathIndex]).make_preferred(), Source.Offset, ChunkSize);
    if (!RawSource)
    {
        throw std::runtime_error(fmt::format("Failed fetching chunk {}", ChunkHash));
    }
    if (RawSource.GetSize() != ChunkSize)
    {
        throw std::runtime_error(fmt::format("Fetched chunk {} has invalid size", ChunkHash));
    }

    const bool ShouldCompressChunk = IsChunkCompressable(m_NonCompressableExtensionHashes, Lookup, ChunkIndex);
    const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None;

    if (ShouldCompressChunk)
    {
        std::filesystem::path TempFilePath = m_Options.TempDir / ChunkHash.ToHexString();

        BasicFile CompressedFile;
        std::error_code Ec;
        CompressedFile.Open(TempFilePath, BasicFile::Mode::kTruncateDelete, Ec);
        if (Ec)
        {
            throw std::runtime_error(fmt::format("Failed creating temporary file for compressing blob {}, reason: ({}) {}",
                                                 ChunkHash,
                                                 Ec.value(),
                                                 Ec.message()));
        }

        // Track what this compression attempt contributed so the stats can be
        // rolled back if compression turns out not to be worthwhile.
        uint64_t StreamRawBytes = 0;
        uint64_t StreamCompressedBytes = 0;

        bool CouldCompress = CompressedBuffer::CompressToStream(
            CompositeBuffer(SharedBuffer(RawSource)),
            [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
                ZEN_UNUSED(SourceOffset);
                TempLooseChunksStats.CompressedChunkRawBytes += SourceSize;
                CompressedFile.Write(RangeBuffer, Offset);
                TempLooseChunksStats.CompressedChunkBytes += RangeBuffer.GetSize();
                StreamRawBytes += SourceSize;
                StreamCompressedBytes += RangeBuffer.GetSize();
            },
            OodleCompressor::Mermaid,
            CompressionLevel);
        if (CouldCompress)
        {
            uint64_t CompressedSize = CompressedFile.FileSize();
            // Hand the file handle over to an IoBuffer that deletes it on close.
            void* FileHandle = CompressedFile.Detach();
            IoBuffer TempPayload = IoBuffer(IoBuffer::File,
                                            FileHandle,
                                            0,
                                            CompressedSize,
                                            /*IsWholeFile*/ true);
            ZEN_ASSERT(TempPayload);
            TempPayload.SetDeleteOnClose(true);
            IoHash RawHash;
            uint64_t RawSize;
            CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(TempPayload), RawHash, RawSize);
            ZEN_ASSERT(Compressed);
            ZEN_ASSERT(RawHash == ChunkHash);
            ZEN_ASSERT(RawSize == ChunkSize);

            TempLooseChunksStats.CompressedChunkCount++;

            return Compressed.GetCompressed();
        }
        else
        {
            // Compression was not beneficial: undo this attempt's contribution.
            TempLooseChunksStats.CompressedChunkRawBytes -= StreamRawBytes;
            TempLooseChunksStats.CompressedChunkBytes -= StreamCompressedBytes;
        }
        CompressedFile.Close();
        RemoveFile(TempFilePath, Ec);
        ZEN_UNUSED(Ec);
    }

    CompressedBuffer
CompressedBlob = + CompressedBuffer::Compress(SharedBuffer(std::move(RawSource)), OodleCompressor::Mermaid, CompressionLevel); + if (!CompressedBlob) + { + throw std::runtime_error(fmt::format("Failed to compress large blob {}", ChunkHash)); + } + ZEN_ASSERT_SLOW(CompressedBlob.DecodeRawHash() == ChunkHash); + ZEN_ASSERT_SLOW(CompressedBlob.DecodeRawSize() == ChunkSize); + + TempLooseChunksStats.CompressedChunkRawBytes += ChunkSize; + TempLooseChunksStats.CompressedChunkBytes += CompressedBlob.GetCompressedSize(); + + // If we use none-compression, the compressed blob references the data and has 64 kb in memory so we don't need to write it to disk + if (ShouldCompressChunk) + { + std::filesystem::path TempFilePath = m_Options.TempDir / (ChunkHash.ToHexString()); + IoBuffer TempPayload = WriteToTempFile(std::move(CompressedBlob).GetCompressed(), TempFilePath); + CompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)); + } + + TempLooseChunksStats.CompressedChunkCount++; + return std::move(CompressedBlob).GetCompressed(); +} + +std::vector<std::pair<Oid, std::string>> +UploadFolder(LoggerRef Log, + ProgressBase& Progress, + TransferThreadWorkers& Workers, + StorageInstance& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + const Oid& BuildId, + const Oid& BuildPartId, + std::string_view BuildPartName, + const std::filesystem::path& Path, + const std::filesystem::path& ManifestPath, + const CbObject& MetaData, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + const UploadFolderOptions& Options) +{ + Progress.SetLogOperationName("Upload Folder"); + + Stopwatch UploadTimer; + + BuildsOperationUploadFolder UploadOp( + Log, + Progress, + Storage, + AbortFlag, + PauseFlag, + Workers.GetIOWorkerPool(), + Workers.GetNetworkPool(), + BuildId, + Path, + Options.CreateBuild, + std::move(MetaData), + BuildsOperationUploadFolder::Options{.IsQuiet = Options.IsQuiet, + .IsVerbose = Options.IsVerbose, + 
.DoExtraContentValidation = Options.DoExtraContentVerify, + .FindBlockMaxCount = Options.FindBlockMaxCount, + .BlockReuseMinPercentLimit = Options.BlockReuseMinPercentLimit, + .AllowMultiparts = Options.AllowMultiparts, + .IgnoreExistingBlocks = Options.IgnoreExistingBlocks, + .TempDir = Options.TempDir, + .ExcludeFolders = Options.ExcludeFolders, + .ExcludeExtensions = Options.ExcludeExtensions, + .NonCompressableExtensions = DefaultSplitOnlyExtensions, + .PopulateCache = Options.UploadToZenCache}); + + std::vector<std::pair<Oid, std::string>> UploadedParts = + UploadOp.Execute(BuildPartId, BuildPartName, ManifestPath, ChunkController, ChunkCache); + if (AbortFlag) + { + return {}; + } + + if (Options.IsVerbose) + { + ZEN_CONSOLE( + "Folder scanning stats:" + "\n FoundFileCount: {}" + "\n FoundFileByteCount: {}" + "\n AcceptedFileCount: {}" + "\n AcceptedFileByteCount: {}" + "\n ElapsedWallTimeUS: {}", + UploadOp.m_LocalFolderScanStats.FoundFileCount.load(), + NiceBytes(UploadOp.m_LocalFolderScanStats.FoundFileByteCount.load()), + UploadOp.m_LocalFolderScanStats.AcceptedFileCount.load(), + NiceBytes(UploadOp.m_LocalFolderScanStats.AcceptedFileByteCount.load()), + NiceLatencyNs(UploadOp.m_LocalFolderScanStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE( + "Chunking stats:" + "\n FilesProcessed: {}" + "\n FilesChunked: {}" + "\n BytesHashed: {}" + "\n UniqueChunksFound: {}" + "\n UniqueSequencesFound: {}" + "\n UniqueBytesFound: {}" + "\n FilesFoundInCache: {}" + "\n ChunksFoundInCache: {}" + "\n FilesStoredInCache: {}" + "\n ChunksStoredInCache: {}" + "\n ElapsedWallTimeUS: {}", + UploadOp.m_ChunkingStats.FilesProcessed.load(), + UploadOp.m_ChunkingStats.FilesChunked.load(), + NiceBytes(UploadOp.m_ChunkingStats.BytesHashed.load()), + UploadOp.m_ChunkingStats.UniqueChunksFound.load(), + UploadOp.m_ChunkingStats.UniqueSequencesFound.load(), + NiceBytes(UploadOp.m_ChunkingStats.UniqueBytesFound.load()), + UploadOp.m_ChunkingStats.FilesFoundInCache.load(), + 
UploadOp.m_ChunkingStats.ChunksFoundInCache.load(), + NiceBytes(UploadOp.m_ChunkingStats.BytesFoundInCache.load()), + UploadOp.m_ChunkingStats.FilesStoredInCache.load(), + UploadOp.m_ChunkingStats.ChunksStoredInCache.load(), + NiceBytes(UploadOp.m_ChunkingStats.BytesStoredInCache.load()), + NiceLatencyNs(UploadOp.m_ChunkingStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE( + "Find block stats:" + "\n FindBlockTimeMS: {}" + "\n PotentialChunkCount: {}" + "\n PotentialChunkByteCount: {}" + "\n FoundBlockCount: {}" + "\n FoundBlockChunkCount: {}" + "\n FoundBlockByteCount: {}" + "\n AcceptedBlockCount: {}" + "\n NewBlocksCount: {}" + "\n NewBlocksChunkCount: {}" + "\n NewBlocksChunkByteCount: {}", + NiceTimeSpanMs(UploadOp.m_FindBlocksStats.FindBlockTimeMS), + UploadOp.m_FindBlocksStats.PotentialChunkCount, + NiceBytes(UploadOp.m_FindBlocksStats.PotentialChunkByteCount), + UploadOp.m_FindBlocksStats.FoundBlockCount, + UploadOp.m_FindBlocksStats.FoundBlockChunkCount, + NiceBytes(UploadOp.m_FindBlocksStats.FoundBlockByteCount), + UploadOp.m_FindBlocksStats.AcceptedBlockCount, + UploadOp.m_FindBlocksStats.NewBlocksCount, + UploadOp.m_FindBlocksStats.NewBlocksChunkCount, + NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount)); + + ZEN_CONSOLE( + "Reuse block stats:" + "\n AcceptedChunkCount: {}" + "\n AcceptedByteCount: {}" + "\n AcceptedRawByteCount: {}" + "\n RejectedBlockCount: {}" + "\n RejectedChunkCount: {}" + "\n RejectedByteCount: {}" + "\n AcceptedReduntantChunkCount: {}" + "\n AcceptedReduntantByteCount: {}", + UploadOp.m_ReuseBlocksStats.AcceptedChunkCount, + NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedByteCount), + NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedRawByteCount), + UploadOp.m_ReuseBlocksStats.RejectedBlockCount, + UploadOp.m_ReuseBlocksStats.RejectedChunkCount, + NiceBytes(UploadOp.m_ReuseBlocksStats.RejectedByteCount), + UploadOp.m_ReuseBlocksStats.AcceptedReduntantChunkCount, + 
NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedReduntantByteCount)); + + ZEN_CONSOLE( + "Generate blocks stats:" + "\n GeneratedBlockByteCount: {}" + "\n GeneratedBlockCount: {}" + "\n GenerateBlocksElapsedWallTimeUS: {}", + NiceBytes(UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount.load()), + UploadOp.m_GenerateBlocksStats.GeneratedBlockCount.load(), + NiceLatencyNs(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE( + "Loose chunks stats:" + "\n ChunkCount: {}" + "\n ChunkByteCount: {}" + "\n CompressedChunkCount: {}" + "\n CompressChunksElapsedWallTimeUS: {}", + UploadOp.m_LooseChunksStats.ChunkCount, + NiceBytes(UploadOp.m_LooseChunksStats.ChunkByteCount), + UploadOp.m_LooseChunksStats.CompressedChunkCount.load(), + NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkBytes.load()), + NiceLatencyNs(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE( + "Disk stats:" + "\n OpenReadCount: {}" + "\n OpenWriteCount: {}" + "\n ReadCount: {}" + "\n ReadByteCount: {}" + "\n WriteCount: {} ({} cloned)" + "\n WriteByteCount: {} ({} cloned)" + "\n CurrentOpenFileCount: {}", + UploadOp.m_DiskStats.OpenReadCount.load(), + UploadOp.m_DiskStats.OpenWriteCount.load(), + UploadOp.m_DiskStats.ReadCount.load(), + NiceBytes(UploadOp.m_DiskStats.ReadByteCount.load()), + UploadOp.m_DiskStats.WriteCount.load(), + UploadOp.m_DiskStats.CloneCount.load(), + NiceBytes(UploadOp.m_DiskStats.WriteByteCount.load()), + NiceBytes(UploadOp.m_DiskStats.CloneByteCount.load()), + UploadOp.m_DiskStats.CurrentOpenFileCount.load()); + + ZEN_CONSOLE( + "Upload stats:" + "\n BlockCount: {}" + "\n BlocksBytes: {}" + "\n ChunkCount: {}" + "\n ChunksBytes: {}" + "\n ReadFromDiskBytes: {}" + "\n MultipartAttachmentCount: {}" + "\n ElapsedWallTimeUS: {}", + UploadOp.m_UploadStats.BlockCount.load(), + NiceBytes(UploadOp.m_UploadStats.BlocksBytes.load()), + UploadOp.m_UploadStats.ChunkCount.load(), + 
NiceBytes(UploadOp.m_UploadStats.ChunksBytes.load()), + NiceBytes(UploadOp.m_UploadStats.ReadFromDiskBytes.load()), + UploadOp.m_UploadStats.MultipartAttachmentCount.load(), + NiceLatencyNs(UploadOp.m_UploadStats.ElapsedWallTimeUS * 1000)); + } + + const double DeltaByteCountPercent = + UploadOp.m_ChunkingStats.BytesHashed > 0 + ? (100.0 * (UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount + UploadOp.m_LooseChunksStats.CompressedChunkBytes)) / + (UploadOp.m_ChunkingStats.BytesHashed) + : 0.0; + + const std::string MultipartAttachmentStats = + Options.AllowMultiparts ? fmt::format(" ({} as multipart)", UploadOp.m_UploadStats.MultipartAttachmentCount.load()) : ""; + + if (!Options.IsQuiet) + { + ZEN_CONSOLE( + "Uploaded part {} ('{}') to build {}, {}\n" + " Scanned files: {:>8} ({}), {}B/sec, {}\n" + " New data: {:>8} ({}) {:.1f}%\n" + " New blocks: {:>8} ({} -> {}), {}B/sec, {}\n" + " New chunks: {:>8} ({} -> {}), {}B/sec, {}\n" + " Uploaded: {:>8} ({}), {}bits/sec, {}\n" + " Blocks: {:>8} ({})\n" + " Chunks: {:>8} ({}){}", + BuildPartId, + BuildPartName, + BuildId, + NiceTimeSpanMs(UploadTimer.GetElapsedTimeMs()), + + UploadOp.m_LocalFolderScanStats.FoundFileCount.load(), + NiceBytes(UploadOp.m_LocalFolderScanStats.FoundFileByteCount.load()), + NiceNum(GetBytesPerSecond(UploadOp.m_ChunkingStats.ElapsedWallTimeUS, UploadOp.m_ChunkingStats.BytesHashed)), + NiceTimeSpanMs(UploadOp.m_ChunkingStats.ElapsedWallTimeUS / 1000), + + UploadOp.m_FindBlocksStats.NewBlocksChunkCount + UploadOp.m_LooseChunksStats.CompressedChunkCount, + NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount + UploadOp.m_LooseChunksStats.CompressedChunkBytes), + DeltaByteCountPercent, + + UploadOp.m_GenerateBlocksStats.GeneratedBlockCount.load(), + NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount), + NiceBytes(UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceNum(GetBytesPerSecond(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, + 
UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount)), + NiceTimeSpanMs(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS / 1000), + + UploadOp.m_LooseChunksStats.CompressedChunkCount.load(), + NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkRawBytes), + NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkBytes.load()), + NiceNum(GetBytesPerSecond(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS, + UploadOp.m_LooseChunksStats.CompressedChunkRawBytes)), + NiceTimeSpanMs(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS / 1000), + + UploadOp.m_UploadStats.BlockCount.load() + UploadOp.m_UploadStats.ChunkCount.load(), + NiceBytes(UploadOp.m_UploadStats.BlocksBytes + UploadOp.m_UploadStats.ChunksBytes), + NiceNum(GetBytesPerSecond(UploadOp.m_UploadStats.ElapsedWallTimeUS, + (UploadOp.m_UploadStats.ChunksBytes + UploadOp.m_UploadStats.BlocksBytes) * 8)), + NiceTimeSpanMs(UploadOp.m_UploadStats.ElapsedWallTimeUS / 1000), + + UploadOp.m_UploadStats.BlockCount.load(), + NiceBytes(UploadOp.m_UploadStats.BlocksBytes.load()), + + UploadOp.m_UploadStats.ChunkCount.load(), + NiceBytes(UploadOp.m_UploadStats.ChunksBytes.load()), + MultipartAttachmentStats); + } + return UploadedParts; +} + +} // namespace zen diff --git a/src/zenremotestore/builds/buildvalidatebuildpart.cpp b/src/zenremotestore/builds/buildvalidatebuildpart.cpp new file mode 100644 index 000000000..d06502683 --- /dev/null +++ b/src/zenremotestore/builds/buildvalidatebuildpart.cpp @@ -0,0 +1,374 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zenremotestore/builds/buildvalidatebuildpart.h> + +#include <zencore/fmtutils.h> +#include <zencore/parallelwork.h> +#include <zencore/scopeguard.h> +#include <zencore/trace.h> +#include <zenremotestore/builds/builduploadfolder.h> +#include <zenremotestore/transferthreadworkers.h> +#include <zenutil/filesystemutils.h> +#include <zenutil/filteredrate.h> +#include <zenutil/progress.h> + +namespace zen { + +using namespace std::literals; + +BuildsOperationValidateBuildPart::BuildsOperationValidateBuildPart(LoggerRef Log, + ProgressBase& Progress, + BuildStorageBase& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + WorkerThreadPool& IOWorkerPool, + WorkerThreadPool& NetworkPool, + const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const Options& Options) + +: m_Log(Log) +, m_Progress(Progress) +, m_Storage(Storage) +, m_AbortFlag(AbortFlag) +, m_PauseFlag(PauseFlag) +, m_IOWorkerPool(IOWorkerPool) +, m_NetworkPool(NetworkPool) +, m_BuildId(BuildId) +, m_BuildPartId(BuildPartId) +, m_BuildPartName(BuildPartName) +, m_Options(Options) +{ +} + +void +BuildsOperationValidateBuildPart::Execute() +{ + ZEN_TRACE_CPU("ValidateBuildPart"); + try + { + auto EndProgress = + MakeGuard([&]() { m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); }); + + Stopwatch Timer; + auto _ = MakeGuard([&]() { + if (!m_Options.IsQuiet) + { + ZEN_INFO("Validated build part {}/{} ('{}') in {}", + m_BuildId, + m_BuildPartId, + m_BuildPartName, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + } + }); + + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::FetchBuild, (uint32_t)TaskSteps::StepCount); + + ResolvedBuildPart Resolved = ResolveBuildPart(); + + ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + const std::filesystem::path& TempFolder = m_Options.TempFolder; + ZEN_ASSERT(!TempFolder.empty()); + + 
CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, TempFolder); + CreateDirectories(TempFolder); + auto __ = MakeGuard([this, TempFolder]() { CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, TempFolder); }); + + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::ValidateBlobs, (uint32_t)TaskSteps::StepCount); + + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Validate Blobs"); + + const uint64_t AttachmentsToVerifyCount = Resolved.ChunkAttachments.size() + Resolved.BlockAttachments.size(); + FilteredRate FilteredDownloadedBytesPerSecond; + FilteredRate FilteredVerifiedBytesPerSecond; + + ValidateBlobsContext Context{.Work = Work, + .AttachmentsToVerifyCount = AttachmentsToVerifyCount, + .FilteredDownloadedBytesPerSecond = FilteredDownloadedBytesPerSecond, + .FilteredVerifiedBytesPerSecond = FilteredVerifiedBytesPerSecond}; + + ScheduleChunkAttachmentValidation(Context, Resolved.ChunkAttachments, TempFolder, Resolved.PreferredMultipartChunkSize); + ScheduleBlockAttachmentValidation(Context, Resolved.BlockAttachments); + + Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + + const uint64_t DownloadedAttachmentCount = m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount; + const uint64_t DownloadedByteCount = m_DownloadStats.DownloadedChunkByteCount + m_DownloadStats.DownloadedBlockByteCount; + + FilteredDownloadedBytesPerSecond.Update(DownloadedByteCount); + FilteredVerifiedBytesPerSecond.Update(m_ValidateStats.VerifiedByteCount); + + std::string Details = fmt::format("Downloaded {}/{} ({}, {}bits/s). 
Verified {}/{} ({}, {}B/s)", + DownloadedAttachmentCount, + AttachmentsToVerifyCount, + NiceBytes(DownloadedByteCount), + NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8), + m_ValidateStats.VerifiedAttachmentCount.load(), + AttachmentsToVerifyCount, + NiceBytes(m_ValidateStats.VerifiedByteCount.load()), + NiceNum(FilteredVerifiedBytesPerSecond.GetCurrent())); + + ProgressBar->UpdateState( + {.Task = "Validating blobs ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2), + .RemainingCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2 - + (DownloadedAttachmentCount + m_ValidateStats.VerifiedAttachmentCount.load())), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + + ProgressBar->Finish(); + m_ValidateStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); + + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount); + } + catch (const std::exception&) + { + m_AbortFlag = true; + throw; + } +} + +BuildsOperationValidateBuildPart::ResolvedBuildPart +BuildsOperationValidateBuildPart::ResolveBuildPart() +{ + ResolvedBuildPart Result; + Result.PreferredMultipartChunkSize = 32u * 1024u * 1024u; + + CbObject Build = m_Storage.GetBuild(m_BuildId); + if (!m_BuildPartName.empty()) + { + m_BuildPartId = Build["parts"sv].AsObjectView()[m_BuildPartName].AsObjectId(); + if (m_BuildPartId == Oid::Zero) + { + throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", m_BuildId, m_BuildPartName)); + } + } + m_ValidateStats.BuildBlobSize = Build.GetSize(); + if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + Result.PreferredMultipartChunkSize = ChunkSize; + } + + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::FetchBuildPart, (uint32_t)TaskSteps::StepCount); + + CbObject BuildPart = m_Storage.GetBuildPart(m_BuildId, m_BuildPartId); + m_ValidateStats.BuildPartSize = BuildPart.GetSize(); 
+ if (!m_Options.IsQuiet) + { + ZEN_INFO("Validating build part {}/{} ({})", m_BuildId, m_BuildPartId, NiceBytes(BuildPart.GetSize())); + } + if (const CbObjectView ChunkAttachmentsView = BuildPart["chunkAttachments"sv].AsObjectView()) + { + for (CbFieldView LooseFileView : ChunkAttachmentsView["rawHashes"sv]) + { + Result.ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment()); + } + } + m_ValidateStats.ChunkAttachmentCount = Result.ChunkAttachments.size(); + if (const CbObjectView BlockAttachmentsView = BuildPart["blockAttachments"sv].AsObjectView()) + { + for (CbFieldView BlocksView : BlockAttachmentsView["rawHashes"sv]) + { + Result.BlockAttachments.push_back(BlocksView.AsBinaryAttachment()); + } + } + m_ValidateStats.BlockAttachmentCount = Result.BlockAttachments.size(); + + std::vector<ChunkBlockDescription> VerifyBlockDescriptions = + ParseChunkBlockDescriptionList(m_Storage.GetBlockMetadatas(m_BuildId, Result.BlockAttachments)); + if (VerifyBlockDescriptions.size() != Result.BlockAttachments.size()) + { + throw std::runtime_error(fmt::format("Uploaded blocks metadata could not all be found, {} blocks metadata is missing", + Result.BlockAttachments.size() - VerifyBlockDescriptions.size())); + } + + return Result; +} + +void +BuildsOperationValidateBuildPart::ScheduleChunkAttachmentValidation(ValidateBlobsContext& Context, + std::span<const IoHash> ChunkAttachments, + const std::filesystem::path& TempFolder, + uint64_t PreferredMultipartChunkSize) +{ + for (const IoHash& ChunkAttachment : ChunkAttachments) + { + Context.Work.ScheduleWork( + m_NetworkPool, + [this, &Context, &TempFolder, PreferredMultipartChunkSize, ChunkAttachment = IoHash(ChunkAttachment)](std::atomic<bool>&) { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_GetChunk"); + + Context.FilteredDownloadedBytesPerSecond.Start(); + DownloadLargeBlob( + m_Storage, + TempFolder, + m_BuildId, + ChunkAttachment, + PreferredMultipartChunkSize, + Context.Work, + m_NetworkPool, + 
m_DownloadStats.DownloadedChunkByteCount, + m_DownloadStats.MultipartAttachmentCount, + [this, &Context, ChunkHash = IoHash(ChunkAttachment)](IoBuffer&& Payload) { + m_DownloadStats.DownloadedChunkCount++; + Payload.SetContentType(ZenContentType::kCompressedBinary); + if (!m_AbortFlag) + { + Context.Work.ScheduleWork( + m_IOWorkerPool, + [this, &Context, Payload = IoBuffer(std::move(Payload)), ChunkHash](std::atomic<bool>&) mutable { + if (!m_AbortFlag) + { + ValidateDownloadedChunk(Context, ChunkHash, std::move(Payload)); + } + }); + } + }); + } + }); + } +} + +void +BuildsOperationValidateBuildPart::ScheduleBlockAttachmentValidation(ValidateBlobsContext& Context, std::span<const IoHash> BlockAttachments) +{ + for (const IoHash& BlockAttachment : BlockAttachments) + { + Context.Work.ScheduleWork(m_NetworkPool, [this, &Context, BlockAttachment = IoHash(BlockAttachment)](std::atomic<bool>&) { + if (!m_AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_GetBlock"); + + Context.FilteredDownloadedBytesPerSecond.Start(); + IoBuffer Payload = m_Storage.GetBuildBlob(m_BuildId, BlockAttachment); + m_DownloadStats.DownloadedBlockCount++; + m_DownloadStats.DownloadedBlockByteCount += Payload.GetSize(); + if (m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount == Context.AttachmentsToVerifyCount) + { + Context.FilteredDownloadedBytesPerSecond.Stop(); + } + if (!Payload) + { + throw std::runtime_error(fmt::format("Block attachment {} could not be found", BlockAttachment)); + } + if (!m_AbortFlag) + { + Context.Work.ScheduleWork(m_IOWorkerPool, + [this, &Context, Payload = std::move(Payload), BlockAttachment](std::atomic<bool>&) mutable { + if (!m_AbortFlag) + { + ValidateDownloadedBlock(Context, BlockAttachment, std::move(Payload)); + } + }); + } + } + }); + } +} + +void +BuildsOperationValidateBuildPart::ValidateDownloadedChunk(ValidateBlobsContext& Context, const IoHash& ChunkHash, IoBuffer Payload) +{ + ZEN_TRACE_CPU("ValidateBuildPart_Validate"); + + 
if (m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount == Context.AttachmentsToVerifyCount) + { + Context.FilteredDownloadedBytesPerSecond.Stop(); + } + + Context.FilteredVerifiedBytesPerSecond.Start(); + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateBlob(m_AbortFlag, std::move(Payload), ChunkHash, CompressedSize, DecompressedSize); + m_ValidateStats.VerifiedAttachmentCount++; + m_ValidateStats.VerifiedByteCount += DecompressedSize; + if (m_ValidateStats.VerifiedAttachmentCount.load() == Context.AttachmentsToVerifyCount) + { + Context.FilteredVerifiedBytesPerSecond.Stop(); + } +} + +void +BuildsOperationValidateBuildPart::ValidateDownloadedBlock(ValidateBlobsContext& Context, const IoHash& BlockAttachment, IoBuffer Payload) +{ + ZEN_TRACE_CPU("ValidateBuildPart_ValidateBlock"); + + Context.FilteredVerifiedBytesPerSecond.Start(); + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateChunkBlock(std::move(Payload), BlockAttachment, CompressedSize, DecompressedSize); + m_ValidateStats.VerifiedAttachmentCount++; + m_ValidateStats.VerifiedByteCount += DecompressedSize; + if (m_ValidateStats.VerifiedAttachmentCount.load() == Context.AttachmentsToVerifyCount) + { + Context.FilteredVerifiedBytesPerSecond.Stop(); + } +} + +ChunkBlockDescription +BuildsOperationValidateBuildPart::ValidateChunkBlock(IoBuffer&& Payload, + const IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize) +{ + CompositeBuffer BlockBuffer = ValidateBlob(m_AbortFlag, std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Chunk block blob {} is not compressed using 'None' compression level", BlobHash)); + } + return GetChunkBlockDescription(BlockBuffer.Flatten(), BlobHash); +} + +void +ValidateBuildPart(LoggerRef Log, + ProgressBase& Progress, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + bool IsQuiet, + bool IsVerbose, + 
TransferThreadWorkers& Workers, + BuildStorageBase& Storage, + const std::filesystem::path& TempFolder, + const Oid& BuildId, + const Oid& BuildPartId, + std::string_view BuildPartName) +{ + ZEN_TRACE_CPU("ValidateBuildPart"); + + Progress.SetLogOperationName("Validate Part"); + + BuildsOperationValidateBuildPart ValidateOp( + Log, + Progress, + Storage, + AbortFlag, + PauseFlag, + Workers.GetIOWorkerPool(), + Workers.GetNetworkPool(), + BuildId, + BuildPartId, + BuildPartName, + BuildsOperationValidateBuildPart::Options{.TempFolder = TempFolder, .IsQuiet = IsQuiet, .IsVerbose = IsVerbose}); + + ValidateOp.Execute(); + + const uint64_t DownloadedCount = ValidateOp.m_DownloadStats.DownloadedChunkCount + ValidateOp.m_DownloadStats.DownloadedBlockCount; + const uint64_t DownloadedByteCount = + ValidateOp.m_DownloadStats.DownloadedChunkByteCount + ValidateOp.m_DownloadStats.DownloadedBlockByteCount; + ZEN_CONSOLE("Verified: {:>8} ({}), {}B/sec, {}", + DownloadedCount, + NiceBytes(DownloadedByteCount), + NiceNum(GetBytesPerSecond(ValidateOp.m_ValidateStats.ElapsedWallTimeUS, DownloadedByteCount)), + NiceTimeSpanMs(ValidateOp.m_ValidateStats.ElapsedWallTimeUS / 1000)); +} + +} // namespace zen diff --git a/src/zenremotestore/builds/filebuildstorage.cpp b/src/zenremotestore/builds/filebuildstorage.cpp index 55e69de61..2f4904449 100644 --- a/src/zenremotestore/builds/filebuildstorage.cpp +++ b/src/zenremotestore/builds/filebuildstorage.cpp @@ -432,6 +432,45 @@ public: return IoBuffer{}; } + virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId, + const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetBuildBlobRanges"); + ZEN_UNUSED(BuildId); + ZEN_ASSERT(!Ranges.empty()); + + uint64_t ReceivedBytes = 0; + uint64_t SentBytes = Ranges.size() * 2 * 8; + + SimulateLatency(SentBytes, 0); + auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); }); + + Stopwatch ExecutionTimer; + auto __ = 
MakeGuard([&]() { AddStatistic(ExecutionTimer, SentBytes, ReceivedBytes); }); + + BuildBlobRanges Result; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (IsFile(BlockPath)) + { + BasicFile File(BlockPath, BasicFile::Mode::kRead); + + uint64_t RangeOffset = Ranges.front().first; + uint64_t RangeBytes = Ranges.back().first + Ranges.back().second - RangeOffset; + Result.PayloadBuffer = IoBufferBuilder::MakeFromFileHandle(File.Detach(), RangeOffset, RangeBytes); + + Result.Ranges.reserve(Ranges.size()); + + for (const std::pair<uint64_t, uint64_t>& Range : Ranges) + { + Result.Ranges.push_back(std::make_pair(Range.first - RangeOffset, Range.second)); + } + ReceivedBytes = Result.PayloadBuffer.GetSize(); + } + return Result; + } + virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t ChunkSize, diff --git a/src/zenremotestore/builds/jupiterbuildstorage.cpp b/src/zenremotestore/builds/jupiterbuildstorage.cpp index 23d0ddd4c..c0cc16941 100644 --- a/src/zenremotestore/builds/jupiterbuildstorage.cpp +++ b/src/zenremotestore/builds/jupiterbuildstorage.cpp @@ -4,6 +4,7 @@ #include <zencore/compactbinarybuilder.h> #include <zencore/compactbinaryutil.h> +#include <zencore/compress.h> #include <zencore/fmtutils.h> #include <zencore/scopeguard.h> #include <zencore/timer.h> @@ -14,18 +15,19 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_map.h> ZEN_THIRD_PARTY_INCLUDES_END -#include <regex> +#include <string_view> namespace zen { using namespace std::literals; namespace { - void ThrowFromJupiterResult(const JupiterResult& Result, std::string_view Prefix) + [[noreturn]] void ThrowFromJupiterResult(const JupiterResult& Result, std::string_view Prefix) { - int Error = Result.ErrorCode < (int)HttpResponseCode::Continue ? Result.ErrorCode : 0; - HttpResponseCode Status = - Result.ErrorCode >= int(HttpResponseCode::Continue) ? 
HttpResponseCode(Result.ErrorCode) : HttpResponseCode::ImATeapot; + HttpClientErrorCode Error = Result.ErrorCode < static_cast<int>(HttpResponseCode::Continue) ? HttpClientErrorCode(Result.ErrorCode) + : HttpClientErrorCode::kOK; + HttpResponseCode Status = Result.ErrorCode >= static_cast<int>(HttpResponseCode::Continue) ? HttpResponseCode(Result.ErrorCode) + : HttpResponseCode::ImATeapot; throw HttpClientError(fmt::format("{}: {} ({})", Prefix, Result.Reason, Result.ErrorCode), Error, Status); } } // namespace @@ -262,7 +264,7 @@ public: std::vector<std::function<void()>> WorkList; for (auto& WorkItem : WorkItems) { - WorkList.emplace_back([this, WorkItem = std::move(WorkItem), OnSentBytes]() { + WorkList.emplace_back([this, WorkItem = std::move(WorkItem), OnSentBytes = std::move(OnSentBytes)]() { Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); bool IsComplete = false; @@ -284,7 +286,10 @@ public: Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); - CreateDirectories(m_TempFolderPath); + if (!m_TempFolderPath.empty()) + { + CreateDirectories(m_TempFolderPath); + } JupiterResult GetBuildBlobResult = m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath, RangeOffset, RangeBytes); AddStatistic(GetBuildBlobResult); @@ -295,6 +300,29 @@ public: return std::move(GetBuildBlobResult.Response); } + virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId, + const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_TRACE_CPU("Jupiter::GetBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + if (!m_TempFolderPath.empty()) + { + CreateDirectories(m_TempFolderPath); + } + + BuildBlobRangesResult GetBuildBlobResult = + m_Session.GetBuildBlob(m_Namespace, m_Bucket, 
BuildId, RawHash, m_TempFolderPath, Ranges); + AddStatistic(GetBuildBlobResult); + if (!GetBuildBlobResult.Success) + { + ThrowFromJupiterResult(GetBuildBlobResult, "Failed fetching build blob ranges"sv); + } + return BuildBlobRanges{.PayloadBuffer = std::move(GetBuildBlobResult.Response), .Ranges = std::move(GetBuildBlobResult.Ranges)}; + } + virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t ChunkSize, @@ -423,11 +451,13 @@ public: virtual bool GetExtendedStatistics(ExtendedStatistics& OutStats) override { - OutStats.ReceivedBytesPerSource.reserve(m_ReceivedBytesPerSource.size()); - for (auto& It : m_ReceivedBytesPerSource) - { - OutStats.ReceivedBytesPerSource.insert_or_assign(It.first, m_SourceBytes[It.second]); - } + m_SourceLock.WithSharedLock([this, &OutStats]() { + OutStats.ReceivedBytesPerSource.reserve(m_ReceivedBytesPerSource.size()); + for (auto& It : m_ReceivedBytesPerSource) + { + OutStats.ReceivedBytesPerSource.insert_or_assign(It.first, m_SourceBytes[It.second].load(std::memory_order_relaxed)); + } + }); return true; } @@ -500,15 +530,29 @@ private: } if (!Result.Source.empty()) { - if (tsl::robin_map<std::string, uint32_t>::const_iterator It = m_ReceivedBytesPerSource.find(Result.Source); - It != m_ReceivedBytesPerSource.end()) - { - m_SourceBytes[It->second] += Result.ReceivedBytes; - } - else + if (!m_SourceLock.WithSharedLock([&]() { + if (tsl::robin_map<std::string, uint32_t>::const_iterator It = m_ReceivedBytesPerSource.find(Result.Source); + It != m_ReceivedBytesPerSource.end()) + { + m_SourceBytes[It->second] += Result.ReceivedBytes; + return true; + } + return false; + })) { - m_ReceivedBytesPerSource.insert_or_assign(Result.Source, m_SourceBytes.size()); - m_SourceBytes.push_back(Result.ReceivedBytes); + m_SourceLock.WithExclusiveLock([&]() { + if (tsl::robin_map<std::string, uint32_t>::const_iterator It = m_ReceivedBytesPerSource.find(Result.Source); + It != 
m_ReceivedBytesPerSource.end()) + { + m_SourceBytes[It->second] += Result.ReceivedBytes; + } + else if (m_SourceCount < MaxSourceCount) + { + size_t Index = m_SourceCount++; + m_ReceivedBytesPerSource.insert_or_assign(Result.Source, Index); + m_SourceBytes[Index] += Result.ReceivedBytes; + } + }); } } } @@ -519,8 +563,11 @@ private: const std::string m_Bucket; const std::filesystem::path m_TempFolderPath; - tsl::robin_map<std::string, uint32_t> m_ReceivedBytesPerSource; - std::vector<uint64_t> m_SourceBytes; + RwLock m_SourceLock; + tsl::robin_map<std::string, uint32_t> m_ReceivedBytesPerSource; + static constexpr size_t MaxSourceCount = 8u; + std::array<std::atomic<uint64_t>, MaxSourceCount> m_SourceBytes; + size_t m_SourceCount = 0; }; std::unique_ptr<BuildStorageBase> @@ -551,35 +598,135 @@ ParseBuildStorageUrl(std::string_view InUrl, Url.erase(ApiString, ExtendedApiString.length()); } - const std::string ArtifactURLRegExString = R"((http[s]?:\/\/.*?)\/(.*?)\/(.*?)\/(.*))"; - const std::regex ArtifactURLRegEx(ArtifactURLRegExString, std::regex::ECMAScript | std::regex::icase); - std::match_results<std::string_view::const_iterator> MatchResults; - std::string_view UrlToParse(Url); - if (regex_match(begin(UrlToParse), end(UrlToParse), MatchResults, ArtifactURLRegEx) && MatchResults.size() == 5) - { - auto GetMatch = [&MatchResults](uint32_t Index) -> std::string_view { - ZEN_ASSERT(Index < MatchResults.size()); + // Parse URL of the form: http[s]://host/namespace/bucket/buildid + std::string_view Remaining(Url); - const auto& Match = MatchResults[Index]; + // Find the end of the scheme (e.g. 
"http://" or "https://") + size_t SchemeEnd = Remaining.find("://"); + if (SchemeEnd == std::string_view::npos) + { + return false; + } + SchemeEnd += 3; // skip past "://" - return std::string_view(&*Match.first, Match.second - Match.first); - }; + // Find the first '/' after the host + size_t HostEnd = Remaining.find('/', SchemeEnd); + if (HostEnd == std::string_view::npos) + { + return false; + } - const std::string_view Host = GetMatch(1); - const std::string_view Namespace = GetMatch(2); - const std::string_view Bucket = GetMatch(3); - const std::string_view BuildId = GetMatch(4); + // Find the '/' after namespace + size_t NamespaceEnd = Remaining.find('/', HostEnd + 1); + if (NamespaceEnd == std::string_view::npos) + { + return false; + } - OutHost = Host; - OutNamespace = Namespace; - OutBucket = Bucket; - OutBuildId = BuildId; - return true; + // Find the '/' after bucket + size_t BucketEnd = Remaining.find('/', NamespaceEnd + 1); + if (BucketEnd == std::string_view::npos) + { + return false; } - else + + // BuildId must be non-empty + if (BucketEnd + 1 >= Remaining.size()) { return false; } + + OutHost = Remaining.substr(0, HostEnd); + OutNamespace = Remaining.substr(HostEnd + 1, NamespaceEnd - HostEnd - 1); + OutBucket = Remaining.substr(NamespaceEnd + 1, BucketEnd - NamespaceEnd - 1); + OutBuildId = Remaining.substr(BucketEnd + 1); + return true; } } // namespace zen + +#if ZEN_WITH_TESTS + +# include <zencore/testing.h> + +namespace zen { + +void +jupiterbuildstorage_forcelink() +{ +} + +} // namespace zen + +TEST_SUITE_BEGIN("remotestore.jupiterbuildstorage"); + +TEST_CASE("ParseBuildStorageUrl.ValidUrl") +{ + std::string Host, Namespace, Bucket, BuildId; + bool Result = + zen::ParseBuildStorageUrl("https://horde.devtools.epicgames.com/mynamespace/mybucket/mybuildid", Host, Namespace, Bucket, BuildId); + CHECK(Result); + CHECK(Host == "https://horde.devtools.epicgames.com"); + CHECK(Namespace == "mynamespace"); + CHECK(Bucket == "mybucket"); + 
CHECK(BuildId == "mybuildid"); +} + +TEST_CASE("ParseBuildStorageUrl.ValidUrlWithApiPrefix") +{ + std::string Host, Namespace, Bucket, BuildId; + bool Result = zen::ParseBuildStorageUrl("https://horde.devtools.epicgames.com/api/v2/builds/mynamespace/mybucket/mybuildid", + Host, + Namespace, + Bucket, + BuildId); + CHECK(Result); + CHECK(Host == "https://horde.devtools.epicgames.com"); + CHECK(Namespace == "mynamespace"); + CHECK(Bucket == "mybucket"); + CHECK(BuildId == "mybuildid"); +} + +TEST_CASE("ParseBuildStorageUrl.HttpScheme") +{ + std::string Host, Namespace, Bucket, BuildId; + bool Result = zen::ParseBuildStorageUrl("http://localhost/ns/bucket/build123", Host, Namespace, Bucket, BuildId); + CHECK(Result); + CHECK(Host == "http://localhost"); + CHECK(Namespace == "ns"); + CHECK(Bucket == "bucket"); + CHECK(BuildId == "build123"); +} + +TEST_CASE("ParseBuildStorageUrl.BuildIdWithSlashes") +{ + std::string Host, Namespace, Bucket, BuildId; + bool Result = zen::ParseBuildStorageUrl("https://host/ns/bucket/build/with/slashes", Host, Namespace, Bucket, BuildId); + CHECK(Result); + CHECK(Host == "https://host"); + CHECK(Namespace == "ns"); + CHECK(Bucket == "bucket"); + CHECK(BuildId == "build/with/slashes"); +} + +TEST_CASE("ParseBuildStorageUrl.MissingBuildId") +{ + std::string Host, Namespace, Bucket, BuildId; + CHECK_FALSE(zen::ParseBuildStorageUrl("https://host/ns/bucket/", Host, Namespace, Bucket, BuildId)); +} + +TEST_CASE("ParseBuildStorageUrl.MissingBucket") +{ + std::string Host, Namespace, Bucket, BuildId; + CHECK_FALSE(zen::ParseBuildStorageUrl("https://host/ns", Host, Namespace, Bucket, BuildId)); +} + +TEST_CASE("ParseBuildStorageUrl.NoScheme") +{ + std::string Host, Namespace, Bucket, BuildId; + CHECK_FALSE(zen::ParseBuildStorageUrl("host/ns/bucket/buildid", Host, Namespace, Bucket, BuildId)); +} + +TEST_SUITE_END(); + +#endif // ZEN_WITH_TESTS diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp 
index d203e0292..f29112f53 100644 --- a/src/zenremotestore/chunking/chunkblock.cpp +++ b/src/zenremotestore/chunking/chunkblock.cpp @@ -8,13 +8,9 @@ #include <zencore/timer.h> #include <zencore/trace.h> -#include <zenremotestore/operationlogoutput.h> - #include <numeric> -#include <vector> ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_map.h> #include <tsl/robin_set.h> ZEN_THIRD_PARTY_INCLUDES_END @@ -27,6 +23,184 @@ namespace zen { using namespace std::literals; +namespace chunkblock_impl { + + struct RangeDescriptor + { + uint64_t RangeStart = 0; + uint64_t RangeLength = 0; + uint32_t ChunkBlockIndexStart = 0; + uint32_t ChunkBlockIndexCount = 0; + }; + + void MergeCheapestRange(std::vector<RangeDescriptor>& InOutRanges) + { + ZEN_ASSERT(InOutRanges.size() > 1); + + size_t BestRangeIndexToCollapse = SIZE_MAX; + uint64_t BestGap = (uint64_t)-1; + + for (size_t RangeIndex = 0; RangeIndex < InOutRanges.size() - 1; RangeIndex++) + { + const RangeDescriptor& Range = InOutRanges[RangeIndex]; + const RangeDescriptor& NextRange = InOutRanges[RangeIndex + 1]; + uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength); + if (Gap < BestGap) + { + BestRangeIndexToCollapse = RangeIndex; + BestGap = Gap; + } + else if (Gap == BestGap) + { + const RangeDescriptor& BestRange = InOutRanges[BestRangeIndexToCollapse]; + const RangeDescriptor& BestNextRange = InOutRanges[BestRangeIndexToCollapse + 1]; + uint64_t BestMergedSize = (BestNextRange.RangeStart + BestNextRange.RangeLength) - BestRange.RangeStart; + uint64_t MergedSize = (NextRange.RangeStart + NextRange.RangeLength) - Range.RangeStart; + if (MergedSize < BestMergedSize) + { + BestRangeIndexToCollapse = RangeIndex; + } + } + } + + ZEN_ASSERT(BestRangeIndexToCollapse != SIZE_MAX); + ZEN_ASSERT(BestRangeIndexToCollapse < InOutRanges.size() - 1); + ZEN_ASSERT(BestGap != (uint64_t)-1); + + RangeDescriptor& BestRange = InOutRanges[BestRangeIndexToCollapse]; + const RangeDescriptor& BestNextRange = 
InOutRanges[BestRangeIndexToCollapse + 1]; + BestRange.RangeLength = BestNextRange.RangeStart - BestRange.RangeStart + BestNextRange.RangeLength; + BestRange.ChunkBlockIndexCount = + BestNextRange.ChunkBlockIndexStart - BestRange.ChunkBlockIndexStart + BestNextRange.ChunkBlockIndexCount; + InOutRanges.erase(InOutRanges.begin() + BestRangeIndexToCollapse + 1); + } + + std::vector<RangeDescriptor> GetBlockRanges(const ChunkBlockDescription& BlockDescription, + const uint64_t ChunkStartOffsetInBlock, + std::span<const uint32_t> BlockChunkIndexNeeded) + { + ZEN_TRACE_CPU("GetBlockRanges"); + std::vector<RangeDescriptor> BlockRanges; + { + uint64_t CurrentOffset = ChunkStartOffsetInBlock; + uint32_t ChunkBlockIndex = 0; + uint32_t NeedBlockChunkIndexOffset = 0; + RangeDescriptor NextRange; + while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) + { + const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + NextRange = {}; + } + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + } + else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength == 0) + { + NextRange.RangeStart = CurrentOffset; + NextRange.ChunkBlockIndexStart = ChunkBlockIndex; + } + NextRange.RangeLength += ChunkCompressedLength; + NextRange.ChunkBlockIndexCount++; + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + NeedBlockChunkIndexOffset++; + } + else + { + ZEN_ASSERT(false); + } + } + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + } + } + ZEN_ASSERT(!BlockRanges.empty()); + return BlockRanges; + } + + std::vector<RangeDescriptor> OptimizeRanges(uint64_t TotalBlockSize, + std::span<const RangeDescriptor> ExactRanges, + double LatencySec, + 
uint64_t SpeedBytesPerSec, + uint64_t MaxRangeCountPerRequest, + uint64_t MaxRangesPerBlock) + { + ZEN_TRACE_CPU("OptimizeRanges"); + ZEN_ASSERT(MaxRangesPerBlock > 0); + std::vector<RangeDescriptor> Ranges(ExactRanges.begin(), ExactRanges.end()); + + while (Ranges.size() > MaxRangesPerBlock) + { + MergeCheapestRange(Ranges); + } + + while (true) + { + const std::uint64_t RangeTotalSize = + std::accumulate(Ranges.begin(), Ranges.end(), uint64_t(0u), [](uint64_t Current, const RangeDescriptor& Value) { + return Current + Value.RangeLength; + }); + + const size_t RangeCount = Ranges.size(); + const uint64_t RequestCount = + MaxRangeCountPerRequest == (uint64_t)-1 ? 1 : (RangeCount + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest; + uint64_t RequestTimeAsBytes = uint64_t(SpeedBytesPerSec * RequestCount * LatencySec); + + if (RangeCount == 1) + { + // Does fetching the full block add less time than the time it takes to complete a single request? + if (TotalBlockSize - RangeTotalSize < SpeedBytesPerSec * LatencySec) + { + const std::uint64_t InitialRangeTotalSize = + std::accumulate(ExactRanges.begin(), + ExactRanges.end(), + uint64_t(0u), + [](uint64_t Current, const RangeDescriptor& Value) { return Current + Value.RangeLength; }); + + ZEN_DEBUG( + "Latency round trip takes as long as receiving the extra redundant bytes - go full block, dropping {} of slack, " + "adding {} of bytes to fetch, for block of size {}", + NiceBytes(TotalBlockSize - RangeTotalSize), + NiceBytes(TotalBlockSize - InitialRangeTotalSize), + NiceBytes(TotalBlockSize)); + return {}; + } + else + { + return Ranges; + } + } + + if (RequestTimeAsBytes < (TotalBlockSize - RangeTotalSize)) + { + return Ranges; + } + + if (RangeCount == 2) + { + // Merge to single range + Ranges.front().RangeLength = Ranges.back().RangeStart - Ranges.front().RangeStart + Ranges.back().RangeLength; + Ranges.front().ChunkBlockIndexCount = + Ranges.back().ChunkBlockIndexStart - Ranges.front().ChunkBlockIndexStart + 
Ranges.back().ChunkBlockIndexCount; + Ranges.pop_back(); + } + else + { + MergeCheapestRange(Ranges); + } + } + } + +} // namespace chunkblock_impl + ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject) { @@ -177,9 +351,9 @@ GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, BufferEndPtr += WriteVarUInt(gsl::narrow<uint64_t>(ChunkCount), BufferEndPtr); for (const auto& It : FetchChunks) { - std::pair<uint64_t, CompressedBuffer> Chunk = It.second(It.first); - uint64_t ChunkSize = 0; - std::span<const SharedBuffer> Segments = Chunk.second.GetCompressed().GetSegments(); + std::pair<uint64_t, CompositeBuffer> Chunk = It.second(It.first); + uint64_t ChunkSize = 0; + std::span<const SharedBuffer> Segments = Chunk.second.GetSegments(); for (const SharedBuffer& Segment : Segments) { ZEN_ASSERT(Segment.IsOwned()); @@ -270,7 +444,7 @@ IterateChunkBlock(const SharedBuffer& BlockPayload, }; std::vector<size_t> -FindReuseBlocks(OperationLogOutput& Output, +FindReuseBlocks(LoggerRef InLog, const uint8_t BlockReuseMinPercentLimit, const bool IsVerbose, ReuseBlocksStatistics& Stats, @@ -280,6 +454,7 @@ FindReuseBlocks(OperationLogOutput& Output, std::vector<uint32_t>& OutUnusedChunkIndexes) { ZEN_TRACE_CPU("FindReuseBlocks"); + ZEN_SCOPED_LOG(InLog); // Find all blocks with a usage level higher than MinPercentLimit // Pick out the blocks with usage higher or equal to MinPercentLimit @@ -346,11 +521,10 @@ FindReuseBlocks(OperationLogOutput& Output, { if (IsVerbose) { - ZEN_OPERATION_LOG_INFO(Output, - "Reusing block {}. {} attachments found, usage level: {}%", - KnownBlock.BlockHash, - FoundAttachmentCount, - ReusePercent); + ZEN_INFO("Reusing block {}. 
{} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundAttachmentCount, + ReusePercent); } ReuseBlockIndexes.push_back(KnownBlockIndex); @@ -359,12 +533,13 @@ FindReuseBlocks(OperationLogOutput& Output, } else if (FoundAttachmentCount > 0) { - // if (IsVerbose) - //{ - // ZEN_OPERATION_LOG_INFO(Output, "Skipping block {}. {} attachments found, usage level: {}%", - // KnownBlock.BlockHash, - // FoundAttachmentCount, ReusePercent); - //} + if (IsVerbose) + { + ZEN_INFO("Skipping block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundAttachmentCount, + ReusePercent); + } Stats.RejectedBlockCount++; Stats.RejectedChunkCount += FoundAttachmentCount; Stats.RejectedByteCount += ReuseSize; @@ -408,11 +583,10 @@ FindReuseBlocks(OperationLogOutput& Output, { if (IsVerbose) { - ZEN_OPERATION_LOG_INFO(Output, - "Reusing block {}. {} attachments found, usage level: {}%", - KnownBlock.BlockHash, - FoundChunkIndexes.size(), - ReusePercent); + ZEN_INFO("Reusing block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundChunkIndexes.size(), + ReusePercent); } FilteredReuseBlockIndexes.push_back(KnownBlockIndex); @@ -429,11 +603,10 @@ FindReuseBlocks(OperationLogOutput& Output, } else { - // if (IsVerbose) - //{ - // ZEN_OPERATION_LOG_INFO(Output, "Skipping block {}. filtered usage level: {}%", KnownBlock.BlockHash, - // ReusePercent); - //} + if (IsVerbose) + { + ZEN_INFO("Skipping block {}. 
filtered usage level: {}%", KnownBlock.BlockHash, ReusePercent); + } Stats.RejectedBlockCount++; Stats.RejectedChunkCount += FoundChunkIndexes.size(); Stats.RejectedByteCount += AdjustedReuseSize; @@ -454,10 +627,8 @@ FindReuseBlocks(OperationLogOutput& Output, return FilteredReuseBlockIndexes; } -ChunkBlockAnalyser::ChunkBlockAnalyser(OperationLogOutput& LogOutput, - std::span<const ChunkBlockDescription> BlockDescriptions, - const Options& Options) -: m_LogOutput(LogOutput) +ChunkBlockAnalyser::ChunkBlockAnalyser(LoggerRef Log, std::span<const ChunkBlockDescription> BlockDescriptions, const Options& Options) +: m_Log(Log) , m_BlockDescriptions(BlockDescriptions) , m_Options(Options) { @@ -555,480 +726,198 @@ ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span<const NeededBlock> ChunkBlockAnalyser::BlockResult Result; - uint64_t IdealDownloadTotalSize = 0; - uint64_t AllBlocksTotalBlocksSize = 0; - - for (const NeededBlock& NeededBlock : NeededBlocks) { - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex]; - - std::span<const uint32_t> BlockChunkIndexNeeded(NeededBlock.ChunkIndexes); - if (!NeededBlock.ChunkIndexes.empty()) + uint64_t MinRequestCount = 0; + uint64_t RequestCount = 0; + uint64_t RangeCount = 0; + uint64_t IdealDownloadTotalSize = 0; + uint64_t ActualDownloadTotalSize = 0; + uint64_t FullDownloadTotalSize = 0; + for (const NeededBlock& NeededBlock : NeededBlocks) { - bool WantsToDoPartialBlockDownload = NeededBlock.ChunkIndexes.size() < BlockDescription.ChunkRawHashes.size(); - bool CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) && - (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size()); - - EPartialBlockDownloadMode PartialBlockDownloadMode = BlockPartialDownloadModes[NeededBlock.BlockIndex]; - - const uint32_t ChunkStartOffsetInBlock = + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex]; + std::span<const 
uint32_t> BlockChunkIndexNeeded(NeededBlock.ChunkIndexes); + const uint32_t ChunkStartOffsetInBlock = gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + uint64_t TotalBlockSize = std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), + BlockDescription.ChunkCompressedLengths.end(), + uint64_t(ChunkStartOffsetInBlock)); + uint64_t ExactRangesSize = 0; + uint64_t DownloadRangesSize = 0; + uint64_t FullDownloadSize = 0; + + bool CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) && + (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size()); - const uint64_t TotalBlockSize = std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), - BlockDescription.ChunkCompressedLengths.end(), - std::uint64_t(ChunkStartOffsetInBlock)); - - AllBlocksTotalBlocksSize += TotalBlockSize; - - if ((PartialBlockDownloadMode != EPartialBlockDownloadMode::Off) && WantsToDoPartialBlockDownload && CanDoPartialBlockDownload) + if (NeededBlock.ChunkIndexes.size() == BlockDescription.ChunkRawHashes.size() || !CanDoPartialBlockDownload) { - ZEN_TRACE_CPU("PartialBlockAnalysis"); - - uint64_t TotalWantedChunksSize = 0; - std::optional<std::vector<BlockRangeDescriptor>> MaybeBlockRanges = CalculateBlockRanges(NeededBlock.BlockIndex, - BlockDescription, - NeededBlock.ChunkIndexes, - PartialBlockDownloadMode, - ChunkStartOffsetInBlock, - TotalBlockSize, - TotalWantedChunksSize); - ZEN_ASSERT(TotalWantedChunksSize <= TotalBlockSize); - IdealDownloadTotalSize += TotalWantedChunksSize; - - if (MaybeBlockRanges.has_value()) + // Full block + ExactRangesSize = TotalBlockSize; + DownloadRangesSize = TotalBlockSize; + FullDownloadSize = TotalBlockSize; + MinRequestCount++; + RequestCount++; + RangeCount++; + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + } + else if (NeededBlock.ChunkIndexes.empty()) + { + // Not needed + } + else + { + FullDownloadSize = TotalBlockSize; + 
std::vector<chunkblock_impl::RangeDescriptor> Ranges = + chunkblock_impl::GetBlockRanges(BlockDescription, ChunkStartOffsetInBlock, BlockChunkIndexNeeded); + ExactRangesSize = std::accumulate( + Ranges.begin(), + Ranges.end(), + uint64_t(0), + [](uint64_t Current, const chunkblock_impl::RangeDescriptor& Range) { return Current + Range.RangeLength; }); + + EPartialBlockDownloadMode PartialBlockDownloadMode = BlockPartialDownloadModes[NeededBlock.BlockIndex]; + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Off) { - std::vector<BlockRangeDescriptor> BlockRanges = MaybeBlockRanges.value(); - ZEN_ASSERT(!BlockRanges.empty()); - - uint64_t RequestedSize = - std::accumulate(BlockRanges.begin(), - BlockRanges.end(), - uint64_t(0), - [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; }); + // Use full block + MinRequestCount++; + RangeCount++; + RequestCount++; + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + DownloadRangesSize = TotalBlockSize; + } + else + { + const bool IsHighSpeed = (PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed); + uint64_t MaxRangeCountPerRequest = + IsHighSpeed ? m_Options.HostHighSpeedMaxRangeCountPerRequest : m_Options.HostMaxRangeCountPerRequest; + ZEN_ASSERT(MaxRangeCountPerRequest != 0); - if (PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact && BlockRanges.size() > 1) + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Exact) { - // TODO: Once we have support in our http client to request multiple ranges in one request this - // logic would need to change as the per-request overhead would go away - - const double LatencySec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed - ? 
m_Options.HostHighSpeedLatencySec - : m_Options.HostLatencySec; - if (LatencySec > 0) + // Use exact ranges + for (const chunkblock_impl::RangeDescriptor& Range : Ranges) { - const uint64_t BytesPerSec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed - ? m_Options.HostHighSpeedBytesPerSec - : m_Options.HostSpeedBytesPerSec; - - const double ExtraRequestTimeSec = (BlockRanges.size() - 1) * LatencySec; - const uint64_t ExtraRequestTimeBytes = uint64_t(ExtraRequestTimeSec * BytesPerSec); - - const uint64_t FullRangeSize = - BlockRanges.back().RangeStart + BlockRanges.back().RangeLength - BlockRanges.front().RangeStart; + Result.BlockRanges.push_back(BlockRangeDescriptor{.BlockIndex = NeededBlock.BlockIndex, + .RangeStart = Range.RangeStart, + .RangeLength = Range.RangeLength, + .ChunkBlockIndexStart = Range.ChunkBlockIndexStart, + .ChunkBlockIndexCount = Range.ChunkBlockIndexCount}); + } - if (ExtraRequestTimeBytes + RequestedSize >= FullRangeSize) + MinRequestCount++; + RangeCount += Ranges.size(); + RequestCount += MaxRangeCountPerRequest == (uint64_t)-1 + ? 
1 + : (Ranges.size() + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest; + DownloadRangesSize = ExactRangesSize; + } + else + { + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::SingleRange) + { + // Use single range + if (Ranges.size() > 1) { - BlockRanges = std::vector<BlockRangeDescriptor>{MergeBlockRanges(BlockRanges)}; - - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Merging {} chunks ({}) from block {} ({}) to single request (extra bytes {})", - NeededBlock.ChunkIndexes.size(), - NiceBytes(RequestedSize), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - NiceBytes(BlockRanges.front().RangeLength - RequestedSize)); - } - - RequestedSize = BlockRanges.front().RangeLength; + Ranges = {chunkblock_impl::RangeDescriptor{ + .RangeStart = Ranges.front().RangeStart, + .RangeLength = Ranges.back().RangeStart + Ranges.back().RangeLength - Ranges.front().RangeStart, + .ChunkBlockIndexStart = Ranges.front().ChunkBlockIndexStart, + .ChunkBlockIndexCount = Ranges.back().ChunkBlockIndexStart + Ranges.back().ChunkBlockIndexCount - + Ranges.front().ChunkBlockIndexStart}}; } + + // We still do the optimize pass to see if it is more effective to use a full block } - } - if ((PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact) && - ((TotalBlockSize - RequestedSize) < (512u * 1024u))) - { - if (m_Options.IsVerbose) + double LatencySec = IsHighSpeed ? m_Options.HostHighSpeedLatencySec : m_Options.HostLatencySec; + uint64_t SpeedBytesPerSec = IsHighSpeed ? 
m_Options.HostHighSpeedBytesPerSec : m_Options.HostSpeedBytesPerSec; + if (LatencySec > 0.0 && SpeedBytesPerSec > 0u) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Requesting {} chunks ({}) from block {} ({}) using full block request due to small " - "total slack (extra bytes {})", - NeededBlock.ChunkIndexes.size(), - NiceBytes(RequestedSize), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - NiceBytes(TotalBlockSize - TotalWantedChunksSize)); + Ranges = chunkblock_impl::OptimizeRanges(TotalBlockSize, + Ranges, + LatencySec, + SpeedBytesPerSec, + MaxRangeCountPerRequest, + m_Options.MaxRangesPerBlock); } - Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); - } - else - { - Result.BlockRanges.insert(Result.BlockRanges.end(), BlockRanges.begin(), BlockRanges.end()); - if (m_Options.IsVerbose) + MinRequestCount++; + if (Ranges.empty()) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Requesting {} chunks ({}) from block {} ({}) using {} requests (extra bytes {})", - NeededBlock.ChunkIndexes.size(), - NiceBytes(RequestedSize), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - BlockRanges.size(), - NiceBytes(RequestedSize - TotalWantedChunksSize)); + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + RequestCount++; + RangeCount++; + DownloadRangesSize = TotalBlockSize; } + else + { + for (const chunkblock_impl::RangeDescriptor& Range : Ranges) + { + Result.BlockRanges.push_back(BlockRangeDescriptor{.BlockIndex = NeededBlock.BlockIndex, + .RangeStart = Range.RangeStart, + .RangeLength = Range.RangeLength, + .ChunkBlockIndexStart = Range.ChunkBlockIndexStart, + .ChunkBlockIndexCount = Range.ChunkBlockIndexCount}); + } + RangeCount += Ranges.size(); + RequestCount += MaxRangeCountPerRequest == (uint64_t)-1 + ? 1 + : (Ranges.size() + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest; + } + + DownloadRangesSize = Ranges.empty() + ? 
TotalBlockSize + : std::accumulate(Ranges.begin(), + Ranges.end(), + uint64_t(0), + [](uint64_t Current, const chunkblock_impl::RangeDescriptor& Range) { + return Current + Range.RangeLength; + }); } } - else - { - Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); - } } - else - { - Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); - IdealDownloadTotalSize += TotalBlockSize; - } - } - } + IdealDownloadTotalSize += ExactRangesSize; + ActualDownloadTotalSize += DownloadRangesSize; + FullDownloadTotalSize += FullDownloadSize; - if (!Result.BlockRanges.empty() && !m_Options.IsQuiet) - { - tsl::robin_set<uint32_t> PartialBlockIndexes; - uint64_t PartialBlocksTotalSize = std::accumulate(Result.BlockRanges.begin(), - Result.BlockRanges.end(), - uint64_t(0u), - [&](uint64_t Current, const BlockRangeDescriptor& Range) { - PartialBlockIndexes.insert(Range.BlockIndex); - return Current + Range.RangeLength; - }); - - uint64_t FullBlocksTotalSize = - std::accumulate(Result.FullBlockIndexes.begin(), - Result.FullBlockIndexes.end(), - uint64_t(0u), - [&](uint64_t Current, uint32_t BlockIndex) { - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - uint32_t CurrentOffset = - gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); - - return Current + std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), - BlockDescription.ChunkCompressedLengths.end(), - std::uint64_t(CurrentOffset)); - }); - - uint64_t PartialBlockRequestCount = Result.BlockRanges.size(); - uint64_t PartialBlockCount = PartialBlockIndexes.size(); - - uint64_t TotalExtraPartialBlocksRequestCount = PartialBlockRequestCount - PartialBlockCount; - uint64_t ActualPartialDownloadTotalSize = FullBlocksTotalSize + PartialBlocksTotalSize; - - uint64_t IdealSkippedSize = AllBlocksTotalBlocksSize - IdealDownloadTotalSize; - uint64_t ActualSkippedSize = AllBlocksTotalBlocksSize - ActualPartialDownloadTotalSize; - - double 
PercentOfIdealPartialSkippedSize = (ActualSkippedSize * 100.0) / IdealSkippedSize; - - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Analysis of partial block requests saves download of {} out of {}, {:.1f}% of possible {} using {} extra " - "requests. Completed in {}", - NiceBytes(ActualSkippedSize), - NiceBytes(AllBlocksTotalBlocksSize), - PercentOfIdealPartialSkippedSize, - NiceBytes(IdealSkippedSize), - TotalExtraPartialBlocksRequestCount, - NiceTimeSpanMs(PartialAnalisysTimer.GetElapsedTimeMs())); - } - - return Result; -} - -ChunkBlockAnalyser::BlockRangeDescriptor -ChunkBlockAnalyser::MergeBlockRanges(std::span<const BlockRangeDescriptor> Ranges) -{ - ZEN_ASSERT(Ranges.size() > 1); - const BlockRangeDescriptor& First = Ranges.front(); - const BlockRangeDescriptor& Last = Ranges.back(); - - return BlockRangeDescriptor{.BlockIndex = First.BlockIndex, - .RangeStart = First.RangeStart, - .RangeLength = Last.RangeStart + Last.RangeLength - First.RangeStart, - .ChunkBlockIndexStart = First.ChunkBlockIndexStart, - .ChunkBlockIndexCount = Last.ChunkBlockIndexStart + Last.ChunkBlockIndexCount - First.ChunkBlockIndexStart}; -} - -std::optional<std::vector<ChunkBlockAnalyser::BlockRangeDescriptor>> -ChunkBlockAnalyser::MakeOptionalBlockRangeVector(uint64_t TotalBlockSize, const BlockRangeDescriptor& Range) -{ - if (Range.RangeLength == TotalBlockSize) - { - return {}; - } - else - { - return std::vector<BlockRangeDescriptor>{Range}; - } -}; - -const ChunkBlockAnalyser::BlockRangeLimit* -ChunkBlockAnalyser::GetBlockRangeLimitForRange(std::span<const BlockRangeLimit> Limits, - uint64_t TotalBlockSize, - std::span<const BlockRangeDescriptor> Ranges) -{ - if (Ranges.size() > 1) - { - const std::uint64_t WantedSize = - std::accumulate(Ranges.begin(), Ranges.end(), uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) { - return Current + Range.RangeLength; - }); - - const double RangeRequestedPercent = (WantedSize * 100.0) / TotalBlockSize; - - for (const 
BlockRangeLimit& Limit : Limits) - { - if (RangeRequestedPercent >= Limit.SizePercent && Ranges.size() > Limit.MaxRangeCount) + if (ExactRangesSize < FullDownloadSize) { - return &Limit; + ZEN_DEBUG("Block {}: Full: {}, Ideal: {}, Actual: {}, Saves: {}", + NeededBlock.BlockIndex, + NiceBytes(FullDownloadSize), + NiceBytes(ExactRangesSize), + NiceBytes(DownloadRangesSize), + NiceBytes(FullDownloadSize - DownloadRangesSize)); } } - } - return nullptr; -}; - -std::vector<ChunkBlockAnalyser::BlockRangeDescriptor> -ChunkBlockAnalyser::CollapseBlockRanges(const uint64_t AlwaysAcceptableGap, std::span<const BlockRangeDescriptor> BlockRanges) -{ - ZEN_ASSERT(BlockRanges.size() > 1); - std::vector<BlockRangeDescriptor> CollapsedBlockRanges; - - auto BlockRangesIt = BlockRanges.begin(); - CollapsedBlockRanges.push_back(*BlockRangesIt++); - for (; BlockRangesIt != BlockRanges.end(); BlockRangesIt++) - { - BlockRangeDescriptor& LastRange = CollapsedBlockRanges.back(); - - const uint64_t BothRangeSize = BlockRangesIt->RangeLength + LastRange.RangeLength; - - const uint64_t Gap = BlockRangesIt->RangeStart - (LastRange.RangeStart + LastRange.RangeLength); - if (Gap <= Max(BothRangeSize / 16, AlwaysAcceptableGap)) - { - LastRange.ChunkBlockIndexCount = - (BlockRangesIt->ChunkBlockIndexStart + BlockRangesIt->ChunkBlockIndexCount) - LastRange.ChunkBlockIndexStart; - LastRange.RangeLength = (BlockRangesIt->RangeStart + BlockRangesIt->RangeLength) - LastRange.RangeStart; - } - else + uint64_t Actual = FullDownloadTotalSize - ActualDownloadTotalSize; + uint64_t Ideal = FullDownloadTotalSize - IdealDownloadTotalSize; + if (Ideal < FullDownloadTotalSize && !m_Options.IsQuiet) { - CollapsedBlockRanges.push_back(*BlockRangesIt); + const double AchievedPercent = Ideal == 0 ? 100.0 : (100.0 * Actual) / Ideal; + ZEN_INFO( + "Block Partial Analysis: Blocks: {}, Full: {}, Ideal: {}, Actual: {}. Skipping {} ({:.1f}%) out of " + "possible {} using {} extra ranges " + "via {} extra requests. 
Completed in {}", + NeededBlocks.size(), + NiceBytes(FullDownloadTotalSize), + NiceBytes(IdealDownloadTotalSize), + NiceBytes(ActualDownloadTotalSize), + NiceBytes(FullDownloadTotalSize - ActualDownloadTotalSize), + AchievedPercent, + NiceBytes(Ideal), + RangeCount - MinRequestCount, + RequestCount - MinRequestCount, + NiceTimeSpanMs(PartialAnalisysTimer.GetElapsedTimeMs())); } } - return CollapsedBlockRanges; -}; - -uint64_t -ChunkBlockAnalyser::CalculateNextGap(const uint64_t AlwaysAcceptableGap, std::span<const BlockRangeDescriptor> BlockRanges) -{ - ZEN_ASSERT(BlockRanges.size() > 1); - uint64_t AcceptableGap = (uint64_t)-1; - for (size_t RangeIndex = 0; RangeIndex < BlockRanges.size() - 1; RangeIndex++) - { - const BlockRangeDescriptor& Range = BlockRanges[RangeIndex]; - const BlockRangeDescriptor& NextRange = BlockRanges[RangeIndex + 1]; - - const uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength); - AcceptableGap = Min(Gap, AcceptableGap); - } - AcceptableGap = RoundUp(AcceptableGap, AlwaysAcceptableGap); - return AcceptableGap; -}; - -std::optional<std::vector<ChunkBlockAnalyser::BlockRangeDescriptor>> -ChunkBlockAnalyser::CalculateBlockRanges(uint32_t BlockIndex, - const ChunkBlockDescription& BlockDescription, - std::span<const uint32_t> BlockChunkIndexNeeded, - EPartialBlockDownloadMode PartialBlockDownloadMode, - const uint64_t ChunkStartOffsetInBlock, - const uint64_t TotalBlockSize, - uint64_t& OutTotalWantedChunksSize) -{ - ZEN_TRACE_CPU("CalculateBlockRanges"); - - if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Off) - { - return {}; - } - - std::vector<BlockRangeDescriptor> BlockRanges; - { - uint64_t CurrentOffset = ChunkStartOffsetInBlock; - uint32_t ChunkBlockIndex = 0; - uint32_t NeedBlockChunkIndexOffset = 0; - BlockRangeDescriptor NextRange{.BlockIndex = BlockIndex}; - while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) - { - const 
uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; - if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) - { - if (NextRange.RangeLength > 0) - { - BlockRanges.push_back(NextRange); - NextRange = {.BlockIndex = BlockIndex}; - } - ChunkBlockIndex++; - CurrentOffset += ChunkCompressedLength; - } - else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) - { - if (NextRange.RangeLength == 0) - { - NextRange.RangeStart = CurrentOffset; - NextRange.ChunkBlockIndexStart = ChunkBlockIndex; - } - NextRange.RangeLength += ChunkCompressedLength; - NextRange.ChunkBlockIndexCount++; - ChunkBlockIndex++; - CurrentOffset += ChunkCompressedLength; - NeedBlockChunkIndexOffset++; - } - else - { - ZEN_ASSERT(false); - } - } - if (NextRange.RangeLength > 0) - { - BlockRanges.push_back(NextRange); - } - } - ZEN_ASSERT(!BlockRanges.empty()); - - OutTotalWantedChunksSize = - std::accumulate(BlockRanges.begin(), BlockRanges.end(), uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) { - return Current + Range.RangeLength; - }); - - double RangeWantedPercent = (OutTotalWantedChunksSize * 100.0) / TotalBlockSize; - - if (BlockRanges.size() == 1) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Range request of {} ({:.2f}%) using single range from block {} ({}) as is", - NiceBytes(OutTotalWantedChunksSize), - RangeWantedPercent, - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize)); - } - return BlockRanges; - } - - if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Exact) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Range request of {} ({:.2f}%) using {} ranges from block {} ({})", - NiceBytes(OutTotalWantedChunksSize), - RangeWantedPercent, - BlockRanges.size(), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize)); - } - return BlockRanges; - } - - if (PartialBlockDownloadMode == 
EPartialBlockDownloadMode::SingleRange) - { - const BlockRangeDescriptor MergedRange = MergeBlockRanges(BlockRanges); - if (m_Options.IsVerbose) - { - const double RangeRequestedPercent = (MergedRange.RangeLength * 100.0) / TotalBlockSize; - const double WastedPercent = ((MergedRange.RangeLength - OutTotalWantedChunksSize) * 100.0) / MergedRange.RangeLength; - - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) limited to single block range {} ({:.2f}%) wasting " - "{:.2f}% ({})", - NiceBytes(OutTotalWantedChunksSize), - RangeWantedPercent, - BlockRanges.size(), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - NiceBytes(MergedRange.RangeLength), - RangeRequestedPercent, - WastedPercent, - NiceBytes(MergedRange.RangeLength - OutTotalWantedChunksSize)); - } - return MakeOptionalBlockRangeVector(TotalBlockSize, MergedRange); - } - - if (RangeWantedPercent > FullBlockRangePercentLimit) - { - const BlockRangeDescriptor MergedRange = MergeBlockRanges(BlockRanges); - if (m_Options.IsVerbose) - { - const double RangeRequestedPercent = (MergedRange.RangeLength * 100.0) / TotalBlockSize; - const double WastedPercent = ((MergedRange.RangeLength - OutTotalWantedChunksSize) * 100.0) / MergedRange.RangeLength; - - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) exceeds {}%. 
Merged to single block range {} " - "({:.2f}%) wasting {:.2f}% ({})", - NiceBytes(OutTotalWantedChunksSize), - RangeWantedPercent, - BlockRanges.size(), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - FullBlockRangePercentLimit, - NiceBytes(MergedRange.RangeLength), - RangeRequestedPercent, - WastedPercent, - NiceBytes(MergedRange.RangeLength - OutTotalWantedChunksSize)); - } - return MakeOptionalBlockRangeVector(TotalBlockSize, MergedRange); - } - - const uint64_t AlwaysAcceptableGap = 4u * 1024u; - - std::vector<BlockRangeDescriptor> CollapsedBlockRanges = CollapseBlockRanges(AlwaysAcceptableGap, BlockRanges); - while (GetBlockRangeLimitForRange(ForceMergeLimits, TotalBlockSize, CollapsedBlockRanges)) - { - CollapsedBlockRanges = CollapseBlockRanges(CalculateNextGap(AlwaysAcceptableGap, CollapsedBlockRanges), CollapsedBlockRanges); - } - - const std::uint64_t WantedCollapsedSize = - std::accumulate(CollapsedBlockRanges.begin(), - CollapsedBlockRanges.end(), - uint64_t(0), - [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; }); - - const double CollapsedRangeRequestedPercent = (WantedCollapsedSize * 100.0) / TotalBlockSize; - - if (m_Options.IsVerbose) - { - const double WastedPercent = ((WantedCollapsedSize - OutTotalWantedChunksSize) * 100.0) / WantedCollapsedSize; - - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) collapsed to {} {:.2f}% using {} ranges wasting {:.2f}% " - "({})", - NiceBytes(OutTotalWantedChunksSize), - RangeWantedPercent, - BlockRanges.size(), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - NiceBytes(WantedCollapsedSize), - CollapsedRangeRequestedPercent, - CollapsedBlockRanges.size(), - WastedPercent, - NiceBytes(WantedCollapsedSize - OutTotalWantedChunksSize)); - } - return CollapsedBlockRanges; + return Result; } #if ZEN_WITH_TESTS -namespace testutils { +namespace chunkblock_testutils { static 
std::vector<std::pair<Oid, CompressedBuffer>> CreateAttachments( const std::span<const size_t>& Sizes, OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, @@ -1045,12 +934,14 @@ namespace testutils { return Result; } -} // namespace testutils +} // namespace chunkblock_testutils + +TEST_SUITE_BEGIN("remotestore.chunkblock"); TEST_CASE("chunkblock.block") { using namespace std::literals; - using namespace testutils; + using namespace chunkblock_testutils; std::vector<std::size_t> AttachmentSizes({7633, 6825, 5738, 8031, 7225, 566, 3656, 6006, 24, 3466, 1093, 4269, 2257, 3685, 3489, 7194, 6151, 5482, 6217, 3511, 6738, 5061, 7537, 2759, 1916, 8210, 2235, 4024, 1582, 5251, @@ -1062,8 +953,8 @@ TEST_CASE("chunkblock.block") for (const auto& It : AttachmentsWithId) { Chunks.push_back( - std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { - return {Buffer.DecodeRawSize(), Buffer}; + std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompositeBuffer> { + return {Buffer.DecodeRawSize(), Buffer.GetCompressed()}; })); } ChunkBlockDescription Block; @@ -1078,7 +969,7 @@ TEST_CASE("chunkblock.block") TEST_CASE("chunkblock.reuseblocks") { using namespace std::literals; - using namespace testutils; + using namespace chunkblock_testutils; std::vector<std::vector<std::size_t>> BlockAttachmentSizes( {std::vector<std::size_t>{7633, 6825, 5738, 8031, 7225, 566, 3656, 6006, 24, 3466, 1093, 4269, 2257, 3685, 3489, @@ -1097,8 +988,8 @@ TEST_CASE("chunkblock.reuseblocks") for (const auto& It : AttachmentsWithId) { Chunks.push_back( - std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { - return {Buffer.DecodeRawSize(), Buffer}; + std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompositeBuffer> { + return {Buffer.DecodeRawSize(), 
Buffer.GetCompressed()}; })); } ChunkBlockDescription Block; @@ -1106,8 +997,7 @@ TEST_CASE("chunkblock.reuseblocks") BlockDescriptions.emplace_back(std::move(Block)); } - LoggerRef LogRef = Log(); - std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef)); + LoggerRef LogRef = Log(); { // We use just about all the chunks - should result in use of both blocks @@ -1124,14 +1014,8 @@ TEST_CASE("chunkblock.reuseblocks") std::iota(ManyChunkIndexes.begin(), ManyChunkIndexes.end(), 0); std::vector<uint32_t> UnusedChunkIndexes; - std::vector<size_t> ReusedBlocks = FindReuseBlocks(*LogOutput, - 80, - false, - ReuseBlocksStats, - BlockDescriptions, - ManyChunkHashes, - ManyChunkIndexes, - UnusedChunkIndexes); + std::vector<size_t> ReusedBlocks = + FindReuseBlocks(LogRef, 80, false, ReuseBlocksStats, BlockDescriptions, ManyChunkHashes, ManyChunkIndexes, UnusedChunkIndexes); CHECK_EQ(2u, ReusedBlocks.size()); CHECK_EQ(0u, UnusedChunkIndexes.size()); @@ -1152,7 +1036,7 @@ TEST_CASE("chunkblock.reuseblocks") std::iota(ManyChunkIndexes.begin(), ManyChunkIndexes.end(), 0); std::vector<uint32_t> UnusedChunkIndexes; - std::vector<size_t> ReusedBlocks = FindReuseBlocks(*LogOutput, + std::vector<size_t> ReusedBlocks = FindReuseBlocks(LogRef, 80, false, ReuseBlocksStats, @@ -1181,7 +1065,7 @@ TEST_CASE("chunkblock.reuseblocks") // We use half the chunks - should result in no use of blocks due to 80% limit std::vector<uint32_t> UnusedChunkIndexes80Percent; ReuseBlocksStatistics ReuseBlocksStats; - std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(*LogOutput, + std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(LogRef, 80, false, ReuseBlocksStats, @@ -1197,7 +1081,7 @@ TEST_CASE("chunkblock.reuseblocks") // We use half the chunks - should result in use of both blocks due to 40% limit std::vector<uint32_t> UnusedChunkIndexes40Percent; ReuseBlocksStatistics ReuseBlocksStats; - std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(*LogOutput, 
+ std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(LogRef, 40, false, ReuseBlocksStats, @@ -1227,7 +1111,7 @@ TEST_CASE("chunkblock.reuseblocks") // We use half the chunks for first block - should result in use of one blocks due to 80% limit ReuseBlocksStatistics ReuseBlocksStats; std::vector<uint32_t> UnusedChunkIndexes80Percent; - std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(*LogOutput, + std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(LogRef, 80, false, ReuseBlocksStats, @@ -1244,7 +1128,7 @@ TEST_CASE("chunkblock.reuseblocks") // We use half the chunks - should result in use of both blocks due to 40% limit ReuseBlocksStatistics ReuseBlocksStats; std::vector<uint32_t> UnusedChunkIndexes40Percent; - std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(*LogOutput, + std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(LogRef, 40, false, ReuseBlocksStats, @@ -1283,7 +1167,7 @@ TEST_CASE("chunkblock.reuseblocks") // We use half the chunks for first block - should result in use of one blocks due to 80% limit ReuseBlocksStatistics ReuseBlocksStats; std::vector<uint32_t> UnusedChunkIndexes80Percent; - std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(*LogOutput, + std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(LogRef, 80, false, ReuseBlocksStats, @@ -1300,7 +1184,7 @@ TEST_CASE("chunkblock.reuseblocks") // We use half the chunks - should result in use of both blocks due to 40% limit ReuseBlocksStatistics ReuseBlocksStats; std::vector<uint32_t> UnusedChunkIndexes40Percent; - std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(*LogOutput, + std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(LogRef, 40, false, ReuseBlocksStats, @@ -1315,6 +1199,877 @@ TEST_CASE("chunkblock.reuseblocks") } } +namespace chunkblock_analyser_testutils { + + // Build a ChunkBlockDescription without any real payload. 
+ // Hashes are derived deterministically from (BlockSeed XOR ChunkIndex) so that the same + // seed produces the same hashes - useful for deduplication tests. + static ChunkBlockDescription MakeBlockDesc(uint64_t HeaderSize, + std::initializer_list<uint32_t> CompressedLengths, + uint32_t BlockSeed = 0) + { + ChunkBlockDescription Desc; + Desc.HeaderSize = HeaderSize; + uint32_t ChunkIndex = 0; + for (uint32_t Length : CompressedLengths) + { + uint64_t HashInput = uint64_t(BlockSeed ^ ChunkIndex); + Desc.ChunkRawHashes.push_back(IoHash::HashBuffer(MemoryView(&HashInput, sizeof(HashInput)))); + Desc.ChunkRawLengths.push_back(Length); + Desc.ChunkCompressedLengths.push_back(Length); + ChunkIndex++; + } + return Desc; + } + + // Build the robin_map<IoHash, uint32_t> needed by GetNeeded from a flat list of blocks. + // First occurrence of each hash wins; index is assigned sequentially across all blocks. + [[maybe_unused]] static tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> MakeHashMap(const std::vector<ChunkBlockDescription>& Blocks) + { + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Result; + uint32_t Index = 0; + for (const ChunkBlockDescription& Block : Blocks) + { + for (const IoHash& Hash : Block.ChunkRawHashes) + { + if (!Result.contains(Hash)) + { + Result.emplace(Hash, Index++); + } + } + } + return Result; + } + +} // namespace chunkblock_analyser_testutils + +TEST_CASE("chunkblock.mergecheapestrange.picks_smallest_gap") +{ + using RD = chunkblock_impl::RangeDescriptor; + // Gap between ranges 0-1 is 50, gap between 1-2 is 150 -> pair 0-1 gets merged + std::vector<RD> Ranges = { + {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 150, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + }; + chunkblock_impl::MergeCheapestRange(Ranges); + + REQUIRE_EQ(2u, Ranges.size()); + 
CHECK_EQ(0u, Ranges[0].RangeStart); + CHECK_EQ(250u, Ranges[0].RangeLength); // 150+100 + CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(2u, Ranges[0].ChunkBlockIndexCount); + CHECK_EQ(400u, Ranges[1].RangeStart); + CHECK_EQ(100u, Ranges[1].RangeLength); + CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.mergecheapestrange.tiebreak_smaller_merged") +{ + using RD = chunkblock_impl::RangeDescriptor; + // Gap 0-1 == gap 1-2 == 100; merged size 0-1 (250) < merged size 1-2 (350) -> pair 0-1 wins + std::vector<RD> Ranges = { + {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 200, .RangeLength = 50, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 350, .RangeLength = 200, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + }; + chunkblock_impl::MergeCheapestRange(Ranges); + + REQUIRE_EQ(2u, Ranges.size()); + // Pair 0-1 merged: start=0, length = (200+50)-0 = 250 + CHECK_EQ(0u, Ranges[0].RangeStart); + CHECK_EQ(250u, Ranges[0].RangeLength); + CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(2u, Ranges[0].ChunkBlockIndexCount); + // Pair 1 unchanged (was index 2) + CHECK_EQ(350u, Ranges[1].RangeStart); + CHECK_EQ(200u, Ranges[1].RangeLength); + CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.optimizeranges.preserves_ranges_low_latency") +{ + using RD = chunkblock_impl::RangeDescriptor; + // With MaxRangeCountPerRequest unlimited, RequestCount=1 + // RequestTimeAsBytes = 100000 * 1 * 0.001 = 100 << slack=7000 -> all ranges preserved + std::vector<RD> ExactRanges = { + {.RangeStart = 0, .RangeLength = 1000, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 2000, .RangeLength = 1000, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 4000, .RangeLength = 1000, .ChunkBlockIndexStart = 
2, .ChunkBlockIndexCount = 1}, + }; + uint64_t TotalBlockSize = 10000; + double LatencySec = 0.001; + uint64_t SpeedBytesPerSec = 100000; + uint64_t MaxRangeCountPerReq = (uint64_t)-1; + uint64_t MaxRangesPerBlock = 1024; + + auto Result = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock); + + REQUIRE_EQ(3u, Result.size()); +} + +TEST_CASE("chunkblock.optimizeranges.falls_back_to_full_block") +{ + using RD = chunkblock_impl::RangeDescriptor; + // 1 range already; slack=100 < SpeedBytesPerSec*LatencySec=200 -> full block (empty result) + std::vector<RD> ExactRanges = { + {.RangeStart = 100, .RangeLength = 900, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 3}, + }; + uint64_t TotalBlockSize = 1000; + double LatencySec = 0.01; + uint64_t SpeedBytesPerSec = 20000; + uint64_t MaxRangeCountPerReq = (uint64_t)-1; + uint64_t MaxRangesPerBlock = 1024; + + auto Result = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock); + + CHECK(Result.empty()); +} + +TEST_CASE("chunkblock.optimizeranges.maxrangesperblock_clamp") +{ + using RD = chunkblock_impl::RangeDescriptor; + // 5 input ranges; MaxRangesPerBlock=2 clamps to <=2 before the cost model runs + std::vector<RD> ExactRanges = { + {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 300, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 600, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + {.RangeStart = 900, .RangeLength = 100, .ChunkBlockIndexStart = 3, .ChunkBlockIndexCount = 1}, + {.RangeStart = 1200, .RangeLength = 100, .ChunkBlockIndexStart = 4, .ChunkBlockIndexCount = 1}, + }; + uint64_t TotalBlockSize = 5000; + double LatencySec = 0.001; + uint64_t SpeedBytesPerSec = 100000; + uint64_t MaxRangeCountPerReq = (uint64_t)-1; + 
uint64_t MaxRangesPerBlock = 2; + + auto Result = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock); + + CHECK(Result.size() <= 2u); + CHECK(!Result.empty()); +} + +TEST_CASE("chunkblock.optimizeranges.low_maxrangecountperrequest_drives_merge") +{ + using RD = chunkblock_impl::RangeDescriptor; + // MaxRangeCountPerRequest=1 means RequestCount==RangeCount; high latency drives merging + // With MaxRangeCountPerRequest=-1 the same 3 ranges would be preserved (verified by comment below) + std::vector<RD> ExactRanges = { + {.RangeStart = 100, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 250, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + }; + uint64_t TotalBlockSize = 1000; + double LatencySec = 1.0; + uint64_t SpeedBytesPerSec = 500; + // With MaxRangeCountPerRequest=-1: RequestCount=1, RequestTimeAsBytes=500 < slack=700 -> preserved + // With MaxRangeCountPerRequest=1: RequestCount=3, RequestTimeAsBytes=1500 > slack=700 -> merged + uint64_t MaxRangesPerBlock = 1024; + + auto Unlimited = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, (uint64_t)-1, MaxRangesPerBlock); + CHECK_EQ(3u, Unlimited.size()); + + auto Limited = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, uint64_t(1), MaxRangesPerBlock); + CHECK(Limited.size() < 3u); +} + +TEST_CASE("chunkblock.optimizeranges.unlimited_rangecountperrequest_no_extra_cost") +{ + using RD = chunkblock_impl::RangeDescriptor; + // MaxRangeCountPerRequest=-1 -> RequestCount always 1, even with many ranges and high latency + std::vector<RD> ExactRanges = { + {.RangeStart = 0, .RangeLength = 50, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1}, + {.RangeStart = 200, .RangeLength = 50, 
.ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1}, + {.RangeStart = 400, .RangeLength = 50, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1}, + {.RangeStart = 600, .RangeLength = 50, .ChunkBlockIndexStart = 3, .ChunkBlockIndexCount = 1}, + {.RangeStart = 800, .RangeLength = 50, .ChunkBlockIndexStart = 4, .ChunkBlockIndexCount = 1}, + }; + uint64_t TotalBlockSize = 5000; + double LatencySec = 0.1; + uint64_t SpeedBytesPerSec = 10000; // RequestTimeAsBytes=1000 << slack=4750 + uint64_t MaxRangeCountPerReq = (uint64_t)-1; + uint64_t MaxRangesPerBlock = 1024; + + auto Result = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock); + + CHECK_EQ(5u, Result.size()); +} + +TEST_CASE("chunkblock.optimizeranges.two_range_direct_merge_path") +{ + using RD = chunkblock_impl::RangeDescriptor; + // Exactly 2 ranges; cost model demands merge; exercises the RangeCount==2 direct-merge branch + // After direct merge -> 1 range with small slack -> full block (empty) + std::vector<RD> ExactRanges = { + {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 2}, + {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 2}, + }; + uint64_t TotalBlockSize = 600; + double LatencySec = 0.1; + uint64_t SpeedBytesPerSec = 5000; // RequestTimeAsBytes=500 > slack=400 on first iter + uint64_t MaxRangeCountPerReq = (uint64_t)-1; + uint64_t MaxRangesPerBlock = 1024; + + // Iteration 1: RangeCount=2, RequestCount=1, RequestTimeAsBytes=500 > slack=400 -> direct merge + // After merge: 1 range [{0,500,0,4}], slack=100 < Speed*Lat=500 -> full block + auto Result = + chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock); + + CHECK(Result.empty()); +} + +TEST_CASE("chunkblock.getneeded.all_chunks") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = 
Log(); + + auto Block = MakeBlockDesc(50, {100, 100, 100, 100}); + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + auto HashMap = MakeHashMap({Block}); + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return true; }); + + REQUIRE_EQ(1u, NeededBlocks.size()); + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); + REQUIRE_EQ(4u, NeededBlocks[0].ChunkIndexes.size()); + CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]); + CHECK_EQ(1u, NeededBlocks[0].ChunkIndexes[1]); + CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[2]); + CHECK_EQ(3u, NeededBlocks[0].ChunkIndexes[3]); +} + +TEST_CASE("chunkblock.getneeded.no_chunks") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + + auto Block = MakeBlockDesc(50, {100, 100, 100, 100}); + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + auto HashMap = MakeHashMap({Block}); + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return false; }); + + CHECK(NeededBlocks.empty()); +} + +TEST_CASE("chunkblock.getneeded.subset_within_block") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + + auto Block = MakeBlockDesc(50, {100, 100, 100, 100}); + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + auto HashMap = MakeHashMap({Block}); + // Indices 0 and 2 are needed; 1 and 3 are not + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex == 0 || ChunkIndex == 2; }); + + REQUIRE_EQ(1u, NeededBlocks.size()); + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); + REQUIRE_EQ(2u, NeededBlocks[0].ChunkIndexes.size()); + CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]); + CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[1]); +} + +TEST_CASE("chunkblock.getneeded.dedup_low_slack_wins") +{ 
+ using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + + // Block 0: {H0, H1, SharedH, H3} - 3 of 4 needed (H3 not needed); slack = 100 + // Block 1: {H4, H5, SharedH, H6} - only SharedH needed; slack = 300 + // Block 0 has less slack -> processed first -> SharedH assigned to block 0 + IoHash SharedH = IoHash::HashBuffer(MemoryView("shared_chunk_dedup", 18)); + IoHash H0 = IoHash::HashBuffer(MemoryView("block0_chunk0", 13)); + IoHash H1 = IoHash::HashBuffer(MemoryView("block0_chunk1", 13)); + IoHash H3 = IoHash::HashBuffer(MemoryView("block0_chunk3", 13)); + IoHash H4 = IoHash::HashBuffer(MemoryView("block1_chunk0", 13)); + IoHash H5 = IoHash::HashBuffer(MemoryView("block1_chunk1", 13)); + IoHash H6 = IoHash::HashBuffer(MemoryView("block1_chunk3", 13)); + + ChunkBlockDescription Block0; + Block0.HeaderSize = 50; + Block0.ChunkRawHashes = {H0, H1, SharedH, H3}; + Block0.ChunkRawLengths = {100, 100, 100, 100}; + Block0.ChunkCompressedLengths = {100, 100, 100, 100}; + + ChunkBlockDescription Block1; + Block1.HeaderSize = 50; + Block1.ChunkRawHashes = {H4, H5, SharedH, H6}; + Block1.ChunkRawLengths = {100, 100, 100, 100}; + Block1.ChunkCompressedLengths = {100, 100, 100, 100}; + + std::vector<ChunkBlockDescription> Blocks = {Block0, Block1}; + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(LogRef, Blocks, Options); + + // Map: H0->0, H1->1, SharedH->2, H3->3, H4->4, H5->5, H6->6 + auto HashMap = MakeHashMap(Blocks); + // Need H0(0), H1(1), SharedH(2) from block 0; SharedH from block 1 (already index 2) + // H3(3) not needed; H4,H5,H6 not needed + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex <= 2; }); + + // Block 0 slack=100 (H3 unused), block 1 slack=300 (H4,H5,H6 unused) + // Block 0 processed first; picks up H0, H1, SharedH + // Block 1 tries SharedH but it's already picked up -> empty -> not added + REQUIRE_EQ(1u, NeededBlocks.size()); + CHECK_EQ(0u, 
NeededBlocks[0].BlockIndex); + REQUIRE_EQ(3u, NeededBlocks[0].ChunkIndexes.size()); + CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]); + CHECK_EQ(1u, NeededBlocks[0].ChunkIndexes[1]); + CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[2]); +} + +TEST_CASE("chunkblock.getneeded.dedup_no_double_pickup") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + + // SharedH appears in both blocks; should appear in the result exactly once + IoHash SharedH = IoHash::HashBuffer(MemoryView("shared_chunk_nodup", 18)); + IoHash H0 = IoHash::HashBuffer(MemoryView("unique_chunk_b0", 15)); + IoHash H1 = IoHash::HashBuffer(MemoryView("unique_chunk_b1a", 16)); + IoHash H2 = IoHash::HashBuffer(MemoryView("unique_chunk_b1b", 16)); + IoHash H3 = IoHash::HashBuffer(MemoryView("unique_chunk_b1c", 16)); + + ChunkBlockDescription Block0; + Block0.HeaderSize = 50; + Block0.ChunkRawHashes = {SharedH, H0}; + Block0.ChunkRawLengths = {100, 100}; + Block0.ChunkCompressedLengths = {100, 100}; + + ChunkBlockDescription Block1; + Block1.HeaderSize = 50; + Block1.ChunkRawHashes = {H1, H2, H3, SharedH}; + Block1.ChunkRawLengths = {100, 100, 100, 100}; + Block1.ChunkCompressedLengths = {100, 100, 100, 100}; + + std::vector<ChunkBlockDescription> Blocks = {Block0, Block1}; + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(LogRef, Blocks, Options); + + // Map: SharedH->0, H0->1, H1->2, H2->3, H3->4 + // Only SharedH (index 0) needed; no other chunks + auto HashMap = MakeHashMap(Blocks); + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex == 0; }); + + // Block 0: SharedH needed, H0 not needed -> slack=100 + // Block 1: SharedH needed, H1/H2/H3 not needed -> slack=300 + // Block 0 processed first -> picks up SharedH; Block 1 skips it + + // Count total occurrences of SharedH across all NeededBlocks + uint32_t SharedOccurrences = 0; + for (const auto& NB : NeededBlocks) + { + for (uint32_t Idx : NB.ChunkIndexes) + { + // 
SharedH is at block-local index 0 in Block0 and index 3 in Block1 + (void)Idx; + SharedOccurrences++; + } + } + CHECK_EQ(1u, SharedOccurrences); + REQUIRE_EQ(1u, NeededBlocks.size()); + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); +} + +TEST_CASE("chunkblock.getneeded.skips_unrequested_chunks") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + + // Block has 4 chunks but only 2 appear in the hash map -> ChunkIndexes has exactly those 2 + auto Block = MakeBlockDesc(50, {100, 100, 100, 100}); + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + // Only put chunks at positions 0 and 2 in the map + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> HashMap; + HashMap.emplace(Block.ChunkRawHashes[0], 0u); + HashMap.emplace(Block.ChunkRawHashes[2], 1u); + + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return true; }); + + REQUIRE_EQ(1u, NeededBlocks.size()); + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); + REQUIRE_EQ(2u, NeededBlocks[0].ChunkIndexes.size()); + CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]); + CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[1]); +} + +TEST_CASE("chunkblock.getneeded.two_blocks_both_contribute") +{ + using namespace chunkblock_analyser_testutils; + + LoggerRef LogRef = Log(); + + // Block 0: all 4 needed (slack=0); block 1: 3 of 4 needed (slack=100) + // Both blocks contribute chunks -> 2 NeededBlocks in result + auto Block0 = MakeBlockDesc(50, {100, 100, 100, 100}, /*BlockSeed=*/0); + auto Block1 = MakeBlockDesc(50, {100, 100, 100, 100}, /*BlockSeed=*/200); + + std::vector<ChunkBlockDescription> Blocks = {Block0, Block1}; + ChunkBlockAnalyser::Options Options; + ChunkBlockAnalyser Analyser(LogRef, Blocks, Options); + + // HashMap: Block0 hashes -> indices 0-3, Block1 hashes -> indices 4-7 + auto HashMap = MakeHashMap(Blocks); + // Need all Block0 chunks (0-3) and Block1 chunks 0-2 (indices 4-6); not chunk index 7 
(Block1 chunk 3) + auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex <= 6; }); + + CHECK_EQ(2u, NeededBlocks.size()); + // Block 0 has slack=0 (all 4 needed), Block 1 has slack=100 (1 not needed) + // Block 0 comes first in result + CHECK_EQ(0u, NeededBlocks[0].BlockIndex); + CHECK_EQ(4u, NeededBlocks[0].ChunkIndexes.size()); + CHECK_EQ(1u, NeededBlocks[1].BlockIndex); + CHECK_EQ(3u, NeededBlocks[1].ChunkIndexes.size()); +} + +TEST_CASE("chunkblock.calc.off_mode") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + + // HeaderSize > 0, chunks size matches -> CanDoPartialBlockDownload = true + // But mode Off forces full block regardless + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::Off}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + REQUIRE_EQ(1u, Result.FullBlockIndexes.size()); + CHECK_EQ(0u, Result.FullBlockIndexes[0]); + CHECK(Result.BlockRanges.empty()); +} + +TEST_CASE("chunkblock.calc.exact_mode") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + // Need chunks 0 and 2 -> 2 non-contiguous ranges; Exact mode passes them straight through + 
std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::Exact}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + CHECK(Result.FullBlockIndexes.empty()); + REQUIRE_EQ(2u, Result.BlockRanges.size()); + + CHECK_EQ(0u, Result.BlockRanges[0].BlockIndex); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(0u, Result.BlockRanges[0].ChunkBlockIndexStart); + CHECK_EQ(1u, Result.BlockRanges[0].ChunkBlockIndexCount); + + CHECK_EQ(0u, Result.BlockRanges[1].BlockIndex); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); // 100+200 before chunk 2 + CHECK_EQ(300u, Result.BlockRanges[1].RangeLength); + CHECK_EQ(2u, Result.BlockRanges[1].ChunkBlockIndexStart); + CHECK_EQ(1u, Result.BlockRanges[1].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.calc.singlerange_mode") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + // Default HostLatencySec=-1 -> OptimizeRanges not called after SingleRange collapse + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + // Need chunks 0 and 2 -> 2 ranges that get collapsed to 1 + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::SingleRange}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + CHECK(Result.FullBlockIndexes.empty()); + REQUIRE_EQ(1u, Result.BlockRanges.size()); + CHECK_EQ(0u, Result.BlockRanges[0].BlockIndex); + CHECK_EQ(ChunkStartOffset, 
Result.BlockRanges[0].RangeStart); + // Spans from chunk 0 start to chunk 2 end: 100+200+300=600 + CHECK_EQ(600u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(0u, Result.BlockRanges[0].ChunkBlockIndexStart); + // ChunkBlockIndexCount = (2+1) - 0 = 3 + CHECK_EQ(3u, Result.BlockRanges[0].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.calc.multirange_mode") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + // Low latency: RequestTimeAsBytes=100 << slack -> OptimizeRanges preserves ranges + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + Options.HostLatencySec = 0.001; + Options.HostSpeedBytesPerSec = 100000; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::MultiRange}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + CHECK(Result.FullBlockIndexes.empty()); + REQUIRE_EQ(2u, Result.BlockRanges.size()); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); + CHECK_EQ(300u, Result.BlockRanges[1].RangeLength); +} + +TEST_CASE("chunkblock.calc.multirangehighspeed_mode") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + // Block slack ~= 714 bytes (TotalBlockSize~=1114, RangeTotalSize=400 for chunks 0+2) + // RequestTimeAsBytes = 400000 * 1 * 0.001 = 400 < 714 -> ranges preserved + 
ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + Options.HostHighSpeedLatencySec = 0.001; + Options.HostHighSpeedBytesPerSec = 400000; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::MultiRangeHighSpeed}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + CHECK(Result.FullBlockIndexes.empty()); + REQUIRE_EQ(2u, Result.BlockRanges.size()); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); + CHECK_EQ(300u, Result.BlockRanges[1].RangeLength); +} + +TEST_CASE("chunkblock.calc.all_chunks_needed_full_block") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + Options.HostLatencySec = 0.001; + Options.HostSpeedBytesPerSec = 100000; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + // All 4 chunks needed -> short-circuit to full block regardless of mode + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 1, 2, 3}}}; + std::vector<Mode> Modes = {Mode::Exact}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + REQUIRE_EQ(1u, Result.FullBlockIndexes.size()); + CHECK_EQ(0u, Result.FullBlockIndexes[0]); + CHECK(Result.BlockRanges.empty()); +} + +TEST_CASE("chunkblock.calc.headersize_zero_forces_full_block") +{ + using namespace chunkblock_analyser_testutils; + 
using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + + // HeaderSize=0 -> CanDoPartialBlockDownload=false -> full block even in Exact mode + auto Block = MakeBlockDesc(0, {100, 200, 300, 400}); + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::Exact}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + REQUIRE_EQ(1u, Result.FullBlockIndexes.size()); + CHECK_EQ(0u, Result.FullBlockIndexes[0]); + CHECK(Result.BlockRanges.empty()); +} + +TEST_CASE("chunkblock.calc.low_maxrangecountperrequest") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + + // 5 chunks of 100 bytes each; need chunks 0, 2, 4 -> 3 non-contiguous ranges + // With MaxRangeCountPerRequest=1 and high latency, cost model merges aggressively -> full block + auto Block = MakeBlockDesc(10, {100, 100, 100, 100, 100}); + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + Options.HostLatencySec = 0.1; + Options.HostSpeedBytesPerSec = 1000; + Options.HostMaxRangeCountPerRequest = 1; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2, 4}}}; + std::vector<Mode> Modes = {Mode::MultiRange}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + // Cost model drives merging: 3 requests x 1000 x 0.1 = 300 > slack ~= 210+headersize + // After merges converges to full block + REQUIRE_EQ(1u, Result.FullBlockIndexes.size()); + CHECK_EQ(0u, Result.FullBlockIndexes[0]); + CHECK(Result.BlockRanges.empty()); +} + 
+TEST_CASE("chunkblock.calc.no_latency_skips_optimize") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + // Default HostLatencySec=-1 -> OptimizeRanges not called; raw GetBlockRanges result used + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}}; + std::vector<Mode> Modes = {Mode::MultiRange}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + // No optimize pass -> exact ranges from GetBlockRanges + CHECK(Result.FullBlockIndexes.empty()); + REQUIRE_EQ(2u, Result.BlockRanges.size()); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); + CHECK_EQ(300u, Result.BlockRanges[1].RangeLength); +} + +TEST_CASE("chunkblock.calc.multiple_blocks_different_modes") +{ + using namespace chunkblock_analyser_testutils; + using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode; + + LoggerRef LogRef = Log(); + + // 3 blocks with different modes: Off, Exact, MultiRange + auto Block0 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/0); + auto Block1 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/10); + auto Block2 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/20); + + ChunkBlockAnalyser::Options Options; + Options.IsQuiet = true; + Options.HostLatencySec = 0.001; + Options.HostSpeedBytesPerSec = 100000; + + std::vector<ChunkBlockDescription> Blocks = {Block0, Block1, Block2}; + ChunkBlockAnalyser Analyser(LogRef, Blocks, 
Options); + + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + 50; + + std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = { + {.BlockIndex = 0, .ChunkIndexes = {0, 2}}, + {.BlockIndex = 1, .ChunkIndexes = {0, 2}}, + {.BlockIndex = 2, .ChunkIndexes = {0, 2}}, + }; + std::vector<Mode> Modes = {Mode::Off, Mode::Exact, Mode::MultiRange}; + + auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes); + + // Block 0: Off -> FullBlockIndexes + REQUIRE_EQ(1u, Result.FullBlockIndexes.size()); + CHECK_EQ(0u, Result.FullBlockIndexes[0]); + + // Block 1: Exact -> 2 ranges; Block 2: MultiRange (low latency) -> 2 ranges + // Total: 4 ranges + REQUIRE_EQ(4u, Result.BlockRanges.size()); + + // First 2 ranges belong to Block 1 (Exact) + CHECK_EQ(1u, Result.BlockRanges[0].BlockIndex); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[0].RangeLength); + CHECK_EQ(1u, Result.BlockRanges[1].BlockIndex); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); + CHECK_EQ(300u, Result.BlockRanges[1].RangeLength); + + // Last 2 ranges belong to Block 2 (MultiRange preserved) + CHECK_EQ(2u, Result.BlockRanges[2].BlockIndex); + CHECK_EQ(ChunkStartOffset, Result.BlockRanges[2].RangeStart); + CHECK_EQ(100u, Result.BlockRanges[2].RangeLength); + CHECK_EQ(2u, Result.BlockRanges[3].BlockIndex); + CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[3].RangeStart); + CHECK_EQ(300u, Result.BlockRanges[3].RangeLength); +} + +TEST_CASE("chunkblock.getblockranges.first_chunk_only") +{ + using namespace chunkblock_analyser_testutils; + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {0}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(1u, Ranges.size()); + CHECK_EQ(ChunkStartOffset, 
Ranges[0].RangeStart); + CHECK_EQ(100u, Ranges[0].RangeLength); + CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.getblockranges.last_chunk_only") +{ + using namespace chunkblock_analyser_testutils; + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {3}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(1u, Ranges.size()); + CHECK_EQ(ChunkStartOffset + 600u, Ranges[0].RangeStart); // 100+200+300 before chunk 3 + CHECK_EQ(400u, Ranges[0].RangeLength); + CHECK_EQ(3u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.getblockranges.middle_chunk_only") +{ + using namespace chunkblock_analyser_testutils; + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {1}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(1u, Ranges.size()); + CHECK_EQ(ChunkStartOffset + 100u, Ranges[0].RangeStart); // 100 before chunk 1 + CHECK_EQ(200u, Ranges[0].RangeLength); + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.getblockranges.all_chunks") +{ + using namespace chunkblock_analyser_testutils; + + auto Block = MakeBlockDesc(50, {100, 200, 300, 400}); + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {0, 1, 2, 3}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(1u, Ranges.size()); + CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart); + CHECK_EQ(1000u, Ranges[0].RangeLength); // 
100+200+300+400 + CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(4u, Ranges[0].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.getblockranges.non_contiguous") +{ + using namespace chunkblock_analyser_testutils; + + // Chunks 0 and 2 needed, chunk 1 skipped -> two separate ranges + auto Block = MakeBlockDesc(50, {100, 200, 300}); + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {0, 2}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(2u, Ranges.size()); + + CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart); + CHECK_EQ(100u, Ranges[0].RangeLength); + CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount); + + CHECK_EQ(ChunkStartOffset + 300u, Ranges[1].RangeStart); // 100+200 before chunk 2 + CHECK_EQ(300u, Ranges[1].RangeLength); + CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart); + CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount); +} + +TEST_CASE("chunkblock.getblockranges.contiguous_run") +{ + using namespace chunkblock_analyser_testutils; + + // Chunks 1, 2, 3 needed (consecutive) -> one merged range + auto Block = MakeBlockDesc(50, {50, 100, 150, 200, 250}); + uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize; + + std::vector<uint32_t> Needed = {1, 2, 3}; + auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed); + + REQUIRE_EQ(1u, Ranges.size()); + CHECK_EQ(ChunkStartOffset + 50u, Ranges[0].RangeStart); // 50 before chunk 1 + CHECK_EQ(450u, Ranges[0].RangeLength); // 100+150+200 + CHECK_EQ(1u, Ranges[0].ChunkBlockIndexStart); + CHECK_EQ(3u, Ranges[0].ChunkBlockIndexCount); +} + +TEST_SUITE_END(); + void chunkblock_forcelink() { diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp index 26d179f14..c09ab9d3a 100644 --- a/src/zenremotestore/chunking/chunkedcontent.cpp +++ 
b/src/zenremotestore/chunking/chunkedcontent.cpp @@ -166,7 +166,6 @@ namespace { if (Chunked.Info.ChunkSequence.empty()) { AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Chunked.Info.RawHash, RawSize); - Stats.UniqueSequencesFound++; } else { @@ -186,7 +185,6 @@ namespace { Chunked.Info.ChunkHashes, ChunkSizes); } - Stats.UniqueSequencesFound++; } }); Stats.FilesChunked++; @@ -253,7 +251,7 @@ FolderContent::operator==(const FolderContent& Rhs) const if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) && (ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size())) { - size_t PathCount = 0; + size_t PathCount = Paths.size(); for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) { if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string()) @@ -1706,6 +1704,8 @@ namespace chunkedcontent_testutils { } // namespace chunkedcontent_testutils +TEST_SUITE_BEGIN("remotestore.chunkedcontent"); + TEST_CASE("chunkedcontent.DeletePathsFromContent") { FastRandom BaseRandom; @@ -1924,6 +1924,8 @@ TEST_CASE("chunkedcontent.ApplyChunkedContentOverlay") } } +TEST_SUITE_END(); + #endif // ZEN_WITH_TESTS } // namespace zen diff --git a/src/zenremotestore/chunking/chunkedfile.cpp b/src/zenremotestore/chunking/chunkedfile.cpp index 652110605..633ddfd0d 100644 --- a/src/zenremotestore/chunking/chunkedfile.cpp +++ b/src/zenremotestore/chunking/chunkedfile.cpp @@ -211,6 +211,8 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace zen { # if 0 +TEST_SUITE_BEGIN("remotestore.chunkedfile"); + TEST_CASE("chunkedfile.findparams") { # if 1 @@ -513,6 +515,8 @@ TEST_CASE("chunkedfile.findparams") // WorkLatch.CountDown(); // WorkLatch.Wait(); } + +TEST_SUITE_END(); # endif // 0 void diff --git a/src/zenremotestore/chunking/chunkingcache.cpp b/src/zenremotestore/chunking/chunkingcache.cpp index 7f0a26330..e9b783a00 100644 --- a/src/zenremotestore/chunking/chunkingcache.cpp +++ 
b/src/zenremotestore/chunking/chunkingcache.cpp @@ -75,13 +75,13 @@ public: { Lock.ReleaseNow(); RwLock::ExclusiveLockScope EditLock(m_Lock); - if (auto RemoveIt = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end()) + if (auto RemoveIt = m_PathHashToEntry.find(PathHash); RemoveIt != m_PathHashToEntry.end()) { - CachedEntry& DeleteEntry = m_Entries[It->second]; + CachedEntry& DeleteEntry = m_Entries[RemoveIt->second]; DeleteEntry.Chunked = {}; DeleteEntry.ModificationTick = 0; - m_FreeEntryIndexes.push_back(It->second); - m_PathHashToEntry.erase(It); + m_FreeEntryIndexes.push_back(RemoveIt->second); + m_PathHashToEntry.erase(RemoveIt); } } } @@ -461,6 +461,8 @@ namespace chunkingcache_testutils { } } // namespace chunkingcache_testutils +TEST_SUITE_BEGIN("remotestore.chunkingcache"); + TEST_CASE("chunkingcache.nullchunkingcache") { using namespace chunkingcache_testutils; @@ -617,6 +619,8 @@ TEST_CASE("chunkingcache.diskchunkingcache") } } +TEST_SUITE_END(); + void chunkingcache_forcelink() { diff --git a/src/zenremotestore/filesystemutils.cpp b/src/zenremotestore/filesystemutils.cpp deleted file mode 100644 index fa1ce6f78..000000000 --- a/src/zenremotestore/filesystemutils.cpp +++ /dev/null @@ -1,697 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. 
- -#include <zenremotestore/filesystemutils.h> - -#include <zenremotestore/chunking/chunkedcontent.h> - -#include <zencore/filesystem.h> -#include <zencore/fmtutils.h> -#include <zencore/parallelwork.h> -#include <zencore/scopeguard.h> -#include <zencore/timer.h> -#include <zencore/trace.h> - -#if ZEN_WITH_TESTS -# include <zencore/testing.h> -# include <zencore/testutils.h> -#endif // ZEN_WITH_TESTS - -namespace zen { - -BufferedOpenFile::BufferedOpenFile(const std::filesystem::path Path, - std::atomic<uint64_t>& OpenReadCount, - std::atomic<uint64_t>& CurrentOpenFileCount, - std::atomic<uint64_t>& ReadCount, - std::atomic<uint64_t>& ReadByteCount) -: m_Source(Path, BasicFile::Mode::kRead) -, m_SourceSize(m_Source.FileSize()) -, m_OpenReadCount(OpenReadCount) -, m_CurrentOpenFileCount(CurrentOpenFileCount) -, m_ReadCount(ReadCount) -, m_ReadByteCount(ReadByteCount) - -{ - m_OpenReadCount++; - m_CurrentOpenFileCount++; -} - -BufferedOpenFile::~BufferedOpenFile() -{ - m_CurrentOpenFileCount--; -} - -CompositeBuffer -BufferedOpenFile::GetRange(uint64_t Offset, uint64_t Size) -{ - ZEN_TRACE_CPU("BufferedOpenFile::GetRange"); - - ZEN_ASSERT((m_CacheBlockIndex == (uint64_t)-1) || m_Cache); - auto _ = MakeGuard([&]() { ZEN_ASSERT((m_CacheBlockIndex == (uint64_t)-1) || m_Cache); }); - - ZEN_ASSERT((Offset + Size) <= m_SourceSize); - const uint64_t BlockIndexStart = Offset / BlockSize; - const uint64_t BlockIndexEnd = (Offset + Size - 1) / BlockSize; - - std::vector<SharedBuffer> BufferRanges; - BufferRanges.reserve(BlockIndexEnd - BlockIndexStart + 1); - - uint64_t ReadOffset = Offset; - for (uint64_t BlockIndex = BlockIndexStart; BlockIndex <= BlockIndexEnd; BlockIndex++) - { - const uint64_t BlockStartOffset = BlockIndex * BlockSize; - if (m_CacheBlockIndex != BlockIndex) - { - uint64_t CacheSize = Min(BlockSize, m_SourceSize - BlockStartOffset); - ZEN_ASSERT(CacheSize > 0); - m_Cache = IoBuffer(CacheSize); - m_Source.Read(m_Cache.GetMutableView().GetData(), CacheSize, 
BlockStartOffset); - m_ReadCount++; - m_ReadByteCount += CacheSize; - m_CacheBlockIndex = BlockIndex; - } - - const uint64_t BytesRead = ReadOffset - Offset; - ZEN_ASSERT(BlockStartOffset <= ReadOffset); - const uint64_t OffsetIntoBlock = ReadOffset - BlockStartOffset; - ZEN_ASSERT(OffsetIntoBlock < m_Cache.GetSize()); - const uint64_t BlockBytes = Min(m_Cache.GetSize() - OffsetIntoBlock, Size - BytesRead); - BufferRanges.emplace_back(SharedBuffer(IoBuffer(m_Cache, OffsetIntoBlock, BlockBytes))); - ReadOffset += BlockBytes; - } - CompositeBuffer Result(std::move(BufferRanges)); - ZEN_ASSERT(Result.GetSize() == Size); - return Result; -} - -ReadFileCache::ReadFileCache(std::atomic<uint64_t>& OpenReadCount, - std::atomic<uint64_t>& CurrentOpenFileCount, - std::atomic<uint64_t>& ReadCount, - std::atomic<uint64_t>& ReadByteCount, - const std::filesystem::path& Path, - const ChunkedFolderContent& LocalContent, - const ChunkedContentLookup& LocalLookup, - size_t MaxOpenFileCount) -: m_Path(Path) -, m_LocalContent(LocalContent) -, m_LocalLookup(LocalLookup) -, m_OpenReadCount(OpenReadCount) -, m_CurrentOpenFileCount(CurrentOpenFileCount) -, m_ReadCount(ReadCount) -, m_ReadByteCount(ReadByteCount) -{ - m_OpenFiles.reserve(MaxOpenFileCount); -} -ReadFileCache::~ReadFileCache() -{ - m_OpenFiles.clear(); -} - -CompositeBuffer -ReadFileCache::GetRange(uint32_t SequenceIndex, uint64_t Offset, uint64_t Size) -{ - ZEN_TRACE_CPU("ReadFileCache::GetRange"); - - auto CacheIt = - std::find_if(m_OpenFiles.begin(), m_OpenFiles.end(), [SequenceIndex](const auto& Lhs) { return Lhs.first == SequenceIndex; }); - if (CacheIt != m_OpenFiles.end()) - { - if (CacheIt != m_OpenFiles.begin()) - { - auto CachedFile(std::move(CacheIt->second)); - m_OpenFiles.erase(CacheIt); - m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(SequenceIndex, std::move(CachedFile))); - } - CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); - return Result; - } - const uint32_t 
LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[SequenceIndex]; - const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred(); - if (Size == m_LocalContent.RawSizes[LocalPathIndex]) - { - IoBuffer Result = IoBufferBuilder::MakeFromFile(LocalFilePath); - return CompositeBuffer(SharedBuffer(Result)); - } - if (m_OpenFiles.size() == m_OpenFiles.capacity()) - { - m_OpenFiles.pop_back(); - } - m_OpenFiles.insert( - m_OpenFiles.begin(), - std::make_pair( - SequenceIndex, - std::make_unique<BufferedOpenFile>(LocalFilePath, m_OpenReadCount, m_CurrentOpenFileCount, m_ReadCount, m_ReadByteCount))); - CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); - return Result; -} - -uint32_t -SetNativeFileAttributes(const std::filesystem::path FilePath, SourcePlatform SourcePlatform, uint32_t Attributes) -{ -#if ZEN_PLATFORM_WINDOWS - if (SourcePlatform == SourcePlatform::Windows) - { - SetFileAttributesToPath(FilePath, Attributes); - return Attributes; - } - else - { - uint32_t CurrentAttributes = GetFileAttributesFromPath(FilePath); - uint32_t NewAttributes = zen::MakeFileAttributeReadOnly(CurrentAttributes, zen::IsFileModeReadOnly(Attributes)); - if (CurrentAttributes != NewAttributes) - { - SetFileAttributesToPath(FilePath, NewAttributes); - } - return NewAttributes; - } -#endif // ZEN_PLATFORM_WINDOWS -#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC - if (SourcePlatform != SourcePlatform::Windows) - { - zen::SetFileMode(FilePath, Attributes); - return Attributes; - } - else - { - uint32_t CurrentMode = zen::GetFileMode(FilePath); - uint32_t NewMode = zen::MakeFileModeReadOnly(CurrentMode, zen::IsFileAttributeReadOnly(Attributes)); - if (CurrentMode != NewMode) - { - zen::SetFileMode(FilePath, NewMode); - } - return NewMode; - } -#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC -}; - -uint32_t -GetNativeFileAttributes(const std::filesystem::path FilePath) -{ -#if ZEN_PLATFORM_WINDOWS - return 
GetFileAttributesFromPath(FilePath); -#endif // ZEN_PLATFORM_WINDOWS -#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC - return GetFileMode(FilePath); -#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC -} - -bool -IsFileWithRetry(const std::filesystem::path& Path) -{ - std::error_code Ec; - bool Result = IsFile(Path, Ec); - for (size_t Retries = 0; Ec && Retries < 3; Retries++) - { - Sleep(100 + int(Retries * 50)); - Ec.clear(); - Result = IsFile(Path, Ec); - } - if (Ec) - { - throw std::system_error(std::error_code(Ec.value(), std::system_category()), - fmt::format("Failed to check path '{}' is file, reason: ({}) {}", Path, Ec.value(), Ec.message())); - } - return Result; -} - -bool -SetFileReadOnlyWithRetry(const std::filesystem::path& Path, bool ReadOnly) -{ - std::error_code Ec; - bool Result = SetFileReadOnly(Path, ReadOnly, Ec); - for (size_t Retries = 0; Ec && Retries < 3; Retries++) - { - if (!IsFileWithRetry(Path)) - { - return false; - } - Sleep(100 + int(Retries * 50)); - Ec.clear(); - Result = SetFileReadOnly(Path, ReadOnly, Ec); - } - if (Ec) - { - throw std::system_error(std::error_code(Ec.value(), std::system_category()), - fmt::format("Failed {} read only flag for file '{}', reason: ({}) {}", - ReadOnly ? 
"setting" : "clearing", - Path, - Ec.value(), - Ec.message())); - } - return Result; -} - -std::error_code -RenameFileWithRetry(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath) -{ - std::error_code Ec; - RenameFile(SourcePath, TargetPath, Ec); - for (size_t Retries = 0; Ec && Retries < 5; Retries++) - { - ZEN_ASSERT_SLOW(IsFile(SourcePath)); - Sleep(50 + int(Retries * 150)); - Ec.clear(); - RenameFile(SourcePath, TargetPath, Ec); - } - return Ec; -} - -std::error_code -TryRemoveFile(const std::filesystem::path& Path) -{ - std::error_code Ec; - RemoveFile(Path, Ec); - if (Ec) - { - if (IsFile(Path, Ec)) - { - Ec.clear(); - RemoveFile(Path, Ec); - if (Ec) - { - return Ec; - } - } - } - return {}; -} - -void -RemoveFileWithRetry(const std::filesystem::path& Path) -{ - std::error_code Ec; - RemoveFile(Path, Ec); - for (size_t Retries = 0; Ec && Retries < 6; Retries++) - { - if (!IsFileWithRetry(Path)) - { - return; - } - Sleep(100 + int(Retries * 50)); - Ec.clear(); - RemoveFile(Path, Ec); - } - if (Ec) - { - throw std::system_error(std::error_code(Ec.value(), std::system_category()), - fmt::format("Failed removing file '{}', reason: ({}) {}", Path, Ec.value(), Ec.message())); - } -} - -void -FastCopyFile(bool AllowFileClone, - bool UseSparseFiles, - const std::filesystem::path& SourceFilePath, - const std::filesystem::path& TargetFilePath, - uint64_t RawSize, - std::atomic<uint64_t>& WriteCount, - std::atomic<uint64_t>& WriteByteCount, - std::atomic<uint64_t>& CloneCount, - std::atomic<uint64_t>& CloneByteCount) -{ - ZEN_TRACE_CPU("CopyFile"); - if (AllowFileClone && TryCloneFile(SourceFilePath, TargetFilePath)) - { - WriteCount += 1; - WriteByteCount += RawSize; - CloneCount += 1; - CloneByteCount += RawSize; - } - else - { - BasicFile TargetFile(TargetFilePath, BasicFile::Mode::kTruncate); - if (UseSparseFiles) - { - PrepareFileForScatteredWrite(TargetFile.Handle(), RawSize); - } - uint64_t Offset = 0; - if (!ScanFile(SourceFilePath, 
512u * 1024u, [&](const void* Data, size_t Size) { - TargetFile.Write(Data, Size, Offset); - Offset += Size; - WriteCount++; - WriteByteCount += Size; - })) - { - throw std::runtime_error(fmt::format("Failed to copy file '{}' to '{}'", SourceFilePath, TargetFilePath)); - } - } -} - -CleanDirectoryResult -CleanDirectory( - WorkerThreadPool& IOWorkerPool, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - const std::filesystem::path& Path, - std::span<const std::string> ExcludeDirectories, - std::function<void(const std::string_view Details, uint64_t TotalCount, uint64_t RemainingCount, bool IsPaused, bool IsAborted)>&& - ProgressFunc, - uint32_t ProgressUpdateDelayMS) -{ - ZEN_TRACE_CPU("CleanDirectory"); - Stopwatch Timer; - - std::atomic<uint64_t> DiscoveredItemCount = 0; - std::atomic<uint64_t> DeletedItemCount = 0; - std::atomic<uint64_t> DeletedByteCount = 0; - - std::vector<std::filesystem::path> DirectoriesToDelete; - CleanDirectoryResult Result; - RwLock ResultLock; - auto _ = MakeGuard([&]() { - Result.DeletedCount = DeletedItemCount.load(); - Result.DeletedByteCount = DeletedByteCount.load(); - Result.FoundCount = DiscoveredItemCount.load(); - }); - - ParallelWork Work(AbortFlag, - PauseFlag, - ProgressFunc ? 
WorkerThreadPool::EMode::DisableBacklog : WorkerThreadPool::EMode::EnableBacklog); - - struct AsyncVisitor : public GetDirectoryContentVisitor - { - AsyncVisitor(const std::filesystem::path& InPath, - std::atomic<bool>& InAbortFlag, - std::atomic<uint64_t>& InDiscoveredItemCount, - std::atomic<uint64_t>& InDeletedItemCount, - std::atomic<uint64_t>& InDeletedByteCount, - std::span<const std::string> InExcludeDirectories, - std::vector<std::filesystem::path>& OutDirectoriesToDelete, - CleanDirectoryResult& InResult, - RwLock& InResultLock) - : Path(InPath) - , AbortFlag(InAbortFlag) - , DiscoveredItemCount(InDiscoveredItemCount) - , DeletedItemCount(InDeletedItemCount) - , DeletedByteCount(InDeletedByteCount) - , ExcludeDirectories(InExcludeDirectories) - , DirectoriesToDelete(OutDirectoriesToDelete) - , Result(InResult) - , ResultLock(InResultLock) - { - } - - virtual bool AsyncAllowDirectory(const std::filesystem::path& Parent, const std::filesystem::path& DirectoryName) const override - { - ZEN_UNUSED(Parent); - - if (AbortFlag) - { - return false; - } - const std::string DirectoryString = DirectoryName.string(); - for (const std::string_view ExcludeDirectory : ExcludeDirectories) - { - if (DirectoryString == ExcludeDirectory) - { - return false; - } - } - return true; - } - - virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override - { - ZEN_TRACE_CPU("CleanDirectory_AsyncVisitDirectory"); - if (!AbortFlag) - { - DiscoveredItemCount += Content.FileNames.size(); - - ZEN_TRACE_CPU("DeleteFiles"); - std::vector<std::pair<std::filesystem::path, std::error_code>> FailedRemovePaths; - for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++) - { - const std::filesystem::path& FileName = Content.FileNames[FileIndex]; - const std::filesystem::path FilePath = (Path / RelativeRoot / FileName).make_preferred(); - - bool IsRemoved = false; - std::error_code Ec; - (void)SetFileReadOnly(FilePath, 
false, Ec); - for (size_t Retries = 0; Ec && Retries < 3; Retries++) - { - if (!IsFileWithRetry(FilePath)) - { - IsRemoved = true; - Ec.clear(); - break; - } - Sleep(100 + int(Retries * 50)); - Ec.clear(); - (void)SetFileReadOnly(FilePath, false, Ec); - } - if (!IsRemoved && !Ec) - { - (void)RemoveFile(FilePath, Ec); - for (size_t Retries = 0; Ec && Retries < 6; Retries++) - { - if (!IsFileWithRetry(FilePath)) - { - IsRemoved = true; - Ec.clear(); - break; - } - Sleep(100 + int(Retries * 50)); - Ec.clear(); - (void)RemoveFile(FilePath, Ec); - } - } - if (!IsRemoved && Ec) - { - FailedRemovePaths.push_back(std::make_pair(FilePath, Ec)); - } - else - { - DeletedItemCount++; - DeletedByteCount += Content.FileSizes[FileIndex]; - } - } - - if (!FailedRemovePaths.empty()) - { - RwLock::ExclusiveLockScope _(ResultLock); - FailedRemovePaths.insert(FailedRemovePaths.end(), FailedRemovePaths.begin(), FailedRemovePaths.end()); - } - else if (!RelativeRoot.empty()) - { - DiscoveredItemCount++; - RwLock::ExclusiveLockScope _(ResultLock); - DirectoriesToDelete.push_back(RelativeRoot); - } - } - } - const std::filesystem::path& Path; - std::atomic<bool>& AbortFlag; - std::atomic<uint64_t>& DiscoveredItemCount; - std::atomic<uint64_t>& DeletedItemCount; - std::atomic<uint64_t>& DeletedByteCount; - std::span<const std::string> ExcludeDirectories; - std::vector<std::filesystem::path>& DirectoriesToDelete; - CleanDirectoryResult& Result; - RwLock& ResultLock; - } Visitor(Path, - AbortFlag, - DiscoveredItemCount, - DeletedItemCount, - DeletedByteCount, - ExcludeDirectories, - DirectoriesToDelete, - Result, - ResultLock); - - GetDirectoryContent(Path, - DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes, - Visitor, - IOWorkerPool, - Work.PendingWork()); - - uint64_t LastUpdateTimeMs = Timer.GetElapsedTimeMs(); - - if (ProgressFunc && ProgressUpdateDelayMS != 0) - { - Work.Wait(ProgressUpdateDelayMS, [&](bool IsAborted, bool 
IsPaused, ptrdiff_t PendingWork) { - ZEN_UNUSED(PendingWork); - LastUpdateTimeMs = Timer.GetElapsedTimeMs(); - - uint64_t Deleted = DeletedItemCount.load(); - uint64_t DeletedBytes = DeletedByteCount.load(); - uint64_t Discovered = DiscoveredItemCount.load(); - std::string Details = fmt::format("Found {}, Deleted {} ({})", Discovered, Deleted, NiceBytes(DeletedBytes)); - ProgressFunc(Details, Discovered, Discovered - Deleted, IsPaused, IsAborted); - }); - } - else - { - Work.Wait(); - } - - { - ZEN_TRACE_CPU("DeleteDirs"); - - std::sort(DirectoriesToDelete.begin(), - DirectoriesToDelete.end(), - [](const std::filesystem::path& Lhs, const std::filesystem::path& Rhs) { - auto DistanceLhs = std::distance(Lhs.begin(), Lhs.end()); - auto DistanceRhs = std::distance(Rhs.begin(), Rhs.end()); - return DistanceLhs > DistanceRhs; - }); - - for (const std::filesystem::path& DirectoryToDelete : DirectoriesToDelete) - { - if (AbortFlag) - { - break; - } - else - { - while (PauseFlag && !AbortFlag) - { - Sleep(2000); - } - } - - const std::filesystem::path FullPath = Path / DirectoryToDelete; - - std::error_code Ec; - RemoveDir(FullPath, Ec); - if (Ec) - { - for (size_t Retries = 0; Ec && Retries < 3; Retries++) - { - if (!IsDir(FullPath)) - { - Ec.clear(); - break; - } - Sleep(100 + int(Retries * 50)); - Ec.clear(); - RemoveDir(FullPath, Ec); - } - } - if (Ec) - { - RwLock::ExclusiveLockScope __(ResultLock); - Result.FailedRemovePaths.push_back(std::make_pair(DirectoryToDelete, Ec)); - } - else - { - DeletedItemCount++; - } - - if (ProgressFunc) - { - uint64_t NowMs = Timer.GetElapsedTimeMs(); - - if ((NowMs - LastUpdateTimeMs) > 0) - { - LastUpdateTimeMs = NowMs; - - uint64_t Deleted = DeletedItemCount.load(); - uint64_t DeletedBytes = DeletedByteCount.load(); - uint64_t Discovered = DiscoveredItemCount.load(); - std::string Details = fmt::format("Found {}, Deleted {} ({})", Discovered, Deleted, NiceBytes(DeletedBytes)); - ProgressFunc(Details, Discovered, Discovered - 
Deleted, PauseFlag, AbortFlag); - } - } - } - } - - return Result; -} - -bool -CleanAndRemoveDirectory(WorkerThreadPool& WorkerPool, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - const std::filesystem::path& Directory) -{ - if (!IsDir(Directory)) - { - return true; - } - if (CleanDirectoryResult Res = CleanDirectory( - WorkerPool, - AbortFlag, - PauseFlag, - Directory, - {}, - [](const std::string_view Details, uint64_t TotalCount, uint64_t RemainingCount, bool IsPaused, bool IsAborted) { - ZEN_UNUSED(Details, TotalCount, RemainingCount, IsPaused, IsAborted); - }, - 1000); - Res.FailedRemovePaths.empty()) - { - std::error_code Ec; - RemoveDir(Directory, Ec); - return !Ec; - } - return false; -} - -#if ZEN_WITH_TESTS - -void -filesystemutils_forcelink() -{ -} - -namespace { - void GenerateFile(const std::filesystem::path& Path) { BasicFile _(Path, BasicFile::Mode::kTruncate); } -} // namespace - -TEST_CASE("filesystemutils.CleanDirectory") -{ - ScopedTemporaryDirectory TmpDir; - - CreateDirectories(TmpDir.Path() / ".keepme"); - GenerateFile(TmpDir.Path() / ".keepme" / "keep"); - GenerateFile(TmpDir.Path() / "deleteme1"); - GenerateFile(TmpDir.Path() / "deleteme2"); - GenerateFile(TmpDir.Path() / "deleteme3"); - CreateDirectories(TmpDir.Path() / ".keepmenot"); - CreateDirectories(TmpDir.Path() / "no.keepme"); - - CreateDirectories(TmpDir.Path() / "DeleteMe"); - GenerateFile(TmpDir.Path() / "DeleteMe" / "delete1"); - CreateDirectories(TmpDir.Path() / "CantDeleteMe"); - GenerateFile(TmpDir.Path() / "CantDeleteMe" / "delete1"); - GenerateFile(TmpDir.Path() / "CantDeleteMe" / "delete2"); - GenerateFile(TmpDir.Path() / "CantDeleteMe" / "delete3"); - CreateDirectories(TmpDir.Path() / "CantDeleteMe" / ".keepme"); - CreateDirectories(TmpDir.Path() / "CantDeleteMe" / "DeleteMe2"); - GenerateFile(TmpDir.Path() / "CantDeleteMe" / "DeleteMe2" / "delete2"); - GenerateFile(TmpDir.Path() / "CantDeleteMe" / "DeleteMe2" / "delete3"); - 
CreateDirectories(TmpDir.Path() / "CantDeleteMe2" / ".keepme"); - CreateDirectories(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept"); - GenerateFile(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept" / "kept1"); - GenerateFile(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept" / "kept2"); - GenerateFile(TmpDir.Path() / "CantDeleteMe2" / "deleteme"); - - WorkerThreadPool Pool(4); - std::atomic<bool> AbortFlag; - std::atomic<bool> PauseFlag; - - CleanDirectory(Pool, AbortFlag, PauseFlag, TmpDir.Path(), std::vector<std::string>{".keepme"}, {}, 0); - - CHECK(IsDir(TmpDir.Path() / ".keepme")); - CHECK(IsFile(TmpDir.Path() / ".keepme" / "keep")); - CHECK(!IsFile(TmpDir.Path() / "deleteme1")); - CHECK(!IsFile(TmpDir.Path() / "deleteme2")); - CHECK(!IsFile(TmpDir.Path() / "deleteme3")); - CHECK(!IsFile(TmpDir.Path() / ".keepmenot")); - CHECK(!IsFile(TmpDir.Path() / "no.keepme")); - - CHECK(!IsDir(TmpDir.Path() / "DeleteMe")); - CHECK(!IsDir(TmpDir.Path() / "DeleteMe2")); - - CHECK(IsDir(TmpDir.Path() / "CantDeleteMe")); - CHECK(IsDir(TmpDir.Path() / "CantDeleteMe" / ".keepme")); - CHECK(IsDir(TmpDir.Path() / "CantDeleteMe2")); - CHECK(IsDir(TmpDir.Path() / "CantDeleteMe2" / ".keepme")); - CHECK(IsDir(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept")); - CHECK(IsFile(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept" / "kept1")); - CHECK(IsFile(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept" / "kept2")); - CHECK(!IsFile(TmpDir.Path() / "CantDeleteMe2" / "deleteme")); -} - -#endif - -} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildinspect.h b/src/zenremotestore/include/zenremotestore/builds/buildinspect.h new file mode 100644 index 000000000..7f6c65367 --- /dev/null +++ b/src/zenremotestore/include/zenremotestore/builds/buildinspect.h @@ -0,0 +1,60 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <zencore/uid.h> +#include <zenremotestore/chunking/chunkedcontent.h> + +#include <atomic> +#include <filesystem> +#include <span> +#include <string> +#include <vector> + +namespace zen { + +class CbObjectWriter; +class ChunkingCache; +class ChunkingController; +class ProgressBase; +class TransferThreadWorkers; +struct StorageInstance; + +ChunkedFolderContent ScanAndChunkFolder( + ProgressBase& Progress, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + bool IsQuiet, + TransferThreadWorkers& Workers, + GetFolderContentStatistics& GetFolderContentStats, + ChunkingStatistics& ChunkingStats, + const std::filesystem::path& Path, + std::function<bool(const std::string_view& RelativePath)>&& IsAcceptedFolder, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& IsAcceptedFile, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache); + +////////////////////////////////////////////////////////////////////////// + +void ListBuild(bool IsQuiet, + StorageInstance& Storage, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards, + CbObjectWriter* OptionalStructuredOutput); + +void DiffFolders(ProgressBase& Progress, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + bool IsQuiet, + TransferThreadWorkers& Workers, + const std::filesystem::path& BasePath, + const std::filesystem::path& ComparePath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + const std::vector<std::string>& ExcludeFolders, + const std::vector<std::string>& ExcludeExtensions); + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildprimecache.h b/src/zenremotestore/include/zenremotestore/builds/buildprimecache.h new file mode 100644 index 000000000..1d04ccbfe --- /dev/null +++ 
b/src/zenremotestore/include/zenremotestore/builds/buildprimecache.h @@ -0,0 +1,96 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/iohash.h> +#include <zencore/logging.h> +#include <zencore/uid.h> +#include <zencore/zencore.h> +#include <zenremotestore/builds/buildstoragecache.h> +#include <zenremotestore/builds/buildstoragestats.h> + +#include <atomic> +#include <filesystem> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +#include <tsl/robin_set.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class FilteredRate; +class ParallelWork; +class ProgressBase; +class WorkerThreadPool; +struct StorageInstance; + +class BuildsOperationPrimeCache +{ +public: + struct Options + { + bool IsQuiet = false; + bool IsVerbose = false; + std::filesystem::path ZenFolderPath; + std::uint64_t LargeAttachmentSize = 32u * 1024u * 1024u * 4u; + std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + bool ForceUpload = false; + }; + + BuildsOperationPrimeCache(LoggerRef Log, + ProgressBase& Progress, + StorageInstance& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + WorkerThreadPool& NetworkPool, + const Oid& BuildId, + std::span<const Oid> BuildPartIds, + const Options& Options, + BuildStorageCache::Statistics& StorageCacheStats); + + void Execute(); + + DownloadStatistics m_DownloadStats; + +private: + LoggerRef Log() { return m_Log; } + + void CollectReferencedBlobs(tsl::robin_set<IoHash, IoHash::Hasher>& OutBuildBlobs, + tsl::robin_map<IoHash, uint64_t, IoHash::Hasher>& OutLooseChunkRawSizes); + + std::vector<IoHash> FilterAlreadyCachedBlobs(const tsl::robin_set<IoHash, IoHash::Hasher>& BuildBlobs); + + void ScheduleBlobDownloads(std::span<const IoHash> BlobsToDownload, + const tsl::robin_map<IoHash, uint64_t, IoHash::Hasher>& LooseChunkRawSizes, + std::atomic<uint64_t>& MultipartAttachmentCount, + std::atomic<size_t>& CompletedDownloadCount, + FilteredRate& 
FilteredDownloadedBytesPerSecond); + + void DownloadLargeBlobForCache(ParallelWork& Work, + const IoHash& BlobHash, + size_t BlobCount, + std::atomic<size_t>& CompletedDownloadCount, + std::atomic<uint64_t>& MultipartAttachmentCount, + FilteredRate& FilteredDownloadedBytesPerSecond); + + void DownloadSingleBlobForCache(const IoHash& BlobHash, + size_t BlobCount, + std::atomic<size_t>& CompletedDownloadCount, + FilteredRate& FilteredDownloadedBytesPerSecond); + + LoggerRef m_Log; + ProgressBase& m_Progress; + StorageInstance& m_Storage; + std::atomic<bool>& m_AbortFlag; + std::atomic<bool>& m_PauseFlag; + WorkerThreadPool& m_NetworkPool; + const Oid m_BuildId; + std::vector<Oid> m_BuildPartIds; + Options m_Options; + std::filesystem::path m_TempPath; + + BuildStorageCache::Statistics& m_StorageCacheStats; +}; + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildsavedstate.h b/src/zenremotestore/include/zenremotestore/builds/buildsavedstate.h index f808a7a3b..2a214f196 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildsavedstate.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildsavedstate.h @@ -44,6 +44,11 @@ struct BuildState struct BuildSaveState { + static constexpr uint32_t NoVersion = 0; + static constexpr uint32_t kVersion1 = 1; + static constexpr uint32_t kCurrentVersion = kVersion1; + + uint32_t Version = kCurrentVersion; BuildState State; FolderContent FolderState; std::filesystem::path LocalPath; diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorage.h b/src/zenremotestore/include/zenremotestore/builds/buildstorage.h index 85dabc59f..b933ab95d 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorage.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorage.h @@ -3,7 +3,7 @@ #pragma once #include <zencore/compactbinary.h> -#include <zenremotestore/chunking/chunkblock.h> +#include <zencore/compositebuffer.h> ZEN_THIRD_PARTY_INCLUDES_START 
#include <tsl/robin_map.h> @@ -53,15 +53,24 @@ public: std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, std::function<void(uint64_t, bool)>&& OnSentBytes) = 0; - virtual IoBuffer GetBuildBlob(const Oid& BuildId, - const IoHash& RawHash, - uint64_t RangeOffset = 0, - uint64_t RangeBytes = (uint64_t)-1) = 0; + virtual IoBuffer GetBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + uint64_t RangeOffset = 0, + uint64_t RangeBytes = (uint64_t)-1) = 0; + + struct BuildBlobRanges + { + IoBuffer PayloadBuffer; + std::vector<std::pair<uint64_t, uint64_t>> Ranges; + }; + virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId, + const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) = 0; virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t ChunkSize, std::function<void(uint64_t Offset, const IoBuffer& Chunk)>&& OnReceive, - std::function<void()>&& OnComplete) = 0; + std::function<void()>&& OnComplete) = 0; [[nodiscard]] virtual bool PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) = 0; virtual CbObject FindBlocks(const Oid& BuildId, uint64_t MaxBlockCount) = 0; diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h b/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h index f25ce5b5e..4e0bd7243 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h @@ -2,11 +2,9 @@ #pragma once -#include <zencore/logging.h> - #include <zencore/compactbinary.h> #include <zencore/compositebuffer.h> -#include <zenremotestore/chunking/chunkblock.h> +#include <zencore/logging.h> namespace zen { @@ -37,6 +35,14 @@ public: const IoHash& RawHash, uint64_t RangeOffset = 0, uint64_t RangeBytes = (uint64_t)-1) = 0; + struct BuildBlobRanges + { + IoBuffer PayloadBuffer; + std::vector<std::pair<uint64_t, 
uint64_t>> Ranges; + }; + virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId, + const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) = 0; virtual void PutBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes, std::span<const CbObject> MetaDatas) = 0; virtual std::vector<CbObject> GetBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes) = 0; @@ -61,11 +67,19 @@ std::unique_ptr<BuildStorageCache> CreateZenBuildStorageCache(HttpClient& H const std::filesystem::path& TempFolderPath, WorkerThreadPool& BackgroundWorkerPool); +#if ZEN_WITH_TESTS +std::unique_ptr<BuildStorageCache> CreateInMemoryBuildStorageCache(uint64_t MaxRangeSupported, + BuildStorageCache::Statistics& Stats, + double LatencySec = 0.0, + double DelayPerKBSec = 0.0); +#endif // ZEN_WITH_TESTS + struct ZenCacheEndpointTestResult { bool Success = false; std::string FailureReason; - double LatencySeconds = -1.0; + double LatencySeconds = -1.0; + uint64_t MaxRangeCountPerRequest = 1; }; ZenCacheEndpointTestResult TestZenCacheEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpVerbose); diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h deleted file mode 100644 index 31733569e..000000000 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h +++ /dev/null @@ -1,774 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. 
- -#pragma once - -#include <zencore/iohash.h> -#include <zencore/logging.h> -#include <zencore/uid.h> -#include <zencore/zencore.h> -#include <zenremotestore/builds/buildstoragecache.h> -#include <zenremotestore/chunking/chunkblock.h> -#include <zenremotestore/chunking/chunkedcontent.h> -#include <zenremotestore/partialblockrequestmode.h> -#include <zenutil/bufferedwritefilecache.h> - -#include <atomic> -#include <future> -#include <memory> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <tsl/robin_set.h> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { - -class CloneQueryInterface; - -class OperationLogOutput; -class BuildStorageBase; -class HttpClient; -class ParallelWork; -class WorkerThreadPool; -class FilteredRate; -class ReadFileCache; -struct StorageInstance; - -class BufferedWriteFileCache; -struct ChunkBlockDescription; -struct ChunkedFolderContent; - -struct DiskStatistics -{ - std::atomic<uint64_t> OpenReadCount = 0; - std::atomic<uint64_t> OpenWriteCount = 0; - std::atomic<uint64_t> ReadCount = 0; - std::atomic<uint64_t> ReadByteCount = 0; - std::atomic<uint64_t> WriteCount = 0; - std::atomic<uint64_t> WriteByteCount = 0; - std::atomic<uint64_t> CloneCount = 0; - std::atomic<uint64_t> CloneByteCount = 0; - std::atomic<uint64_t> CurrentOpenFileCount = 0; -}; - -struct CacheMappingStatistics -{ - uint64_t CacheChunkCount = 0; - uint64_t CacheChunkByteCount = 0; - - uint64_t CacheBlockCount = 0; - uint64_t CacheBlocksByteCount = 0; - - uint64_t CacheSequenceHashesCount = 0; - uint64_t CacheSequenceHashesByteCount = 0; - - uint64_t CacheScanElapsedWallTimeUs = 0; - - uint32_t LocalPathsMatchingSequencesCount = 0; - uint64_t LocalPathsMatchingSequencesByteCount = 0; - - uint64_t LocalChunkMatchingRemoteCount = 0; - uint64_t LocalChunkMatchingRemoteByteCount = 0; - - uint64_t LocalScanElapsedWallTimeUs = 0; - - uint32_t ScavengedPathsMatchingSequencesCount = 0; - uint64_t ScavengedPathsMatchingSequencesByteCount = 0; - - uint64_t 
ScavengedChunkMatchingRemoteCount = 0; - uint64_t ScavengedChunkMatchingRemoteByteCount = 0; - - uint64_t ScavengeElapsedWallTimeUs = 0; -}; - -struct DownloadStatistics -{ - std::atomic<uint64_t> RequestsCompleteCount = 0; - - std::atomic<uint64_t> DownloadedChunkCount = 0; - std::atomic<uint64_t> DownloadedChunkByteCount = 0; - std::atomic<uint64_t> MultipartAttachmentCount = 0; - - std::atomic<uint64_t> DownloadedBlockCount = 0; - std::atomic<uint64_t> DownloadedBlockByteCount = 0; - - std::atomic<uint64_t> DownloadedPartialBlockCount = 0; - std::atomic<uint64_t> DownloadedPartialBlockByteCount = 0; -}; - -struct WriteChunkStatistics -{ - uint64_t DownloadTimeUs = 0; - uint64_t WriteTimeUs = 0; - uint64_t WriteChunksElapsedWallTimeUs = 0; -}; - -struct RebuildFolderStateStatistics -{ - uint64_t CleanFolderElapsedWallTimeUs = 0; - std::atomic<uint32_t> FinalizeTreeFilesMovedCount = 0; - std::atomic<uint32_t> FinalizeTreeFilesCopiedCount = 0; - uint64_t FinalizeTreeElapsedWallTimeUs = 0; -}; - -std::filesystem::path ZenStateFilePath(const std::filesystem::path& ZenFolderPath); -std::filesystem::path ZenTempFolderPath(const std::filesystem::path& ZenFolderPath); - -class BuildsOperationUpdateFolder -{ -public: - struct Options - { - bool IsQuiet = false; - bool IsVerbose = false; - bool AllowFileClone = true; - bool UseSparseFiles = true; - std::filesystem::path SystemRootDir; - std::filesystem::path ZenFolderPath; - std::uint64_t LargeAttachmentSize = 32u * 1024u * 1024u * 4u; - std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; - EPartialBlockRequestMode PartialBlockRequestMode = EPartialBlockRequestMode::Mixed; - bool WipeTargetFolder = false; - bool PrimeCacheOnly = false; - bool EnableOtherDownloadsScavenging = true; - bool EnableTargetFolderScavenging = true; - bool ValidateCompletedSequences = true; - std::vector<std::string> ExcludeFolders; - uint64_t MaximumInMemoryPayloadSize = 512u * 1024u; - bool PopulateCache = true; - }; - - 
BuildsOperationUpdateFolder(OperationLogOutput& OperationLogOutput, - StorageInstance& Storage, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - WorkerThreadPool& IOWorkerPool, - WorkerThreadPool& NetworkPool, - const Oid& BuildId, - const std::filesystem::path& Path, - const ChunkedFolderContent& LocalContent, - const ChunkedContentLookup& LocalLookup, - const ChunkedFolderContent& RemoteContent, - const ChunkedContentLookup& RemoteLookup, - const std::vector<ChunkBlockDescription>& BlockDescriptions, - const std::vector<IoHash>& LooseChunkHashes, - const Options& Options); - - void Execute(FolderContent& OutLocalFolderState); - - DiskStatistics m_DiskStats; - CacheMappingStatistics m_CacheMappingStats; - GetFolderContentStatistics m_ScavengedFolderScanStats; - DownloadStatistics m_DownloadStats; - WriteChunkStatistics m_WriteChunkStats; - RebuildFolderStateStatistics m_RebuildFolderStateStats; - std::atomic<uint64_t> m_WrittenChunkByteCount; - -private: - struct BlockWriteOps - { - std::vector<CompositeBuffer> ChunkBuffers; - struct WriteOpData - { - const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; - size_t ChunkBufferIndex = (size_t)-1; - }; - std::vector<WriteOpData> WriteOps; - }; - - struct ScavengeSource - { - std::filesystem::path StateFilePath; - std::filesystem::path Path; - }; - - struct ScavengedSequenceCopyOperation - { - uint32_t ScavengedContentIndex = (uint32_t)-1; - uint32_t ScavengedPathIndex = (uint32_t)-1; - uint32_t RemoteSequenceIndex = (uint32_t)-1; - uint64_t RawSize = (uint32_t)-1; - }; - - struct CopyChunkData - { - uint32_t ScavengeSourceIndex = (uint32_t)-1; - uint32_t SourceSequenceIndex = (uint32_t)-1; - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> TargetChunkLocationPtrs; - struct ChunkTarget - { - uint32_t TargetChunkLocationCount = (uint32_t)-1; - uint32_t RemoteChunkIndex = (uint32_t)-1; - uint64_t CacheFileOffset = (uint64_t)-1; - }; - std::vector<ChunkTarget> ChunkTargets; - 
}; - - struct BlobsExistsResult - { - tsl::robin_set<IoHash> ExistingBlobs; - uint64_t ElapsedTimeMs = 0; - }; - - void ScanCacheFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedChunkHashesFound, - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedSequenceHashesFound); - void ScanTempBlocksFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedBlocksFound); - std::vector<uint32_t> ScanTargetFolder(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedChunkHashesFound, - const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound); - - std::vector<ScavengeSource> FindScavengeSources(); - - bool FindScavengeContent(const ScavengeSource& Source, - ChunkedFolderContent& OutScavengedLocalContent, - ChunkedContentLookup& OutScavengedLookup); - - void ScavengeSourceForChunks(uint32_t& InOutRemainingChunkCount, - std::vector<bool>& InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags, - tsl::robin_map<IoHash, size_t, IoHash::Hasher>& InOutRawHashToCopyChunkDataIndex, - const std::vector<std::atomic<uint32_t>>& SequenceIndexChunksLeftToWriteCounters, - const ChunkedFolderContent& ScavengedContent, - const ChunkedContentLookup& ScavengedLookup, - std::vector<CopyChunkData>& InOutCopyChunkDatas, - uint32_t ScavengedContentIndex, - uint64_t& InOutChunkMatchingRemoteCount, - uint64_t& InOutChunkMatchingRemoteByteCount); - - std::filesystem::path FindDownloadedChunk(const IoHash& ChunkHash); - - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> GetRemainingChunkTargets( - std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - uint32_t ChunkIndex); - - uint64_t GetChunkWriteCount(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, uint32_t ChunkIndex); - - void CheckRequiredDiskSpace(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex); - - void WriteScavengedSequenceToCache(const std::filesystem::path& ScavengeRootPath, - const 
ChunkedFolderContent& ScavengedContent, - const ScavengedSequenceCopyOperation& ScavengeOp); - - void WriteLooseChunk(const uint32_t RemoteChunkIndex, - const BlobsExistsResult& ExistsResult, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - std::atomic<uint64_t>& WritePartsComplete, - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs, - BufferedWriteFileCache& WriteCache, - ParallelWork& Work, - uint64_t TotalRequestCount, - uint64_t TotalPartWriteCount, - FilteredRate& FilteredDownloadedBytesPerSecond, - FilteredRate& FilteredWrittenBytesPerSecond); - - void DownloadBuildBlob(uint32_t RemoteChunkIndex, - const BlobsExistsResult& ExistsResult, - ParallelWork& Work, - std::function<void(IoBuffer&& Payload)>&& OnDownloaded); - - void DownloadPartialBlock(const ChunkBlockAnalyser::BlockRangeDescriptor BlockRange, - const BlobsExistsResult& ExistsResult, - std::function<void(IoBuffer&& InMemoryBuffer, const std::filesystem::path& OnDiskPath)>&& OnDownloaded); - - std::vector<uint32_t> WriteLocalChunkToCache(CloneQueryInterface* CloneQuery, - const CopyChunkData& CopyData, - const std::vector<ChunkedFolderContent>& ScavengedContents, - const std::vector<ChunkedContentLookup>& ScavengedLookups, - const std::vector<std::filesystem::path>& ScavengedPaths, - BufferedWriteFileCache& WriteCache); - - bool WriteCompressedChunkToCache(const IoHash& ChunkHash, - const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, - BufferedWriteFileCache& WriteCache, - IoBuffer&& CompressedPart); - - void StreamDecompress(const IoHash& SequenceRawHash, CompositeBuffer&& CompressedPart); - - void WriteSequenceChunkToCache(BufferedWriteFileCache::Local& LocalWriter, - const CompositeBuffer& Chunk, - const uint32_t SequenceIndex, - const uint64_t FileOffset, - const uint32_t PathIndex); - - bool GetBlockWriteOps(const IoHash& BlockRawHash, - std::span<const IoHash> ChunkRawHashes, - std::span<const 
uint32_t> ChunkCompressedLengths, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, - const MemoryView BlockView, - uint32_t FirstIncludedBlockChunkIndex, - uint32_t LastIncludedBlockChunkIndex, - BlockWriteOps& OutOps); - - void WriteBlockChunkOpsToCache(std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - const BlockWriteOps& Ops, - BufferedWriteFileCache& WriteCache, - ParallelWork& Work); - - bool WriteChunksBlockToCache(const ChunkBlockDescription& BlockDescription, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - ParallelWork& Work, - CompositeBuffer&& BlockBuffer, - std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, - BufferedWriteFileCache& WriteCache); - - bool WritePartialBlockChunksToCache(const ChunkBlockDescription& BlockDescription, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - ParallelWork& Work, - CompositeBuffer&& PartialBlockBuffer, - uint32_t FirstIncludedBlockChunkIndex, - uint32_t LastIncludedBlockChunkIndex, - std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, - BufferedWriteFileCache& WriteCache); - - void AsyncWriteDownloadedChunk(const std::filesystem::path& ZenFolderPath, - uint32_t RemoteChunkIndex, - std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs, - BufferedWriteFileCache& WriteCache, - ParallelWork& Work, - IoBuffer&& Payload, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, - std::atomic<uint64_t>& WritePartsComplete, - const uint64_t TotalPartWriteCount, - FilteredRate& FilteredWrittenBytesPerSecond, - bool EnableBacklog); - - void VerifyAndCompleteChunkSequencesAsync(std::span<const uint32_t> RemoteSequenceIndexes, ParallelWork& Work); - bool CompleteSequenceChunk(uint32_t RemoteSequenceIndex, std::span<std::atomic<uint32_t>> 
SequenceIndexChunksLeftToWriteCounters); - std::vector<uint32_t> CompleteChunkTargets(const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, - std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters); - void FinalizeChunkSequence(const IoHash& SequenceRawHash); - void FinalizeChunkSequences(std::span<const uint32_t> RemoteSequenceIndexes); - void VerifySequence(uint32_t RemoteSequenceIndex); - - OperationLogOutput& m_LogOutput; - StorageInstance& m_Storage; - std::atomic<bool>& m_AbortFlag; - std::atomic<bool>& m_PauseFlag; - WorkerThreadPool& m_IOWorkerPool; - WorkerThreadPool& m_NetworkPool; - const Oid m_BuildId; - const std::filesystem::path m_Path; - const ChunkedFolderContent& m_LocalContent; - const ChunkedContentLookup& m_LocalLookup; - const ChunkedFolderContent& m_RemoteContent; - const ChunkedContentLookup& m_RemoteLookup; - const std::vector<ChunkBlockDescription>& m_BlockDescriptions; - const std::vector<IoHash>& m_LooseChunkHashes; - const Options m_Options; - const std::filesystem::path m_CacheFolderPath; - const std::filesystem::path m_TempDownloadFolderPath; - const std::filesystem::path m_TempBlockFolderPath; - - std::atomic<uint64_t> m_ValidatedChunkByteCount; -}; - -struct FindBlocksStatistics -{ - uint64_t FindBlockTimeMS = 0; - uint64_t PotentialChunkCount = 0; - uint64_t PotentialChunkByteCount = 0; - uint64_t FoundBlockCount = 0; - uint64_t FoundBlockChunkCount = 0; - uint64_t FoundBlockByteCount = 0; - uint64_t AcceptedBlockCount = 0; - uint64_t NewBlocksCount = 0; - uint64_t NewBlocksChunkCount = 0; - uint64_t NewBlocksChunkByteCount = 0; - - FindBlocksStatistics& operator+=(const FindBlocksStatistics& Rhs) - { - FindBlockTimeMS += Rhs.FindBlockTimeMS; - PotentialChunkCount += Rhs.PotentialChunkCount; - PotentialChunkByteCount += Rhs.PotentialChunkByteCount; - FoundBlockCount += Rhs.FoundBlockCount; - FoundBlockChunkCount += Rhs.FoundBlockChunkCount; - FoundBlockByteCount += 
Rhs.FoundBlockByteCount; - AcceptedBlockCount += Rhs.AcceptedBlockCount; - NewBlocksCount += Rhs.NewBlocksCount; - NewBlocksChunkCount += Rhs.NewBlocksChunkCount; - NewBlocksChunkByteCount += Rhs.NewBlocksChunkByteCount; - return *this; - } -}; - -struct UploadStatistics -{ - std::atomic<uint64_t> BlockCount = 0; - std::atomic<uint64_t> BlocksBytes = 0; - std::atomic<uint64_t> ChunkCount = 0; - std::atomic<uint64_t> ChunksBytes = 0; - std::atomic<uint64_t> ReadFromDiskBytes = 0; - std::atomic<uint64_t> MultipartAttachmentCount = 0; - uint64_t ElapsedWallTimeUS = 0; - - UploadStatistics& operator+=(const UploadStatistics& Rhs) - { - BlockCount += Rhs.BlockCount; - BlocksBytes += Rhs.BlocksBytes; - ChunkCount += Rhs.ChunkCount; - ChunksBytes += Rhs.ChunksBytes; - ReadFromDiskBytes += Rhs.ReadFromDiskBytes; - MultipartAttachmentCount += Rhs.MultipartAttachmentCount; - ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS; - return *this; - } -}; - -struct LooseChunksStatistics -{ - uint64_t ChunkCount = 0; - uint64_t ChunkByteCount = 0; - std::atomic<uint64_t> CompressedChunkCount = 0; - std::atomic<uint64_t> CompressedChunkRawBytes = 0; - std::atomic<uint64_t> CompressedChunkBytes = 0; - uint64_t CompressChunksElapsedWallTimeUS = 0; - - LooseChunksStatistics& operator+=(const LooseChunksStatistics& Rhs) - { - ChunkCount += Rhs.ChunkCount; - ChunkByteCount += Rhs.ChunkByteCount; - CompressedChunkCount += Rhs.CompressedChunkCount; - CompressedChunkRawBytes += Rhs.CompressedChunkRawBytes; - CompressedChunkBytes += Rhs.CompressedChunkBytes; - CompressChunksElapsedWallTimeUS += Rhs.CompressChunksElapsedWallTimeUS; - return *this; - } -}; - -struct GenerateBlocksStatistics -{ - std::atomic<uint64_t> GeneratedBlockByteCount = 0; - std::atomic<uint64_t> GeneratedBlockCount = 0; - uint64_t GenerateBlocksElapsedWallTimeUS = 0; - - GenerateBlocksStatistics& operator+=(const GenerateBlocksStatistics& Rhs) - { - GeneratedBlockByteCount += Rhs.GeneratedBlockByteCount; - GeneratedBlockCount 
+= Rhs.GeneratedBlockCount; - GenerateBlocksElapsedWallTimeUS += Rhs.GenerateBlocksElapsedWallTimeUS; - return *this; - } -}; - -static constexpr size_t DefaultMaxChunkBlockSize = 64u * 1024u * 1024u; -static constexpr size_t DefaultMaxChunksPerChunkBlock = 4u * 1000u; -static constexpr size_t DefaultMaxChunkBlockEmbedSize = 3u * 512u * 1024u; - -class BuildsOperationUploadFolder -{ -public: - struct ChunksBlockParameters - { - size_t MaxBlockSize = DefaultMaxChunkBlockSize; - size_t MaxChunksPerBlock = DefaultMaxChunksPerChunkBlock; - size_t MaxChunkEmbedSize = DefaultMaxChunkBlockEmbedSize; - }; - - struct Options - { - bool IsQuiet = false; - bool IsVerbose = false; - bool DoExtraContentValidation = false; - - const uint64_t FindBlockMaxCount = 10000; - const uint8_t BlockReuseMinPercentLimit = 85; - bool AllowMultiparts = true; - bool IgnoreExistingBlocks = false; - ChunksBlockParameters BlockParameters; - - uint32_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; - - const uint64_t MinimumSizeForCompressInBlock = 2u * 1024u; - - std::filesystem::path TempDir; - std::vector<std::string> ExcludeFolders; - std::vector<std::string> ExcludeExtensions; - std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt"; - - std::vector<std::string> NonCompressableExtensions; - - bool PopulateCache = true; - }; - BuildsOperationUploadFolder(OperationLogOutput& OperationLogOutput, - StorageInstance& Storage, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - WorkerThreadPool& IOWorkerPool, - WorkerThreadPool& NetworkPool, - const Oid& BuildId, - const std::filesystem::path& Path, - bool CreateBuild, - const CbObject& MetaData, - const Options& Options); - - std::vector<std::pair<Oid, std::string>> Execute(const Oid& BuildPartId, - const std::string_view BuildPartName, - const std::filesystem::path& ManifestPath, - ChunkingController& ChunkController, - ChunkingCache& ChunkCache); - - DiskStatistics m_DiskStats; - GetFolderContentStatistics 
m_LocalFolderScanStats; - ChunkingStatistics m_ChunkingStats; - FindBlocksStatistics m_FindBlocksStats; - ReuseBlocksStatistics m_ReuseBlocksStats; - UploadStatistics m_UploadStats; - GenerateBlocksStatistics m_GenerateBlocksStats; - LooseChunksStatistics m_LooseChunksStats; - -private: - struct PrepareBuildResult - { - std::vector<ChunkBlockDescription> KnownBlocks; - uint64_t PreferredMultipartChunkSize = 0; - uint64_t PayloadSize = 0; - uint64_t PrepareBuildTimeMs = 0; - uint64_t FindBlocksTimeMs = 0; - uint64_t ElapsedTimeMs = 0; - }; - - PrepareBuildResult PrepareBuild(); - - struct UploadPart - { - Oid PartId = Oid::Zero; - std::string PartName; - FolderContent Content; - uint64_t TotalRawSize = 0; - GetFolderContentStatistics LocalFolderScanStats; - }; - - std::vector<BuildsOperationUploadFolder::UploadPart> ReadFolder(); - std::vector<UploadPart> ReadManifestParts(const std::filesystem::path& ManifestPath); - - bool IsAcceptedFolder(const std::string_view& RelativePath) const; - bool IsAcceptedFile(const std::string_view& RelativePath) const; - - void ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - std::vector<uint32_t>& ChunkIndexes, - std::vector<std::vector<uint32_t>>& OutBlocks); - struct GeneratedBlocks - { - std::vector<ChunkBlockDescription> BlockDescriptions; - std::vector<uint64_t> BlockSizes; - std::vector<CompositeBuffer> BlockHeaders; - std::vector<CbObject> BlockMetaDatas; - std::vector<uint8_t> - MetaDataHasBeenUploaded; // NOTE: Do not use std::vector<bool> here as this vector is modified by multiple threads - tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockHashToBlockIndex; - }; - - void GenerateBuildBlocks(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - const std::vector<std::vector<uint32_t>>& NewBlockChunks, - GeneratedBlocks& OutBlocks, - GenerateBlocksStatistics& GenerateBlocksStats, - UploadStatistics& UploadStats); - - std::vector<uint32_t> 
CalculateAbsoluteChunkOrders(const std::span<const IoHash> LocalChunkHashes, - const std::span<const uint32_t> LocalChunkOrder, - const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToLocalChunkIndex, - const std::span<const uint32_t>& LooseChunkIndexes, - const std::span<const ChunkBlockDescription>& BlockDescriptions); - - CompositeBuffer FetchChunk(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - const IoHash& ChunkHash, - ReadFileCache& OpenFileCache); - - CompressedBuffer GenerateBlock(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - const std::vector<uint32_t>& ChunksInBlock, - ChunkBlockDescription& OutBlockDescription); - - CompressedBuffer RebuildBlock(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - CompositeBuffer&& HeaderBuffer, - const std::vector<uint32_t>& ChunksInBlock); - - enum class PartTaskSteps : uint32_t - { - ChunkPartContent = 0, - CalculateDelta, - GenerateBlocks, - BuildPartManifest, - UploadBuildPart, - UploadAttachments, - PutBuildPartStats, - StepCount - }; - - void UploadBuildPart(ChunkingController& ChunkController, - ChunkingCache& ChunkCache, - uint32_t PartIndex, - const UploadPart& Part, - uint32_t PartStepOffset, - uint32_t StepCount); - - void UploadPartBlobs(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - std::span<IoHash> RawHashes, - const std::vector<std::vector<uint32_t>>& NewBlockChunks, - GeneratedBlocks& NewBlocks, - std::span<const uint32_t> LooseChunkIndexes, - const std::uint64_t LargeAttachmentSize, - UploadStatistics& TempUploadStats, - LooseChunksStatistics& TempLooseChunksStats, - std::vector<IoHash>& OutUnknownChunks); - - CompositeBuffer CompressChunk(const ChunkedFolderContent& Content, - const ChunkedContentLookup& Lookup, - uint32_t ChunkIndex, - LooseChunksStatistics& TempLooseChunksStats); - - OperationLogOutput& m_LogOutput; - StorageInstance& m_Storage; - std::atomic<bool>& 
m_AbortFlag; - std::atomic<bool>& m_PauseFlag; - WorkerThreadPool& m_IOWorkerPool; - WorkerThreadPool& m_NetworkPool; - const Oid m_BuildId; - - const std::filesystem::path m_Path; - const bool m_CreateBuild; // ?? Member? - const CbObject m_MetaData; // ?? Member - const Options m_Options; - - tsl::robin_set<uint32_t> m_NonCompressableExtensionHashes; - - std::future<PrepareBuildResult> m_PrepBuildResultFuture; - std::vector<ChunkBlockDescription> m_KnownBlocks; - uint64_t m_PreferredMultipartChunkSize = 0; - uint64_t m_LargeAttachmentSize = 0; -}; - -struct ValidateStatistics -{ - uint64_t BuildBlobSize = 0; - uint64_t BuildPartSize = 0; - uint64_t ChunkAttachmentCount = 0; - uint64_t BlockAttachmentCount = 0; - std::atomic<uint64_t> VerifiedAttachmentCount = 0; - std::atomic<uint64_t> VerifiedByteCount = 0; - uint64_t ElapsedWallTimeUS = 0; -}; - -class BuildsOperationValidateBuildPart -{ -public: - struct Options - { - bool IsQuiet = false; - bool IsVerbose = false; - }; - BuildsOperationValidateBuildPart(OperationLogOutput& OperationLogOutput, - BuildStorageBase& Storage, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - WorkerThreadPool& IOWorkerPool, - WorkerThreadPool& NetworkPool, - const Oid& BuildId, - const Oid& BuildPartId, - const std::string_view BuildPartName, - const Options& Options); - - void Execute(); - - ValidateStatistics m_ValidateStats; - DownloadStatistics m_DownloadStats; - -private: - ChunkBlockDescription ValidateChunkBlock(IoBuffer&& Payload, - const IoHash& BlobHash, - uint64_t& OutCompressedSize, - uint64_t& OutDecompressedSize); - - OperationLogOutput& m_LogOutput; - BuildStorageBase& m_Storage; - std::atomic<bool>& m_AbortFlag; - std::atomic<bool>& m_PauseFlag; - WorkerThreadPool& m_IOWorkerPool; - WorkerThreadPool& m_NetworkPool; - const Oid m_BuildId; - Oid m_BuildPartId; - const std::string m_BuildPartName; - const Options m_Options; -}; - -class BuildsOperationPrimeCache -{ -public: - struct Options - { - bool 
IsQuiet = false; - bool IsVerbose = false; - std::filesystem::path ZenFolderPath; - std::uint64_t LargeAttachmentSize = 32u * 1024u * 1024u * 4u; - std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; - bool ForceUpload = false; - }; - - BuildsOperationPrimeCache(OperationLogOutput& OperationLogOutput, - StorageInstance& Storage, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - WorkerThreadPool& NetworkPool, - const Oid& BuildId, - std::span<const Oid> BuildPartIds, - const Options& Options, - BuildStorageCache::Statistics& StorageCacheStats); - - void Execute(); - - DownloadStatistics m_DownloadStats; - -private: - OperationLogOutput& m_LogOutput; - StorageInstance& m_Storage; - std::atomic<bool>& m_AbortFlag; - std::atomic<bool>& m_PauseFlag; - WorkerThreadPool& m_NetworkPool; - const Oid m_BuildId; - std::vector<Oid> m_BuildPartIds; - Options m_Options; - std::filesystem::path m_TempPath; - - BuildStorageCache::Statistics& m_StorageCacheStats; -}; - -CompositeBuffer ValidateBlob(std::atomic<bool>& AbortFlag, - BuildStorageBase& Storage, - const Oid& BuildId, - const IoHash& BlobHash, - uint64_t& OutCompressedSize, - uint64_t& OutDecompressedSize); - -std::vector<std::pair<Oid, std::string>> ResolveBuildPartNames(CbObjectView BuildObject, - const Oid& BuildId, - const std::vector<Oid>& BuildPartIds, - std::span<const std::string> BuildPartNames, - std::uint64_t& OutPreferredMultipartChunkSize); - -struct BuildManifest; - -ChunkedFolderContent GetRemoteContent(OperationLogOutput& Output, - StorageInstance& Storage, - const Oid& BuildId, - const std::vector<std::pair<Oid, std::string>>& BuildParts, - const BuildManifest& Manifest, - std::span<const std::string> IncludeWildcards, - std::span<const std::string> ExcludeWildcards, - std::unique_ptr<ChunkingController>& OutChunkController, - std::vector<ChunkedFolderContent>& OutPartContents, - std::vector<ChunkBlockDescription>& OutBlockDescriptions, - std::vector<IoHash>& 
OutLooseChunkHashes, - bool IsQuiet, - bool IsVerbose, - bool DoExtraContentVerify); - -std::string GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix); - -#if ZEN_WITH_TESTS -void buildstorageoperations_forcelink(); -#endif // ZEN_WITH_TESTS - -} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageresolve.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageresolve.h new file mode 100644 index 000000000..c964ad6cc --- /dev/null +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageresolve.h @@ -0,0 +1,46 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/logging.h> +#include <zenhttp/httpclient.h> + +namespace zen { + +struct BuildStorageResolveResult +{ + struct Capabilities + { + uint64_t MaxRangeCountPerRequest = 1; + }; + struct Host + { + std::string Address; + std::string Name; + bool AssumeHttp2 = false; + double LatencySec = -1.0; + Capabilities Caps; + }; + Host Cloud; + Host Cache; +}; + +////////////////////////////////////////////////////////////////////////// + +enum class ZenCacheResolveMode +{ + Off, + Discovery, + LocalHost, + All +}; + +BuildStorageResolveResult ResolveBuildStorage(LoggerRef InLog, + const HttpClientSettings& ClientSettings, + std::string_view Host, + std::string_view OverrideHost, + std::string_view ZenCacheHost, + ZenCacheResolveMode ZenResolveMode, + bool Verbose); + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstoragestats.h b/src/zenremotestore/include/zenremotestore/builds/buildstoragestats.h new file mode 100644 index 000000000..e0de9ed6b --- /dev/null +++ b/src/zenremotestore/include/zenremotestore/builds/buildstoragestats.h @@ -0,0 +1,182 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <atomic> +#include <cstdint> + +namespace zen { + +struct DiskStatistics +{ + std::atomic<uint64_t> OpenReadCount = 0; + std::atomic<uint64_t> OpenWriteCount = 0; + std::atomic<uint64_t> ReadCount = 0; + std::atomic<uint64_t> ReadByteCount = 0; + std::atomic<uint64_t> WriteCount = 0; + std::atomic<uint64_t> WriteByteCount = 0; + std::atomic<uint64_t> CloneCount = 0; + std::atomic<uint64_t> CloneByteCount = 0; + std::atomic<uint64_t> CurrentOpenFileCount = 0; +}; + +struct DownloadStatistics +{ + std::atomic<uint64_t> RequestsCompleteCount = 0; + + std::atomic<uint64_t> DownloadedChunkCount = 0; + std::atomic<uint64_t> DownloadedChunkByteCount = 0; + std::atomic<uint64_t> MultipartAttachmentCount = 0; + + std::atomic<uint64_t> DownloadedBlockCount = 0; + std::atomic<uint64_t> DownloadedBlockByteCount = 0; + + std::atomic<uint64_t> DownloadedPartialBlockCount = 0; + std::atomic<uint64_t> DownloadedPartialBlockByteCount = 0; +}; + +struct CacheMappingStatistics +{ + uint64_t CacheChunkCount = 0; + uint64_t CacheChunkByteCount = 0; + + uint64_t CacheBlockCount = 0; + uint64_t CacheBlocksByteCount = 0; + + uint64_t CacheSequenceHashesCount = 0; + uint64_t CacheSequenceHashesByteCount = 0; + + uint64_t CacheScanElapsedWallTimeUs = 0; + + uint32_t LocalPathsMatchingSequencesCount = 0; + uint64_t LocalPathsMatchingSequencesByteCount = 0; + + uint64_t LocalChunkMatchingRemoteCount = 0; + uint64_t LocalChunkMatchingRemoteByteCount = 0; + + uint64_t LocalScanElapsedWallTimeUs = 0; + + uint32_t ScavengedPathsMatchingSequencesCount = 0; + uint64_t ScavengedPathsMatchingSequencesByteCount = 0; + + uint64_t ScavengedChunkMatchingRemoteCount = 0; + uint64_t ScavengedChunkMatchingRemoteByteCount = 0; + + uint64_t ScavengeElapsedWallTimeUs = 0; +}; + +struct WriteChunkStatistics +{ + uint64_t DownloadTimeUs = 0; + uint64_t WriteTimeUs = 0; + uint64_t WriteChunksElapsedWallTimeUs = 0; +}; + +struct RebuildFolderStateStatistics +{ + uint64_t 
CleanFolderElapsedWallTimeUs = 0; + std::atomic<uint32_t> FinalizeTreeFilesMovedCount = 0; + std::atomic<uint32_t> FinalizeTreeFilesCopiedCount = 0; + uint64_t FinalizeTreeElapsedWallTimeUs = 0; +}; + +struct FindBlocksStatistics +{ + uint64_t FindBlockTimeMS = 0; + uint64_t PotentialChunkCount = 0; + uint64_t PotentialChunkByteCount = 0; + uint64_t FoundBlockCount = 0; + uint64_t FoundBlockChunkCount = 0; + uint64_t FoundBlockByteCount = 0; + uint64_t AcceptedBlockCount = 0; + uint64_t NewBlocksCount = 0; + uint64_t NewBlocksChunkCount = 0; + uint64_t NewBlocksChunkByteCount = 0; + + FindBlocksStatistics& operator+=(const FindBlocksStatistics& Rhs) + { + FindBlockTimeMS += Rhs.FindBlockTimeMS; + PotentialChunkCount += Rhs.PotentialChunkCount; + PotentialChunkByteCount += Rhs.PotentialChunkByteCount; + FoundBlockCount += Rhs.FoundBlockCount; + FoundBlockChunkCount += Rhs.FoundBlockChunkCount; + FoundBlockByteCount += Rhs.FoundBlockByteCount; + AcceptedBlockCount += Rhs.AcceptedBlockCount; + NewBlocksCount += Rhs.NewBlocksCount; + NewBlocksChunkCount += Rhs.NewBlocksChunkCount; + NewBlocksChunkByteCount += Rhs.NewBlocksChunkByteCount; + return *this; + } +}; + +struct UploadStatistics +{ + std::atomic<uint64_t> BlockCount = 0; + std::atomic<uint64_t> BlocksBytes = 0; + std::atomic<uint64_t> ChunkCount = 0; + std::atomic<uint64_t> ChunksBytes = 0; + std::atomic<uint64_t> ReadFromDiskBytes = 0; + std::atomic<uint64_t> MultipartAttachmentCount = 0; + uint64_t ElapsedWallTimeUS = 0; + + UploadStatistics& operator+=(const UploadStatistics& Rhs) + { + BlockCount += Rhs.BlockCount; + BlocksBytes += Rhs.BlocksBytes; + ChunkCount += Rhs.ChunkCount; + ChunksBytes += Rhs.ChunksBytes; + ReadFromDiskBytes += Rhs.ReadFromDiskBytes; + MultipartAttachmentCount += Rhs.MultipartAttachmentCount; + ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS; + return *this; + } +}; + +struct LooseChunksStatistics +{ + uint64_t ChunkCount = 0; + uint64_t ChunkByteCount = 0; + std::atomic<uint64_t> 
CompressedChunkCount = 0; + std::atomic<uint64_t> CompressedChunkRawBytes = 0; + std::atomic<uint64_t> CompressedChunkBytes = 0; + uint64_t CompressChunksElapsedWallTimeUS = 0; + + LooseChunksStatistics& operator+=(const LooseChunksStatistics& Rhs) + { + ChunkCount += Rhs.ChunkCount; + ChunkByteCount += Rhs.ChunkByteCount; + CompressedChunkCount += Rhs.CompressedChunkCount; + CompressedChunkRawBytes += Rhs.CompressedChunkRawBytes; + CompressedChunkBytes += Rhs.CompressedChunkBytes; + CompressChunksElapsedWallTimeUS += Rhs.CompressChunksElapsedWallTimeUS; + return *this; + } +}; + +struct GenerateBlocksStatistics +{ + std::atomic<uint64_t> GeneratedBlockByteCount = 0; + std::atomic<uint64_t> GeneratedBlockCount = 0; + uint64_t GenerateBlocksElapsedWallTimeUS = 0; + + GenerateBlocksStatistics& operator+=(const GenerateBlocksStatistics& Rhs) + { + GeneratedBlockByteCount += Rhs.GeneratedBlockByteCount; + GeneratedBlockCount += Rhs.GeneratedBlockCount; + GenerateBlocksElapsedWallTimeUS += Rhs.GenerateBlocksElapsedWallTimeUS; + return *this; + } +}; + +struct ValidateStatistics +{ + uint64_t BuildBlobSize = 0; + uint64_t BuildPartSize = 0; + uint64_t ChunkAttachmentCount = 0; + uint64_t BlockAttachmentCount = 0; + std::atomic<uint64_t> VerifiedAttachmentCount = 0; + std::atomic<uint64_t> VerifiedByteCount = 0; + uint64_t ElapsedWallTimeUS = 0; +}; + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h index 4b85d8f1e..df35f65be 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h @@ -3,49 +3,39 @@ #pragma once #include <zencore/logging.h> +#include <zencore/logging/sink.h> #include <zenhttp/httpclient.h> #include <zenremotestore/builds/buildstorage.h> +#include <zenremotestore/builds/buildstorageresolve.h> +#include <zenremotestore/chunking/chunkblock.h> 
+#include <zenutil/sessionsclient.h> + +#include <atomic> +#include <filesystem> +#include <string> +#include <vector> namespace zen { -class OperationLogOutput; -class BuildStorageBase; class BuildStorageCache; +class ParallelWork; +class WorkerThreadPool; +struct ChunkedFolderContent; +struct BuildManifest; +class ChunkingController; -struct BuildStorageResolveResult -{ - std::string HostUrl; - std::string HostName; - bool HostAssumeHttp2 = false; - double HostLatencySec = -1.0; - - std::string CacheUrl; - std::string CacheName; - bool CacheAssumeHttp2 = false; - double CacheLatencySec = -1.0; -}; - -enum class ZenCacheResolveMode -{ - Off, - Discovery, - LocalHost, - All -}; +inline const std::string ZenFolderName = ".zen"; +inline const std::string UnsyncFolderName = ".unsync"; +inline const std::string UGSFolderName = ".ugs"; +inline const std::string LegacyZenTempFolderName = ".zen-tmp"; -BuildStorageResolveResult ResolveBuildStorage(OperationLogOutput& Output, - const HttpClientSettings& ClientSettings, - std::string_view Host, - std::string_view OverrideHost, - std::string_view ZenCacheHost, - ZenCacheResolveMode ZenResolveMode, - bool Verbose); +inline const std::vector<std::string> DefaultExcludeFolders{UnsyncFolderName, ZenFolderName, UGSFolderName, LegacyZenTempFolderName}; +inline const std::vector<std::string> DefaultExcludeExtensions{}; -std::vector<ChunkBlockDescription> GetBlockDescriptions(OperationLogOutput& Output, +std::vector<ChunkBlockDescription> GetBlockDescriptions(LoggerRef InLog, BuildStorageBase& Storage, BuildStorageCache* OptionalCacheStorage, const Oid& BuildId, - const Oid& BuildPartId, std::span<const IoHash> BlockRawHashes, bool AttemptFallback, bool IsQuiet, @@ -53,14 +43,85 @@ std::vector<ChunkBlockDescription> GetBlockDescriptions(OperationLogOutput& Out struct StorageInstance { - std::unique_ptr<HttpClient> BuildStorageHttp; - std::unique_ptr<BuildStorageBase> BuildStorage; - std::string StorageName; - double 
BuildStorageLatencySec = -1.0; - std::unique_ptr<HttpClient> CacheHttp; - std::unique_ptr<BuildStorageCache> BuildCacheStorage; - std::string CacheName; - double CacheLatencySec = -1.0; + ~StorageInstance(); + + StorageInstance() = default; + StorageInstance(StorageInstance&&) = default; + StorageInstance& operator=(StorageInstance&&) = default; + StorageInstance(const StorageInstance&) = delete; + StorageInstance& operator=(const StorageInstance&) = delete; + + BuildStorageResolveResult::Host BuildStorageHost; + std::unique_ptr<HttpClient> BuildStorageHttp; + std::unique_ptr<BuildStorageBase> BuildStorage; + + BuildStorageResolveResult::Host CacheHost; + std::unique_ptr<HttpClient> CacheHttp; + std::unique_ptr<BuildStorageCache> CacheStorage; + std::unique_ptr<SessionsServiceClient> CacheSession; + logging::SinkPtr CacheLogSink; + + void SetupCacheSession(std::string_view TargetUrl, std::string_view Mode, const Oid& SessionId); }; +std::filesystem::path ZenStateFilePath(const std::filesystem::path& ZenFolderPath); +std::filesystem::path ZenTempFolderPath(const std::filesystem::path& ZenFolderPath); + +CbObject GetBuild(BuildStorageBase& Storage, const Oid& BuildId, bool IsQuiet); + +uint64_t GetMaxMemoryBufferSize(size_t MaxBlockSize, bool BoostWorkerMemory); + +void DownloadLargeBlob(BuildStorageBase& Storage, + const std::filesystem::path& DownloadFolder, + const Oid& BuildId, + const IoHash& ChunkHash, + const std::uint64_t PreferredMultipartChunkSize, + ParallelWork& Work, + WorkerThreadPool& NetworkPool, + std::atomic<uint64_t>& DownloadedChunkByteCount, + std::atomic<uint64_t>& MultipartAttachmentCount, + std::function<void(IoBuffer&& Payload)>&& OnDownloadComplete); + +CompositeBuffer ValidateBlob(std::atomic<bool>& AbortFlag, + IoBuffer&& Payload, + const IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize); + +CompositeBuffer ValidateBlob(std::atomic<bool>& AbortFlag, + BuildStorageBase& Storage, + const Oid& BuildId, + const 
IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize); + +std::vector<std::pair<Oid, std::string>> ResolveBuildPartNames(CbObjectView BuildObject, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + std::uint64_t& OutPreferredMultipartChunkSize); + +void NormalizePartSelection(std::vector<Oid>& BuildPartIds, std::vector<std::string>& BuildPartNames, std::string_view HelpText); + +ChunkedFolderContent GetRemoteContent(LoggerRef InLog, + StorageInstance& Storage, + const Oid& BuildId, + const std::vector<std::pair<Oid, std::string>>& BuildParts, + const BuildManifest& Manifest, + std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards, + std::unique_ptr<ChunkingController>& OutChunkController, + std::vector<ChunkedFolderContent>& OutPartContents, + std::vector<ChunkBlockDescription>& OutBlockDescriptions, + std::vector<IoHash>& OutLooseChunkHashes, + bool IsQuiet, + bool IsVerbose, + bool DoExtraContentVerify); + +std::string GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix); + +#if ZEN_WITH_TESTS +void buildstorageutil_forcelink(); +#endif // ZEN_WITH_TESTS + } // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildupdatefolder.h b/src/zenremotestore/include/zenremotestore/builds/buildupdatefolder.h new file mode 100644 index 000000000..c820f4dcb --- /dev/null +++ b/src/zenremotestore/include/zenremotestore/builds/buildupdatefolder.h @@ -0,0 +1,529 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <zencore/iohash.h> +#include <zencore/logging.h> +#include <zencore/uid.h> +#include <zencore/zencore.h> +#include <zenremotestore/builds/buildsavedstate.h> +#include <zenremotestore/builds/buildstoragecache.h> +#include <zenremotestore/builds/buildstoragestats.h> +#include <zenremotestore/builds/buildstorageutil.h> +#include <zenremotestore/builds/builduploadfolder.h> +#include <zenremotestore/chunking/chunkblock.h> +#include <zenremotestore/chunking/chunkedcontent.h> +#include <zenremotestore/partialblockrequestmode.h> +#include <zenutil/bufferedwritefilecache.h> + +#include <filesystem> +#include <span> +#include <string> +#include <vector> + +#include <atomic> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_set.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class CloneQueryInterface; +class FilteredRate; +class ParallelWork; +class ProgressBase; +class WorkerThreadPool; + +////////////////////////////////////////////////////////////////////////// + +class BuildsOperationUpdateFolder +{ +public: + struct Options + { + bool IsQuiet = false; + bool IsVerbose = false; + bool AllowFileClone = true; + bool UseSparseFiles = true; + std::filesystem::path SystemRootDir; + std::filesystem::path ZenFolderPath; + std::uint64_t LargeAttachmentSize = 32u * 1024u * 1024u * 4u; + std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + EPartialBlockRequestMode PartialBlockRequestMode = EPartialBlockRequestMode::Mixed; + bool WipeTargetFolder = false; + bool EnableOtherDownloadsScavenging = true; + bool EnableTargetFolderScavenging = true; + bool ValidateCompletedSequences = true; + std::vector<std::string> ExcludeFolders; + uint64_t MaximumInMemoryPayloadSize = 512u * 1024u; + bool PopulateCache = true; + }; + + BuildsOperationUpdateFolder(LoggerRef Log, + ProgressBase& Progress, + StorageInstance& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + WorkerThreadPool& IOWorkerPool, + WorkerThreadPool& 
NetworkPool, + const Oid& BuildId, + const std::filesystem::path& Path, + const ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& RemoteLookup, + const std::vector<ChunkBlockDescription>& BlockDescriptions, + const std::vector<IoHash>& LooseChunkHashes, + const Options& Options); + + void Execute(FolderContent& OutLocalFolderState); + + DiskStatistics m_DiskStats; + CacheMappingStatistics m_CacheMappingStats; + GetFolderContentStatistics m_ScavengedFolderScanStats; + DownloadStatistics m_DownloadStats; + WriteChunkStatistics m_WriteChunkStats; + RebuildFolderStateStatistics m_RebuildFolderStateStats; + std::atomic<uint64_t> m_WrittenChunkByteCount = 0; + +private: + struct BlockWriteOps + { + std::vector<CompositeBuffer> ChunkBuffers; + struct WriteOpData + { + const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; + size_t ChunkBufferIndex = (size_t)-1; + }; + std::vector<WriteOpData> WriteOps; + }; + + struct ScavengeSource + { + std::filesystem::path StateFilePath; + std::filesystem::path Path; + }; + + struct ScavengedSequenceCopyOperation + { + uint32_t ScavengedContentIndex = (uint32_t)-1; + uint32_t ScavengedPathIndex = (uint32_t)-1; + uint32_t RemoteSequenceIndex = (uint32_t)-1; + uint64_t RawSize = (uint64_t)-1; + }; + + struct CopyChunkData + { + uint32_t ScavengeSourceIndex = (uint32_t)-1; + uint32_t SourceSequenceIndex = (uint32_t)-1; + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> TargetChunkLocationPtrs; + struct ChunkTarget + { + uint32_t TargetChunkLocationCount = (uint32_t)-1; + uint32_t RemoteChunkIndex = (uint32_t)-1; + uint64_t CacheFileOffset = (uint64_t)-1; + }; + std::vector<ChunkTarget> ChunkTargets; + }; + + struct BlobsExistsResult + { + tsl::robin_set<IoHash> ExistingBlobs; + uint64_t ElapsedTimeMs = 0; + }; + + struct LooseChunkHashWorkData + { + std::vector<const 
ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs; + uint32_t RemoteChunkIndex = (uint32_t)-1; + }; + + struct FinalizeTarget + { + IoHash RawHash; + uint32_t RemotePathIndex; + }; + + struct LocalPathCategorization + { + std::vector<uint32_t> FilesToCache; + std::vector<uint32_t> RemoveLocalPathIndexes; + tsl::robin_map<uint32_t, uint32_t> RemotePathIndexToLocalPathIndex; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceHashToLocalPathIndex; + uint64_t MatchCount = 0; + uint64_t PathMismatchCount = 0; + uint64_t HashMismatchCount = 0; + uint64_t SkippedCount = 0; + uint64_t DeleteCount = 0; + }; + + struct WriteChunksContext + { + ParallelWork& Work; + BufferedWriteFileCache& WriteCache; + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters; + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags; + std::atomic<uint64_t>& WritePartsComplete; + uint64_t TotalPartWriteCount; + uint64_t TotalRequestCount; + const BlobsExistsResult& ExistsResult; + FilteredRate& FilteredDownloadedBytesPerSecond; + FilteredRate& FilteredWrittenBytesPerSecond; + }; + + void ScanCacheFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedChunkHashesFound, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedSequenceHashesFound); + void ScanTempBlocksFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedBlocksFound); + std::vector<uint32_t> ScanTargetFolder(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedChunkHashesFound, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound); + + std::vector<ScavengeSource> FindScavengeSources(); + + bool FindScavengeContent(const ScavengeSource& Source, + ChunkedFolderContent& OutScavengedLocalContent, + ChunkedContentLookup& OutScavengedLookup); + + void ScavengeSourceForChunks(uint32_t& InOutRemainingChunkCount, + std::vector<bool>& InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags, + tsl::robin_map<IoHash, size_t, 
IoHash::Hasher>& InOutRawHashToCopyChunkDataIndex, + const std::vector<std::atomic<uint32_t>>& SequenceIndexChunksLeftToWriteCounters, + const ChunkedFolderContent& ScavengedContent, + const ChunkedContentLookup& ScavengedLookup, + std::vector<CopyChunkData>& InOutCopyChunkDatas, + uint32_t ScavengedContentIndex, + uint64_t& InOutChunkMatchingRemoteCount, + uint64_t& InOutChunkMatchingRemoteByteCount); + + std::filesystem::path FindDownloadedChunk(const IoHash& ChunkHash); + + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> GetRemainingChunkTargets( + std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + uint32_t ChunkIndex); + + uint64_t GetChunkWriteCount(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, uint32_t ChunkIndex); + + void CheckRequiredDiskSpace(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex); + + void WriteScavengedSequenceToCache(const std::filesystem::path& ScavengeRootPath, + const ChunkedFolderContent& ScavengedContent, + const ScavengedSequenceCopyOperation& ScavengeOp); + + void WriteLooseChunk(const uint32_t RemoteChunkIndex, + const BlobsExistsResult& ExistsResult, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + std::atomic<uint64_t>& WritePartsComplete, + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs, + BufferedWriteFileCache& WriteCache, + ParallelWork& Work, + uint64_t TotalRequestCount, + uint64_t TotalPartWriteCount, + FilteredRate& FilteredDownloadedBytesPerSecond, + FilteredRate& FilteredWrittenBytesPerSecond); + + void DownloadBuildBlob(uint32_t RemoteChunkIndex, + const BlobsExistsResult& ExistsResult, + ParallelWork& Work, + uint64_t TotalRequestCount, + FilteredRate& FilteredDownloadedBytesPerSecond, + std::function<void(IoBuffer&& Payload)>&& OnDownloaded); + + void DownloadPartialBlock(std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRanges, + size_t 
BlockRangeIndex, + size_t BlockRangeCount, + const BlobsExistsResult& ExistsResult, + uint64_t TotalRequestCount, + FilteredRate& FilteredDownloadedBytesPerSecond, + std::function<void(IoBuffer&& InMemoryBuffer, + const std::filesystem::path& OnDiskPath, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>&& OnDownloaded); + + std::vector<uint32_t> WriteLocalChunkToCache(CloneQueryInterface* CloneQuery, + const CopyChunkData& CopyData, + const std::vector<ChunkedFolderContent>& ScavengedContents, + const std::vector<ChunkedContentLookup>& ScavengedLookups, + const std::vector<std::filesystem::path>& ScavengedPaths, + BufferedWriteFileCache& WriteCache); + + bool WriteCompressedChunkToCache(const IoHash& ChunkHash, + const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, + BufferedWriteFileCache& WriteCache, + IoBuffer&& CompressedPart); + + void StreamDecompress(const IoHash& SequenceRawHash, CompositeBuffer&& CompressedPart); + + void WriteSequenceChunkToCache(BufferedWriteFileCache::Local& LocalWriter, + const CompositeBuffer& Chunk, + const uint32_t SequenceIndex, + const uint64_t FileOffset, + const uint32_t PathIndex); + + bool GetBlockWriteOps(const IoHash& BlockRawHash, + std::span<const IoHash> ChunkRawHashes, + std::span<const uint32_t> ChunkCompressedLengths, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + const MemoryView BlockView, + uint32_t FirstIncludedBlockChunkIndex, + uint32_t LastIncludedBlockChunkIndex, + BlockWriteOps& OutOps); + + void WriteBlockChunkOpsToCache(std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + const BlockWriteOps& Ops, + BufferedWriteFileCache& WriteCache, + ParallelWork& Work); + + bool WriteChunksBlockToCache(const ChunkBlockDescription& BlockDescription, + std::span<std::atomic<uint32_t>> 
SequenceIndexChunksLeftToWriteCounters, + ParallelWork& Work, + CompositeBuffer&& BlockBuffer, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + BufferedWriteFileCache& WriteCache); + + bool WritePartialBlockChunksToCache(const ChunkBlockDescription& BlockDescription, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + ParallelWork& Work, + CompositeBuffer&& PartialBlockBuffer, + uint32_t FirstIncludedBlockChunkIndex, + uint32_t LastIncludedBlockChunkIndex, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + BufferedWriteFileCache& WriteCache); + + void AsyncWriteDownloadedChunk(uint32_t RemoteChunkIndex, + const BlobsExistsResult& ExistsResult, + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs, + BufferedWriteFileCache& WriteCache, + ParallelWork& Work, + IoBuffer&& Payload, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + std::atomic<uint64_t>& WritePartsComplete, + const uint64_t TotalPartWriteCount, + FilteredRate& FilteredWrittenBytesPerSecond); + + void VerifyAndCompleteChunkSequencesAsync(std::span<const uint32_t> RemoteSequenceIndexes, ParallelWork& Work); + bool CompleteSequenceChunk(uint32_t RemoteSequenceIndex, std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters); + std::vector<uint32_t> CompleteChunkTargets(const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters); + void FinalizeChunkSequence(const IoHash& SequenceRawHash); + void FinalizeChunkSequences(std::span<const uint32_t> RemoteSequenceIndexes); + void VerifySequence(uint32_t RemoteSequenceIndex); + + void InitializeSequenceCounters(std::vector<std::atomic<uint32_t>>& OutSequenceCounters, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutSequencesLeftToFind, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& 
CachedChunkHashesFound, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound); + + void MatchScavengedSequencesToRemote(std::span<const ChunkedFolderContent> Contents, + std::span<const ChunkedContentLookup> Lookups, + std::span<const std::filesystem::path> Paths, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& InOutSequencesLeftToFind, + std::vector<std::atomic<uint32_t>>& InOutSequenceCounters, + std::vector<ScavengedSequenceCopyOperation>& OutCopyOperations, + uint64_t& OutScavengedPathsCount); + + uint64_t CalculateBytesToWriteAndFlagNeededChunks(std::span<const std::atomic<uint32_t>> SequenceCounters, + const std::vector<bool>& NeedsCopyFromLocalFileFlags, + std::span<std::atomic<bool>> OutNeedsCopyFromSourceFlags); + + void ClassifyCachedAndFetchBlocks(std::span<const ChunkBlockAnalyser::NeededBlock> NeededBlocks, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedBlocksFound, + uint64_t& TotalPartWriteCount, + std::vector<uint32_t>& OutCachedChunkBlockIndexes, + std::vector<uint32_t>& OutFetchBlockIndexes); + + std::vector<uint32_t> DetermineNeededLooseChunkIndexes(std::span<const std::atomic<uint32_t>> SequenceCounters, + const std::vector<bool>& NeedsCopyFromLocalFileFlags, + std::span<std::atomic<bool>> NeedsCopyFromSourceFlags); + + BlobsExistsResult QueryBlobCacheExists(std::span<const uint32_t> NeededLooseChunkIndexes, std::span<const uint32_t> FetchBlockIndexes); + + std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> DeterminePartialDownloadModes(const BlobsExistsResult& ExistsResult); + + std::vector<LooseChunkHashWorkData> BuildLooseChunkHashWorks(std::span<const uint32_t> NeededLooseChunkIndexes, + std::span<const std::atomic<uint32_t>> SequenceCounters); + + void VerifyWriteChunksComplete(std::span<const std::atomic<uint32_t>> SequenceCounters, + uint64_t BytesToWrite, + uint64_t BytesToValidate); + + std::vector<FinalizeTarget> BuildSortedFinalizeTargets(); + + void 
ScanScavengeSources(std::span<const ScavengeSource> Sources, + std::vector<ChunkedFolderContent>& OutContents, + std::vector<ChunkedContentLookup>& OutLookups, + std::vector<std::filesystem::path>& OutPaths); + + LocalPathCategorization CategorizeLocalPaths(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex); + + void ScheduleLocalFileCaching(std::span<const uint32_t> FilesToCache, + std::atomic<uint64_t>& OutCachedCount, + std::atomic<uint64_t>& OutCachedByteCount); + + void ScheduleScavengedSequenceWrites(WriteChunksContext& Context, + std::span<const ScavengedSequenceCopyOperation> CopyOperations, + const std::vector<ChunkedFolderContent>& ScavengedContents, + const std::vector<std::filesystem::path>& ScavengedPaths); + + void ScheduleLooseChunkWrites(WriteChunksContext& Context, std::vector<LooseChunkHashWorkData>& LooseChunkHashWorks); + + void ScheduleLocalChunkCopies(WriteChunksContext& Context, + std::span<const CopyChunkData> CopyChunkDatas, + CloneQueryInterface* CloneQuery, + const std::vector<ChunkedFolderContent>& ScavengedContents, + const std::vector<ChunkedContentLookup>& ScavengedLookups, + const std::vector<std::filesystem::path>& ScavengedPaths); + + void ScheduleCachedBlockWrites(WriteChunksContext& Context, std::span<const uint32_t> CachedBlockIndexes); + + void SchedulePartialBlockDownloads(WriteChunksContext& Context, const ChunkBlockAnalyser::BlockResult& PartialBlocks); + + void WritePartialBlockToCache(WriteChunksContext& Context, + size_t BlockRangeStartIndex, + IoBuffer BlockPartialBuffer, + const std::filesystem::path& BlockChunkPath, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths, + const ChunkBlockAnalyser::BlockResult& PartialBlocks); + + void ScheduleFullBlockDownloads(WriteChunksContext& Context, std::span<const uint32_t> FullBlockIndexes); + + void WriteFullBlockToCache(WriteChunksContext& Context, + uint32_t BlockIndex, + IoBuffer BlockBuffer, + const std::filesystem::path& BlockChunkPath); + 
// --- Remainder of the download-operation class declaration (the class opens earlier in this header) ---

	// Queues deletion of local files (by index into the local content) that are not part of the remote build.
	void ScheduleLocalFileRemovals(ParallelWork& Work,
	                               std::span<const uint32_t> RemoveLocalPathIndexes,
	                               std::atomic<uint64_t>& DeletedCount);

	// Queues finalization of download targets; lookups map sequence hashes / remote path indexes back to
	// already-present local files so finished data can be reused in place.
	void ScheduleTargetFinalization(ParallelWork& Work,
	                                std::span<const FinalizeTarget> Targets,
	                                const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& SequenceHashToLocalPathIndex,
	                                const tsl::robin_map<uint32_t, uint32_t>& RemotePathIndexToLocalPathIndex,
	                                FolderContent& OutLocalFolderState,
	                                std::atomic<uint64_t>& TargetsComplete);

	// Worker body for ScheduleTargetFinalization: finalizes Targets[BaseOffset .. BaseOffset+Count).
	void FinalizeTargetGroup(size_t BaseOffset,
	                         size_t Count,
	                         std::span<const FinalizeTarget> Targets,
	                         const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& SequenceHashToLocalPathIndex,
	                         const tsl::robin_map<uint32_t, uint32_t>& RemotePathIndexToLocalPathIndex,
	                         FolderContent& OutLocalFolderState,
	                         std::atomic<uint64_t>& TargetsComplete);

	LoggerRef Log() { return m_Log; }

	// Dependencies injected at construction; references must outlive this operation object.
	LoggerRef			m_Log;
	ProgressBase&		m_Progress;
	StorageInstance&	m_Storage;
	std::atomic<bool>&	m_AbortFlag;  // set externally to cancel the operation
	std::atomic<bool>&	m_PauseFlag;  // set externally to pause the operation
	WorkerThreadPool&	m_IOWorkerPool;
	WorkerThreadPool&	m_NetworkPool;
	const Oid			m_BuildId;
	const std::filesystem::path m_Path;
	const ChunkedFolderContent& m_LocalContent;
	const ChunkedContentLookup& m_LocalLookup;
	const ChunkedFolderContent& m_RemoteContent;
	const ChunkedContentLookup& m_RemoteLookup;
	const std::vector<ChunkBlockDescription>& m_BlockDescriptions;
	const std::vector<IoHash>& m_LooseChunkHashes;
	const Options m_Options;
	const std::filesystem::path m_CacheFolderPath;
	const std::filesystem::path m_TempDownloadFolderPath;
	const std::filesystem::path m_TempBlockFolderPath;

	// Total bytes of chunk data validated so far (updated from worker threads, hence atomic).
	std::atomic<uint64_t> m_ValidatedChunkByteCount = 0;
};

//////////////////////////////////////////////////////////////////////////

class TransferThreadWorkers;

// Counters filled in by VerifyFolder(). The atomics are incremented concurrently
// by worker threads; the wall-time member is written once after the workers join.
struct VerifyFolderStatistics
{
	std::atomic<uint64_t> FilesVerified = 0;  // files whose contents matched the expected hashes
	std::atomic<uint64_t> FilesFailed = 0;    // files missing or with mismatching contents
	std::atomic<uint64_t> ReadBytes = 0;      // bytes read from disk during verification
	uint64_t VerifyElapsedWallTimeUs = 0;     // total wall time of the verify pass, microseconds
};

//////////////////////////////////////////////////////////////////////////

// Returns the subset of `Paths` that is not already present in `KnownPaths`.
std::vector<std::filesystem::path> GetNewPaths(std::span<const std::filesystem::path> KnownPaths,
                                               std::span<const std::filesystem::path> Paths);

// Builds the local save state for a specific set of paths under `Path`
// (scan + chunking), without consulting a serialized state file.
BuildSaveState GetLocalStateFromPaths(ProgressBase& Progress,
                                      std::atomic<bool>& AbortFlag,
                                      std::atomic<bool>& PauseFlag,
                                      TransferThreadWorkers& Workers,
                                      GetFolderContentStatistics& LocalFolderScanStats,
                                      ChunkingStatistics& ChunkingStats,
                                      const std::filesystem::path& Path,
                                      ChunkingController& ChunkController,
                                      ChunkingCache& ChunkCache,
                                      std::span<const std::filesystem::path> PathsToCheck);

// Builds the local content state for `Path`; `StateFilePath` names a previously
// serialized state — presumably used to avoid rescanning unchanged files (TODO confirm).
BuildSaveState GetLocalContent(ProgressBase& Progress,
                               std::atomic<bool>& AbortFlag,
                               std::atomic<bool>& PauseFlag,
                               bool IsQuiet,
                               TransferThreadWorkers& Workers,
                               GetFolderContentStatistics& LocalFolderScanStats,
                               ChunkingStatistics& ChunkingStats,
                               const std::filesystem::path& Path,
                               const std::filesystem::path& StateFilePath,
                               ChunkingController& ChunkController,
                               ChunkingCache& ChunkCache);

// Verifies that the on-disk files under `Path` match the chunked `Content`/`Lookup`
// description, skipping `ExcludeFolders`. When `VerifyFileHash` is false,
// presumably only size/metadata are checked — confirm against the implementation.
// Results are accumulated into `VerifyFolderStats`.
void VerifyFolder(ProgressBase& Progress,
                  std::atomic<bool>& AbortFlag,
                  std::atomic<bool>& PauseFlag,
                  TransferThreadWorkers& Workers,
                  const ChunkedFolderContent& Content,
                  const ChunkedContentLookup& Lookup,
                  const std::filesystem::path& Path,
                  const std::vector<std::string>& ExcludeFolders,
                  bool VerifyFileHash,
                  VerifyFolderStatistics& VerifyFolderStats);

//////////////////////////////////////////////////////////////////////////

// Tunables for DownloadFolder(). All members have safe defaults; callers
// typically override only what differs from the default behavior.
struct DownloadOptions
{
	std::filesystem::path SystemRootDir;
	std::filesystem::path ZenFolderPath;
	bool AllowMultiparts = true;  // allow multipart transfers for large payloads
	EPartialBlockRequestMode PartialBlockRequestMode = EPartialBlockRequestMode::Mixed;
	bool CleanTargetFolder = false;        // remove files in the target folder that are not part of the build
	bool PostDownloadVerify = false;       // run a verification pass after the download completes
	bool EnableOtherDownloadsScavenging = true;  // reuse data already present in other downloads
	bool EnableTargetFolderScavenging = true;    // reuse data already present in the target folder
	bool AllowFileClone = true;            // allow filesystem-level cloning (reflink/CoW) where supported
	std::vector<std::string> IncludeWildcards;   // only paths matching these are downloaded (empty = all)
	std::vector<std::string> ExcludeWildcards;   // paths matching these are skipped
	uint64_t MaximumInMemoryPayloadSize = 512u * 1024u;  // 512 KiB; larger payloads presumably stream via disk — TODO confirm
	bool PopulateCache = true;             // write downloaded chunks into the local chunk cache
	bool AppendNewContent = false;
	bool IsQuiet = false;
	bool IsVerbose = false;
	bool UseSparseFiles = false;
	bool DoExtraContentVerify = false;
	std::vector<std::string> ExcludeFolders = DefaultExcludeFolders;
};

// Downloads build `BuildId` (parts `BuildPartIds`/`BuildPartNames`) from `Storage`
// into `Path`, honoring `Options`. AbortFlag/PauseFlag allow external cancellation
// and pausing; cache statistics are reported through `StorageCacheStats`.
void DownloadFolder(LoggerRef InLog,
                    ProgressBase& Progress,
                    TransferThreadWorkers& Workers,
                    StorageInstance& Storage,
                    std::atomic<bool>& AbortFlag,
                    std::atomic<bool>& PauseFlag,
                    const BuildStorageCache::Statistics& StorageCacheStats,
                    const Oid& BuildId,
                    const std::vector<Oid>& BuildPartIds,
                    std::span<const std::string> BuildPartNames,
                    const std::filesystem::path& DownloadSpecPath,
                    const std::filesystem::path& Path,
                    const DownloadOptions& Options);

} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/builduploadfolder.h b/src/zenremotestore/include/zenremotestore/builds/builduploadfolder.h
new file mode 100644
index 000000000..9ab80955a
--- /dev/null
+++ b/src/zenremotestore/include/zenremotestore/builds/builduploadfolder.h
@@ -0,0 +1,393 @@
// Copyright Epic Games, Inc. All Rights Reserved.
+ +#pragma once + +#include <zencore/iohash.h> +#include <zencore/logging.h> +#include <zencore/uid.h> +#include <zencore/zencore.h> +#include <zenremotestore/builds/buildstoragestats.h> +#include <zenremotestore/builds/buildstorageutil.h> +#include <zenremotestore/chunking/chunkblock.h> +#include <zenremotestore/chunking/chunkedcontent.h> + +#include <filesystem> +#include <string> +#include <vector> + +#include <atomic> +#include <future> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_set.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class FilteredRate; +class ParallelWork; +class ProgressBase; +class ReadFileCache; +class RwLock; +class TransferThreadWorkers; +class WorkerThreadPool; + +static constexpr size_t DefaultMaxChunkBlockSize = 64u * 1024u * 1024u; +static constexpr size_t DefaultMaxChunksPerChunkBlock = 4u * 1000u; +static constexpr size_t DefaultMaxChunkBlockEmbedSize = 3u * 512u * 1024u; + +////////////////////////////////////////////////////////////////////////// + +class BuildsOperationUploadFolder +{ +public: + struct ChunksBlockParameters + { + size_t MaxBlockSize = DefaultMaxChunkBlockSize; + size_t MaxChunksPerBlock = DefaultMaxChunksPerChunkBlock; + size_t MaxChunkEmbedSize = DefaultMaxChunkBlockEmbedSize; + }; + + struct Options + { + bool IsQuiet = false; + bool IsVerbose = false; + bool DoExtraContentValidation = false; + + const uint64_t FindBlockMaxCount = 10000; + const uint8_t BlockReuseMinPercentLimit = 85; + bool AllowMultiparts = true; + bool IgnoreExistingBlocks = false; + ChunksBlockParameters BlockParameters; + + uint32_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + + const uint64_t MinimumSizeForCompressInBlock = 2u * 1024u; + + std::filesystem::path TempDir; + std::vector<std::string> ExcludeFolders; + std::vector<std::string> ExcludeExtensions; + std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt"; + + std::vector<std::string> NonCompressableExtensions; + + bool PopulateCache = true; + }; + 
BuildsOperationUploadFolder(LoggerRef Log, + ProgressBase& Progress, + StorageInstance& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + WorkerThreadPool& IOWorkerPool, + WorkerThreadPool& NetworkPool, + const Oid& BuildId, + const std::filesystem::path& Path, + bool CreateBuild, + const CbObject& MetaData, + const Options& Options); + + std::vector<std::pair<Oid, std::string>> Execute(const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& ManifestPath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache); + + DiskStatistics m_DiskStats; + GetFolderContentStatistics m_LocalFolderScanStats; + ChunkingStatistics m_ChunkingStats; + FindBlocksStatistics m_FindBlocksStats; + ReuseBlocksStatistics m_ReuseBlocksStats; + UploadStatistics m_UploadStats; + GenerateBlocksStatistics m_GenerateBlocksStats; + LooseChunksStatistics m_LooseChunksStats; + +private: + struct PrepareBuildResult + { + std::vector<ChunkBlockDescription> KnownBlocks; + uint64_t PreferredMultipartChunkSize = 0; + uint64_t PayloadSize = 0; + uint64_t PrepareBuildTimeMs = 0; + uint64_t FindBlocksTimeMs = 0; + uint64_t ElapsedTimeMs = 0; + }; + + PrepareBuildResult PrepareBuild(); + + struct UploadPart + { + Oid PartId = Oid::Zero; + std::string PartName; + FolderContent Content; + uint64_t TotalRawSize = 0; + GetFolderContentStatistics LocalFolderScanStats; + }; + + std::vector<BuildsOperationUploadFolder::UploadPart> ReadFolder(); + std::vector<UploadPart> ReadManifestParts(const std::filesystem::path& ManifestPath); + + bool IsAcceptedFolder(const std::string_view& RelativePath) const; + bool IsAcceptedFile(const std::string_view& RelativePath) const; + + void ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::vector<uint32_t>& ChunkIndexes, + std::vector<std::vector<uint32_t>>& OutBlocks); + struct GeneratedBlocks + { + std::vector<ChunkBlockDescription> 
BlockDescriptions; + std::vector<uint64_t> BlockSizes; + std::vector<CompositeBuffer> BlockHeaders; + std::vector<CbObject> BlockMetaDatas; + std::vector<uint8_t> + MetaDataHasBeenUploaded; // NOTE: Do not use std::vector<bool> here as this vector is modified by multiple threads + tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockHashToBlockIndex; + }; + + void GenerateBuildBlocks(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& OutBlocks, + GenerateBlocksStatistics& GenerateBlocksStats, + UploadStatistics& UploadStats); + + struct GenerateBuildBlocksContext + { + ParallelWork& Work; + WorkerThreadPool& GenerateBlobsPool; + WorkerThreadPool& UploadBlocksPool; + FilteredRate& FilteredGeneratedBytesPerSecond; + FilteredRate& FilteredUploadedBytesPerSecond; + std::atomic<uint64_t>& QueuedPendingBlocksForUpload; + RwLock& Lock; + GeneratedBlocks& OutBlocks; + GenerateBlocksStatistics& GenerateBlocksStats; + UploadStatistics& UploadStats; + size_t NewBlockCount; + }; + + void ScheduleBlockGeneration(GenerateBuildBlocksContext& Context, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const std::vector<std::vector<uint32_t>>& NewBlockChunks); + + void UploadGeneratedBlock(GenerateBuildBlocksContext& Context, size_t BlockIndex, CompressedBuffer Payload); + + std::vector<uint32_t> CalculateAbsoluteChunkOrders(const std::span<const IoHash> LocalChunkHashes, + const std::span<const uint32_t> LocalChunkOrder, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToLocalChunkIndex, + const std::span<const uint32_t>& LooseChunkIndexes, + const std::span<const ChunkBlockDescription>& BlockDescriptions); + + CompositeBuffer FetchChunk(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const IoHash& ChunkHash, + ReadFileCache& OpenFileCache); + + CompressedBuffer GenerateBlock(const ChunkedFolderContent& 
Content, + const ChunkedContentLookup& Lookup, + const std::vector<uint32_t>& ChunksInBlock, + ChunkBlockDescription& OutBlockDescription); + + CompressedBuffer RebuildBlock(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + CompositeBuffer&& HeaderBuffer, + const std::vector<uint32_t>& ChunksInBlock); + + enum class PartTaskSteps : uint32_t + { + ChunkPartContent = 0, + CalculateDelta, + GenerateBlocks, + BuildPartManifest, + UploadBuildPart, + UploadAttachments, + PutBuildPartStats, + StepCount + }; + + void UploadBuildPart(ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + uint32_t PartIndex, + const UploadPart& Part, + uint32_t PartStepOffset, + uint32_t StepCount); + + ChunkedFolderContent ScanPartContent(const UploadPart& Part, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + ChunkingStatistics& ChunkingStats); + + void ConsumePrepareBuildResult(); + + void ClassifyChunksByBlockEligibility(const ChunkedFolderContent& LocalContent, + std::vector<uint32_t>& OutLooseChunkIndexes, + std::vector<uint32_t>& OutNewBlockChunkIndexes, + std::vector<size_t>& OutReuseBlockIndexes, + LooseChunksStatistics& LooseChunksStats, + FindBlocksStatistics& FindBlocksStats, + ReuseBlocksStatistics& ReuseBlocksStats); + + struct BuiltPartManifest + { + CbObject PartManifest; + std::vector<ChunkBlockDescription> AllChunkBlockDescriptions; + std::vector<IoHash> AllChunkBlockHashes; + }; + + BuiltPartManifest BuildPartManifestObject(const ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + ChunkingController& ChunkController, + std::span<const size_t> ReuseBlockIndexes, + const GeneratedBlocks& NewBlocks, + std::span<const uint32_t> LooseChunkIndexes); + + void UploadAttachmentBatch(std::span<IoHash> RawHashes, + std::vector<IoHash>& OutUnknownChunks, + const ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + const std::vector<std::vector<uint32_t>>& 
NewBlockChunks, + GeneratedBlocks& NewBlocks, + std::span<const uint32_t> LooseChunkIndexes, + UploadStatistics& UploadStats, + LooseChunksStatistics& LooseChunksStats); + + void FinalizeBuildPartWithRetries(const UploadPart& Part, + const IoHash& PartHash, + std::vector<IoHash>& InOutUnknownChunks, + const ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& NewBlocks, + std::span<const uint32_t> LooseChunkIndexes, + UploadStatistics& UploadStats, + LooseChunksStatistics& LooseChunksStats); + + void UploadMissingBlockMetadata(GeneratedBlocks& NewBlocks, UploadStatistics& UploadStats); + + void UploadPartBlobs(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::span<IoHash> RawHashes, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& NewBlocks, + std::span<const uint32_t> LooseChunkIndexes, + const std::uint64_t LargeAttachmentSize, + UploadStatistics& TempUploadStats, + LooseChunksStatistics& TempLooseChunksStats, + std::vector<IoHash>& OutUnknownChunks); + + struct UploadPartClassification + { + std::vector<size_t> BlockIndexes; + std::vector<uint32_t> LooseChunkOrderIndexes; + uint64_t TotalBlocksSize = 0; + uint64_t TotalLooseChunksSize = 0; + }; + + UploadPartClassification ClassifyUploadRawHashes(std::span<IoHash> RawHashes, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const GeneratedBlocks& NewBlocks, + std::span<const uint32_t> LooseChunkIndexes, + std::vector<IoHash>& OutUnknownChunks); + + struct UploadPartBlobsContext + { + ParallelWork& Work; + WorkerThreadPool& ReadChunkPool; + WorkerThreadPool& UploadChunkPool; + FilteredRate& FilteredGenerateBlockBytesPerSecond; + FilteredRate& FilteredCompressedBytesPerSecond; + FilteredRate& FilteredUploadedBytesPerSecond; + std::atomic<size_t>& UploadedBlockSize; + std::atomic<size_t>& UploadedBlockCount; + 
std::atomic<size_t>& UploadedRawChunkSize; + std::atomic<size_t>& UploadedCompressedChunkSize; + std::atomic<uint32_t>& UploadedChunkCount; + std::atomic<uint64_t>& GeneratedBlockCount; + std::atomic<uint64_t>& GeneratedBlockByteCount; + std::atomic<uint64_t>& QueuedPendingInMemoryBlocksForUpload; + size_t UploadBlockCount; + uint32_t UploadChunkCount; + uint64_t LargeAttachmentSize; + GeneratedBlocks& NewBlocks; + const ChunkedFolderContent& Content; + const ChunkedContentLookup& Lookup; + const std::vector<std::vector<uint32_t>>& NewBlockChunks; + std::span<const uint32_t> LooseChunkIndexes; + UploadStatistics& TempUploadStats; + LooseChunksStatistics& TempLooseChunksStats; + }; + + void ScheduleBlockGenerationAndUpload(UploadPartBlobsContext& Context, std::span<const size_t> BlockIndexes); + + void ScheduleLooseChunkCompressionAndUpload(UploadPartBlobsContext& Context, std::span<const uint32_t> LooseChunkOrderIndexes); + + void UploadBlockPayload(UploadPartBlobsContext& Context, size_t BlockIndex, const IoHash& BlockHash, CompositeBuffer Payload); + + void UploadLooseChunkPayload(UploadPartBlobsContext& Context, const IoHash& RawHash, uint64_t RawSize, CompositeBuffer Payload); + + CompositeBuffer CompressChunk(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex, + LooseChunksStatistics& TempLooseChunksStats); + + LoggerRef Log() { return m_Log; } + + LoggerRef m_Log; + ProgressBase& m_Progress; + StorageInstance& m_Storage; + std::atomic<bool>& m_AbortFlag; + std::atomic<bool>& m_PauseFlag; + WorkerThreadPool& m_IOWorkerPool; + WorkerThreadPool& m_NetworkPool; + const Oid m_BuildId; + + const std::filesystem::path m_Path; + const bool m_CreateBuild; + const CbObject m_MetaData; + const Options m_Options; + + tsl::robin_set<uint32_t> m_NonCompressableExtensionHashes; + + std::future<PrepareBuildResult> m_PrepBuildResultFuture; + std::vector<ChunkBlockDescription> m_KnownBlocks; + uint64_t m_PreferredMultipartChunkSize 
= 0; + uint64_t m_LargeAttachmentSize = 0; +}; + +////////////////////////////////////////////////////////////////////////// + +struct UploadFolderOptions +{ + std::filesystem::path TempDir; + uint64_t FindBlockMaxCount; + uint8_t BlockReuseMinPercentLimit; + bool AllowMultiparts; + bool CreateBuild; + bool IgnoreExistingBlocks; + bool UploadToZenCache; + bool IsQuiet = false; + bool IsVerbose = false; + bool DoExtraContentVerify = false; + const std::vector<std::string>& ExcludeFolders = DefaultExcludeFolders; + const std::vector<std::string>& ExcludeExtensions = DefaultExcludeExtensions; +}; + +std::vector<std::pair<Oid, std::string>> UploadFolder(LoggerRef Log, + ProgressBase& Progress, + TransferThreadWorkers& Workers, + StorageInstance& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + const Oid& BuildId, + const Oid& BuildPartId, + std::string_view BuildPartName, + const std::filesystem::path& Path, + const std::filesystem::path& ManifestPath, + const CbObject& MetaData, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + const UploadFolderOptions& Options); + +} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/builds/buildvalidatebuildpart.h b/src/zenremotestore/include/zenremotestore/builds/buildvalidatebuildpart.h new file mode 100644 index 000000000..d9403c67b --- /dev/null +++ b/src/zenremotestore/include/zenremotestore/builds/buildvalidatebuildpart.h @@ -0,0 +1,125 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
#pragma once

#include <zencore/iohash.h>
#include <zencore/logging.h>
#include <zencore/uid.h>
#include <zenremotestore/builds/buildstoragestats.h>
#include <zenremotestore/chunking/chunkblock.h>

#include <atomic>
#include <filesystem>

namespace zen {

class BuildStorageBase;
class FilteredRate;
class ParallelWork;
class ProgressBase;
class TransferThreadWorkers;
class WorkerThreadPool;

//////////////////////////////////////////////////////////////////////////

// Validates a single build part in storage: fetches the build and part,
// downloads each attachment (chunk or block) and verifies it against its
// recorded hash, accumulating statistics into m_ValidateStats/m_DownloadStats.
class BuildsOperationValidateBuildPart
{
public:
	struct Options
	{
		// Scratch area for streaming large chunk downloads. Must be non-empty.
		std::filesystem::path TempFolder;
		bool IsQuiet = false;
		bool IsVerbose = false;
	};

	// All reference parameters must outlive this operation object.
	BuildsOperationValidateBuildPart(LoggerRef Log,
	                                 ProgressBase& Progress,
	                                 BuildStorageBase& Storage,
	                                 std::atomic<bool>& AbortFlag,
	                                 std::atomic<bool>& PauseFlag,
	                                 WorkerThreadPool& IOWorkerPool,
	                                 WorkerThreadPool& NetworkPool,
	                                 const Oid& BuildId,
	                                 const Oid& BuildPartId,
	                                 const std::string_view BuildPartName,
	                                 const Options& Options);

	// Runs the validation; results are left in the public statistics members.
	void Execute();

	ValidateStatistics m_ValidateStats;
	DownloadStatistics m_DownloadStats;

private:
	// Progress-reporting steps of the validation, in execution order.
	enum class TaskSteps : uint32_t
	{
		FetchBuild,
		FetchBuildPart,
		ValidateBlobs,
		Cleanup,
		StepCount
	};

	// Verifies a downloaded block payload against `BlobHash` and returns its
	// parsed description, reporting compressed/decompressed byte counts.
	ChunkBlockDescription ValidateChunkBlock(IoBuffer&& Payload,
	                                         const IoHash& BlobHash,
	                                         uint64_t& OutCompressedSize,
	                                         uint64_t& OutDecompressedSize);

	// Shared state for the parallel blob validation; references must outlive
	// the scheduled work.
	struct ValidateBlobsContext
	{
		ParallelWork& Work;
		uint64_t AttachmentsToVerifyCount;
		FilteredRate& FilteredDownloadedBytesPerSecond;
		FilteredRate& FilteredVerifiedBytesPerSecond;
	};

	// Attachments referenced by the build part, split by kind.
	struct ResolvedBuildPart
	{
		std::vector<IoHash> ChunkAttachments;
		std::vector<IoHash> BlockAttachments;
		uint64_t PreferredMultipartChunkSize = 0;
	};

	ResolvedBuildPart ResolveBuildPart();

	// Queues download+verify of loose chunk attachments; large chunks
	// presumably stream through TempFolder — TODO confirm.
	void ScheduleChunkAttachmentValidation(ValidateBlobsContext& Context,
	                                       std::span<const IoHash> ChunkAttachments,
	                                       const std::filesystem::path& TempFolder,
	                                       uint64_t PreferredMultipartChunkSize);

	// Queues download+verify of chunk-block attachments.
	void ScheduleBlockAttachmentValidation(ValidateBlobsContext& Context, std::span<const IoHash> BlockAttachments);

	void ValidateDownloadedChunk(ValidateBlobsContext& Context, const IoHash& ChunkHash, IoBuffer Payload);

	void ValidateDownloadedBlock(ValidateBlobsContext& Context, const IoHash& BlockAttachment, IoBuffer Payload);

	LoggerRef Log() { return m_Log; }

	// Dependencies injected at construction; references must outlive this object.
	LoggerRef			m_Log;
	ProgressBase&		m_Progress;
	BuildStorageBase&	m_Storage;
	std::atomic<bool>&	m_AbortFlag;  // set externally to cancel the operation
	std::atomic<bool>&	m_PauseFlag;  // set externally to pause the operation
	WorkerThreadPool&	m_IOWorkerPool;
	WorkerThreadPool&	m_NetworkPool;
	const Oid			m_BuildId;
	Oid					m_BuildPartId;  // non-const: presumably resolved/overwritten during Execute — TODO confirm
	const std::string	m_BuildPartName;
	const Options		m_Options;
};

//////////////////////////////////////////////////////////////////////////

// Convenience wrapper that constructs and executes BuildsOperationValidateBuildPart.
void ValidateBuildPart(LoggerRef Log,
                       ProgressBase& Progress,
                       std::atomic<bool>& AbortFlag,
                       std::atomic<bool>& PauseFlag,
                       bool IsQuiet,
                       bool IsVerbose,
                       TransferThreadWorkers& Workers,
                       BuildStorageBase& Storage,
                       const std::filesystem::path& TempFolder,
                       const Oid& BuildId,
                       const Oid& BuildPartId,
                       std::string_view BuildPartName);

} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h b/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h
index 888ec8ead..270835521 100644
--- a/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h
+++ b/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h
@@ -22,4 +22,6 @@ bool ParseBuildStorageUrl(std::string_view InUrl,
 	std::string& OutBucket,
 	std::string& OutBuildId);
 
+void jupiterbuildstorage_forcelink();
+
 } // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h b/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h
index 5a17ef79c..73d037542 100644
---
a/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h @@ -3,6 +3,7 @@ #pragma once #include <zencore/iohash.h> +#include <zencore/logbase.h> #include <zencore/compactbinary.h> #include <zencore/compress.h> @@ -11,9 +12,6 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_map.h> ZEN_THIRD_PARTY_INCLUDES_END -#include <optional> -#include <vector> - namespace zen { struct ThinChunkBlockDescription @@ -24,16 +22,17 @@ struct ThinChunkBlockDescription struct ChunkBlockDescription : public ThinChunkBlockDescription { - uint64_t HeaderSize; + uint64_t HeaderSize = 0; std::vector<uint32_t> ChunkRawLengths; std::vector<uint32_t> ChunkCompressedLengths; }; std::vector<ChunkBlockDescription> ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject); ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject); +std::vector<ChunkBlockDescription> ParseBlockMetadatas(std::span<const CbObject> BlockMetadatas); CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData); ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash); -typedef std::function<std::pair<uint64_t, CompressedBuffer>(const IoHash& RawHash)> FetchChunkFunc; +typedef std::function<std::pair<uint64_t, CompositeBuffer>(const IoHash& RawHash)> FetchChunkFunc; CompressedBuffer GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock); bool IterateChunkBlock(const SharedBuffer& BlockPayload, @@ -66,9 +65,7 @@ struct ReuseBlocksStatistics } }; -class OperationLogOutput; - -std::vector<size_t> FindReuseBlocks(OperationLogOutput& Output, +std::vector<size_t> FindReuseBlocks(LoggerRef InLog, const uint8_t BlockReuseMinPercentLimit, const bool IsVerbose, ReuseBlocksStatistics& Stats, @@ -82,15 +79,18 @@ class ChunkBlockAnalyser public: struct Options { - bool IsQuiet = 
false; - bool IsVerbose = false; - double HostLatencySec = -1.0; - double HostHighSpeedLatencySec = -1.0; - uint64_t HostSpeedBytesPerSec = (1u * 1024u * 1024u * 1024u) / 8u; // 1GBit - uint64_t HostHighSpeedBytesPerSec = (2u * 1024u * 1024u * 1024u) / 8u; // 2GBit + bool IsQuiet = false; + bool IsVerbose = false; + double HostLatencySec = -1.0; + double HostHighSpeedLatencySec = -1.0; + uint64_t HostSpeedBytesPerSec = (1u * 1024u * 1024u * 1024u) / 8u; // 1GBit + uint64_t HostHighSpeedBytesPerSec = (2u * 1024u * 1024u * 1024u) / 8u; // 2GBit + uint64_t HostMaxRangeCountPerRequest = (uint64_t)-1; + uint64_t HostHighSpeedMaxRangeCountPerRequest = (uint64_t)-1; // No limit + uint64_t MaxRangesPerBlock = 1024u; }; - ChunkBlockAnalyser(OperationLogOutput& LogOutput, std::span<const ChunkBlockDescription> BlockDescriptions, const Options& Options); + ChunkBlockAnalyser(LoggerRef Log, std::span<const ChunkBlockDescription> BlockDescriptions, const Options& Options); struct BlockRangeDescriptor { @@ -110,7 +110,7 @@ public: std::vector<NeededBlock> GetNeeded(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, std::function<bool(uint32_t ChunkIndex)>&& NeedsBlockChunk); - enum EPartialBlockDownloadMode + enum class EPartialBlockDownloadMode { Off, SingleRange, @@ -129,49 +129,17 @@ public: std::span<const EPartialBlockDownloadMode> BlockPartialDownloadModes); private: - struct BlockRangeLimit - { - uint16_t SizePercent; - uint16_t MaxRangeCount; - }; + LoggerRef Log() { return m_Log; } - static constexpr uint16_t FullBlockRangePercentLimit = 98; - - static constexpr BlockRangeLimit ForceMergeLimits[] = {{.SizePercent = FullBlockRangePercentLimit, .MaxRangeCount = 1}, - {.SizePercent = 90, .MaxRangeCount = 4}, - {.SizePercent = 85, .MaxRangeCount = 16}, - {.SizePercent = 80, .MaxRangeCount = 32}, - {.SizePercent = 75, .MaxRangeCount = 48}, - {.SizePercent = 70, .MaxRangeCount = 64}, - {.SizePercent = 4, .MaxRangeCount = 82}, - {.SizePercent = 0, 
.MaxRangeCount = 96}}; - - BlockRangeDescriptor MergeBlockRanges(std::span<const BlockRangeDescriptor> Ranges); - std::optional<std::vector<BlockRangeDescriptor>> MakeOptionalBlockRangeVector(uint64_t TotalBlockSize, - const BlockRangeDescriptor& Range); - const BlockRangeLimit* GetBlockRangeLimitForRange(std::span<const BlockRangeLimit> Limits, - uint64_t TotalBlockSize, - std::span<const BlockRangeDescriptor> Ranges); - std::vector<BlockRangeDescriptor> CollapseBlockRanges(const uint64_t AlwaysAcceptableGap, - std::span<const BlockRangeDescriptor> BlockRanges); - uint64_t CalculateNextGap(const uint64_t AlwaysAcceptableGap, std::span<const BlockRangeDescriptor> BlockRanges); - std::optional<std::vector<BlockRangeDescriptor>> CalculateBlockRanges(uint32_t BlockIndex, - const ChunkBlockDescription& BlockDescription, - std::span<const uint32_t> BlockChunkIndexNeeded, - EPartialBlockDownloadMode PartialBlockDownloadMode, - const uint64_t ChunkStartOffsetInBlock, - const uint64_t TotalBlockSize, - uint64_t& OutTotalWantedChunksSize); - - OperationLogOutput& m_LogOutput; + LoggerRef m_Log; const std::span<const ChunkBlockDescription> m_BlockDescriptions; const Options m_Options; }; #if ZEN_WITH_TESTS -class CbWriter; void chunkblock_forcelink(); + #endif // ZEN_WITH_TESTS } // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h index d402bd3f0..f374211f2 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h @@ -3,7 +3,6 @@ #pragma once #include <zencore/compactbinary.h> -#include <zencore/compactbinarybuilder.h> #include <zencore/iohash.h> #include <filesystem> @@ -231,7 +230,7 @@ GetSequenceIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& Raw inline uint32_t GetChunkIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) { 
- return Lookup.RawHashToSequenceIndex.at(RawHash); + return Lookup.ChunkHashToChunkIndex.at(RawHash); } inline uint32_t diff --git a/src/zenremotestore/include/zenremotestore/filesystemutils.h b/src/zenremotestore/include/zenremotestore/filesystemutils.h deleted file mode 100644 index cb2d718f7..000000000 --- a/src/zenremotestore/include/zenremotestore/filesystemutils.h +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include <zencore/basicfile.h> -#include <zenremotestore/chunking/chunkedcontent.h> - -namespace zen { - -class CompositeBuffer; - -class BufferedOpenFile -{ -public: - static constexpr uint64_t BlockSize = 256u * 1024u; - - BufferedOpenFile(const std::filesystem::path Path, - std::atomic<uint64_t>& OpenReadCount, - std::atomic<uint64_t>& CurrentOpenFileCount, - std::atomic<uint64_t>& ReadCount, - std::atomic<uint64_t>& ReadByteCount); - ~BufferedOpenFile(); - BufferedOpenFile() = delete; - BufferedOpenFile(const BufferedOpenFile&) = delete; - BufferedOpenFile(BufferedOpenFile&&) = delete; - BufferedOpenFile& operator=(BufferedOpenFile&&) = delete; - BufferedOpenFile& operator=(const BufferedOpenFile&) = delete; - - CompositeBuffer GetRange(uint64_t Offset, uint64_t Size); - -public: - void* Handle() { return m_Source.Handle(); } - -private: - BasicFile m_Source; - const uint64_t m_SourceSize; - std::atomic<uint64_t>& m_OpenReadCount; - std::atomic<uint64_t>& m_CurrentOpenFileCount; - std::atomic<uint64_t>& m_ReadCount; - std::atomic<uint64_t>& m_ReadByteCount; - uint64_t m_CacheBlockIndex = (uint64_t)-1; - IoBuffer m_Cache; -}; - -class ReadFileCache -{ -public: - // A buffered file reader that provides CompositeBuffer where the buffers are owned and the memory never overwritten - ReadFileCache(std::atomic<uint64_t>& OpenReadCount, - std::atomic<uint64_t>& CurrentOpenFileCount, - std::atomic<uint64_t>& ReadCount, - std::atomic<uint64_t>& ReadByteCount, - const std::filesystem::path& Path, - const 
ChunkedFolderContent& LocalContent, - const ChunkedContentLookup& LocalLookup, - size_t MaxOpenFileCount); - ~ReadFileCache(); - - CompositeBuffer GetRange(uint32_t SequenceIndex, uint64_t Offset, uint64_t Size); - -private: - const std::filesystem::path m_Path; - const ChunkedFolderContent& m_LocalContent; - const ChunkedContentLookup& m_LocalLookup; - std::vector<std::pair<uint32_t, std::unique_ptr<BufferedOpenFile>>> m_OpenFiles; - std::atomic<uint64_t>& m_OpenReadCount; - std::atomic<uint64_t>& m_CurrentOpenFileCount; - std::atomic<uint64_t>& m_ReadCount; - std::atomic<uint64_t>& m_ReadByteCount; -}; - -uint32_t SetNativeFileAttributes(const std::filesystem::path FilePath, SourcePlatform SourcePlatform, uint32_t Attributes); - -uint32_t GetNativeFileAttributes(const std::filesystem::path FilePath); - -bool IsFileWithRetry(const std::filesystem::path& Path); - -bool SetFileReadOnlyWithRetry(const std::filesystem::path& Path, bool ReadOnly); - -std::error_code RenameFileWithRetry(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath); - -std::error_code TryRemoveFile(const std::filesystem::path& Path); - -void RemoveFileWithRetry(const std::filesystem::path& Path); - -void FastCopyFile(bool AllowFileClone, - bool UseSparseFiles, - const std::filesystem::path& SourceFilePath, - const std::filesystem::path& TargetFilePath, - uint64_t RawSize, - std::atomic<uint64_t>& WriteCount, - std::atomic<uint64_t>& WriteByteCount, - std::atomic<uint64_t>& CloneCount, - std::atomic<uint64_t>& CloneByteCount); - -struct CleanDirectoryResult -{ - uint64_t FoundCount = 0; - uint64_t DeletedCount = 0; - uint64_t DeletedByteCount = 0; - std::vector<std::pair<std::filesystem::path, std::error_code>> FailedRemovePaths; -}; - -CleanDirectoryResult CleanDirectory( - WorkerThreadPool& IOWorkerPool, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - const std::filesystem::path& Path, - std::span<const std::string> ExcludeDirectories, - 
std::function<void(const std::string_view Details, uint64_t TotalCount, uint64_t RemainingCount, bool IsPaused, bool IsAborted)>&& - ProgressFunc, - uint32_t ProgressUpdateDelayMS); - -bool CleanAndRemoveDirectory(WorkerThreadPool& WorkerPool, - std::atomic<bool>& AbortFlag, - std::atomic<bool>& PauseFlag, - const std::filesystem::path& Directory); - -void filesystemutils_forcelink(); // internal - -} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h b/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h index 7bbf40dfa..caf7ecd28 100644 --- a/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h +++ b/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h @@ -2,6 +2,7 @@ #pragma once +#include <cstdint> #include <string> #include <string_view> #include <vector> @@ -28,7 +29,8 @@ struct JupiterEndpointTestResult { bool Success = false; std::string FailureReason; - double LatencySeconds = -1.0; + double LatencySeconds = -1.0; + uint64_t MaxRangeCountPerRequest = 1; }; JupiterEndpointTestResult TestJupiterEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpVerbose); diff --git a/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h b/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h index eaf6962fd..8721bc37f 100644 --- a/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h +++ b/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h @@ -56,6 +56,11 @@ struct FinalizeBuildPartResult : JupiterResult std::vector<IoHash> Needs; }; +struct BuildBlobRangesResult : JupiterResult +{ + std::vector<std::pair<uint64_t, uint64_t>> Ranges; +}; + /** * Context for performing Jupiter operations * @@ -135,6 +140,13 @@ public: uint64_t Offset = 0, uint64_t Size = (uint64_t)-1); + BuildBlobRangesResult GetBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + 
std::filesystem::path TempFolderPath, + std::span<const std::pair<uint64_t, uint64_t>> Ranges); + JupiterResult PutMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, diff --git a/src/zenremotestore/include/zenremotestore/operationlogoutput.h b/src/zenremotestore/include/zenremotestore/operationlogoutput.h deleted file mode 100644 index 6f10ab156..000000000 --- a/src/zenremotestore/include/zenremotestore/operationlogoutput.h +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include <zencore/fmtutils.h> -#include <zencore/logbase.h> - -namespace zen { - -class OperationLogOutput -{ -public: - virtual ~OperationLogOutput() {} - virtual void EmitLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args) = 0; - - virtual void SetLogOperationName(std::string_view Name) = 0; - virtual void SetLogOperationProgress(uint32_t StepIndex, uint32_t StepCount) = 0; - virtual uint32_t GetProgressUpdateDelayMS() = 0; - - class ProgressBar - { - public: - struct State - { - bool operator==(const State&) const = default; - std::string Task; - std::string Details; - uint64_t TotalCount = 0; - uint64_t RemainingCount = 0; - enum class EStatus - { - Running, - Aborted, - Paused - }; - EStatus Status = EStatus::Running; - - static EStatus CalculateStatus(bool IsAborted, bool IsPaused) - { - if (IsAborted) - { - return EStatus::Aborted; - } - if (IsPaused) - { - return EStatus::Paused; - } - return EStatus::Running; - } - }; - - virtual ~ProgressBar() {} - - virtual void UpdateState(const State& NewState, bool DoLinebreak) = 0; - virtual void Finish() = 0; - }; - - virtual ProgressBar* CreateProgressBar(std::string_view InSubTask) = 0; -}; - -OperationLogOutput* CreateStandardLogOutput(LoggerRef Log); - -#define ZEN_OPERATION_LOG(OutputTarget, InLevel, fmtstr, ...) 
\ - do \ - { \ - using namespace std::literals; \ - ZEN_CHECK_FORMAT_STRING(fmtstr##sv, ##__VA_ARGS__); \ - OutputTarget.EmitLogMessage(InLevel, fmtstr, zen::logging::LogCaptureArguments(__VA_ARGS__)); \ - } while (false) - -#define ZEN_OPERATION_LOG_INFO(OutputTarget, fmtstr, ...) \ - ZEN_OPERATION_LOG((OutputTarget), zen::logging::level::Info, fmtstr, ##__VA_ARGS__) -#define ZEN_OPERATION_LOG_DEBUG(OutputTarget, fmtstr, ...) \ - ZEN_OPERATION_LOG((OutputTarget), zen::logging::level::Debug, fmtstr, ##__VA_ARGS__) -#define ZEN_OPERATION_LOG_WARN(OutputTarget, fmtstr, ...) \ - ZEN_OPERATION_LOG((OutputTarget), zen::logging::level::Warn, fmtstr, ##__VA_ARGS__) - -} // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h b/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h index 66dfcc62d..c058e1c1f 100644 --- a/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h +++ b/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h @@ -2,6 +2,7 @@ #pragma once +#include <zenhttp/httpclient.h> #include <zenremotestore/projectstore/remoteprojectstore.h> namespace zen { @@ -10,9 +11,6 @@ class AuthMgr; struct BuildsRemoteStoreOptions : RemoteStoreOptions { - std::string Host; - std::string OverrideHost; - std::string ZenHost; std::string Namespace; std::string Bucket; Oid BuildId; @@ -22,20 +20,16 @@ struct BuildsRemoteStoreOptions : RemoteStoreOptions std::filesystem::path OidcExePath; bool ForceDisableBlocks = false; bool ForceDisableTempBlocks = false; - bool AssumeHttp2 = false; - bool PopulateCache = true; IoBuffer MetaData; size_t MaximumInMemoryDownloadSize = 1024u * 1024u; }; -std::shared_ptr<RemoteProjectStore> CreateJupiterBuildsRemoteStore(LoggerRef InLog, - const BuildsRemoteStoreOptions& Options, - const std::filesystem::path& TempFilePath, - bool Quiet, - bool Unattended, - bool Hidden, - WorkerThreadPool& 
CacheBackgroundWorkerPool, - double& OutHostLatencySec, - double& OutCacheLatencySec); +struct BuildStorageResolveResult; + +std::shared_ptr<RemoteProjectStore> CreateJupiterBuildsRemoteStore(LoggerRef InLog, + const BuildStorageResolveResult& ResolveResult, + std::function<HttpClientAccessToken()>&& TokenProvider, + const BuildsRemoteStoreOptions& Options, + const std::filesystem::path& TempFilePath); } // namespace zen diff --git a/src/zenremotestore/include/zenremotestore/projectstore/projectstoreoperations.h b/src/zenremotestore/include/zenremotestore/projectstore/projectstoreoperations.h index a07ede6f6..db5b27d3f 100644 --- a/src/zenremotestore/include/zenremotestore/projectstore/projectstoreoperations.h +++ b/src/zenremotestore/include/zenremotestore/projectstore/projectstoreoperations.h @@ -20,7 +20,7 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace zen { class BuildStorageBase; -class OperationLogOutput; +class ProgressBase; struct StorageInstance; class ProjectStoreOperationOplogState @@ -34,10 +34,7 @@ public: std::filesystem::path TempFolderPath; }; - ProjectStoreOperationOplogState(OperationLogOutput& OperationLogOutput, - StorageInstance& Storage, - const Oid& BuildId, - const Options& Options); + ProjectStoreOperationOplogState(LoggerRef Log, StorageInstance& Storage, const Oid& BuildId, const Options& Options); CbObjectView LoadBuildObject(); CbObjectView LoadBuildPartsObject(); @@ -51,10 +48,12 @@ public: const Oid& GetBuildPartId(); private: - OperationLogOutput& m_LogOutput; - StorageInstance& m_Storage; - const Oid m_BuildId; - const Options m_Options; + LoggerRef Log() { return m_Log; } + + LoggerRef m_Log; + StorageInstance& m_Storage; + const Oid m_BuildId; + const Options m_Options; Oid m_BuildPartId = Oid::Zero; CbObject m_BuildObject; @@ -79,7 +78,8 @@ public: bool PopulateCache = true; }; - ProjectStoreOperationDownloadAttachments(OperationLogOutput& OperationLogOutput, + ProjectStoreOperationDownloadAttachments(LoggerRef Log, + ProgressBase& 
Progress, StorageInstance& Storage, std::atomic<bool>& AbortFlag, std::atomic<bool>& PauseFlag, @@ -92,12 +92,15 @@ public: void Execute(); private: - OperationLogOutput& m_LogOutput; - StorageInstance& m_Storage; - std::atomic<bool>& m_AbortFlag; - std::atomic<bool>& m_PauseFlag; - WorkerThreadPool& m_IOWorkerPool; - WorkerThreadPool& m_NetworkPool; + LoggerRef Log() { return m_Log; } + + LoggerRef m_Log; + ProgressBase& m_Progress; + StorageInstance& m_Storage; + std::atomic<bool>& m_AbortFlag; + std::atomic<bool>& m_PauseFlag; + WorkerThreadPool& m_IOWorkerPool; + WorkerThreadPool& m_NetworkPool; ProjectStoreOperationOplogState& m_State; const tsl::robin_set<IoHash, IoHash::Hasher> m_AttachmentHashes; diff --git a/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h b/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h index 152c02ee2..b81708341 100644 --- a/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h +++ b/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h @@ -5,6 +5,7 @@ #include <zencore/jobqueue.h> #include <zenstore/projectstore.h> +#include <zenremotestore/builds/buildstoragecache.h> #include <zenremotestore/chunking/chunkblock.h> #include <zenremotestore/partialblockrequestmode.h> @@ -79,29 +80,30 @@ public: std::vector<ChunkBlockDescription> Blocks; }; - struct AttachmentExistsInCacheResult : public Result + struct LoadAttachmentRangesResult : public Result { - std::vector<bool> HasBody; + IoBuffer Bytes; + std::vector<std::pair<uint64_t, uint64_t>> Ranges; }; struct RemoteStoreInfo { - bool CreateBlocks; - bool UseTempBlockFiles; - bool AllowChunking; + bool CreateBlocks = false; + bool UseTempBlockFiles = false; + bool AllowChunking = false; std::string ContainerName; std::string Description; }; struct Stats { - std::uint64_t m_SentBytes; - std::uint64_t m_ReceivedBytes; - std::uint64_t m_RequestTimeNS; - std::uint64_t m_RequestCount; - std::uint64_t 
m_PeakSentBytes; - std::uint64_t m_PeakReceivedBytes; - std::uint64_t m_PeakBytesPerSec; + std::uint64_t m_SentBytes = 0; + std::uint64_t m_ReceivedBytes = 0; + std::uint64_t m_RequestTimeNS = 0; + std::uint64_t m_RequestCount = 0; + std::uint64_t m_PeakSentBytes = 0; + std::uint64_t m_PeakReceivedBytes = 0; + std::uint64_t m_PeakBytesPerSec = 0; }; struct ExtendedStats @@ -122,22 +124,17 @@ public: virtual FinalizeResult FinalizeContainer(const IoHash& RawHash) = 0; virtual SaveAttachmentsResult SaveAttachments(const std::vector<SharedBuffer>& Payloads) = 0; - virtual LoadContainerResult LoadContainer() = 0; - virtual GetKnownBlocksResult GetKnownBlocks() = 0; - virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes) = 0; - virtual AttachmentExistsInCacheResult AttachmentExistsInCache(std::span<const IoHash> RawHashes) = 0; + virtual LoadContainerResult LoadContainer() = 0; + virtual GetKnownBlocksResult GetKnownBlocks() = 0; + virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes, + BuildStorageCache* OptionalCache, + const Oid& CacheBuildId) = 0; - struct AttachmentRange - { - uint64_t Offset = 0; - uint64_t Bytes = (uint64_t)-1; - - inline operator bool() const { return Offset != 0 || Bytes != (uint64_t)-1; } - }; - virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash, const AttachmentRange& Range) = 0; - virtual LoadAttachmentsResult LoadAttachments(const std::vector<IoHash>& RawHashes) = 0; + virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) = 0; - virtual void Flush() = 0; + virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) = 0; + virtual LoadAttachmentsResult LoadAttachments(const std::vector<IoHash>& RawHashes) = 0; }; struct RemoteStoreOptions @@ -153,27 +150,52 @@ struct RemoteStoreOptions size_t ChunkFileSizeLimit = DefaultChunkFileSizeLimit; }; -typedef 
std::function<IoBuffer(const IoHash& AttachmentHash)> TGetAttachmentBufferFunc; - -RemoteProjectStore::LoadContainerResult BuildContainer( - CidStore& ChunkStore, - ProjectStore::Project& Project, - ProjectStore::Oplog& Oplog, - WorkerThreadPool& WorkerPool, - size_t MaxBlockSize, - size_t MaxChunksPerBlock, - size_t MaxChunkEmbedSize, - size_t ChunkFileSizeLimit, - bool BuildBlocks, - bool IgnoreMissingAttachments, - bool AllowChunking, - const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, - const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, - const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, - bool EmbedLooseFiles); +typedef std::function<CompositeBuffer(const IoHash& AttachmentHash)> TGetAttachmentBufferFunc; + +CbObject BuildContainer(LoggerRef InLog, + CidStore& ChunkStore, + ProjectStore::Project& Project, + ProjectStore::Oplog& Oplog, + WorkerThreadPool& WorkerPool, + size_t MaxBlockSize, + size_t MaxChunksPerBlock, + size_t MaxChunkEmbedSize, + size_t ChunkFileSizeLimit, + bool BuildBlocks, + bool IgnoreMissingAttachments, + bool AllowChunking, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, + const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, + const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, + bool EmbedLooseFiles); class JobContext; +class RemoteStoreError : public std::runtime_error +{ +public: + RemoteStoreError(const std::string& Message, int32_t ErrorCode, std::string_view Text) + : std::runtime_error(Message) + , m_ErrorCode(ErrorCode) + , m_Text(Text) + { + } + + RemoteStoreError(const char* Message, int32_t ErrorCode, std::string_view Text) + : std::runtime_error(Message) + , m_ErrorCode(ErrorCode) + , m_Text(Text) + { + } + + inline int32_t GetErrorCode() const { return m_ErrorCode; } + inline std::string_view 
GetText() const { return m_Text; } + +private: + int32_t m_ErrorCode = 0; + std::string m_Text; +}; + RemoteProjectStore::Result SaveOplogContainer( ProjectStore::Oplog& Oplog, const CbObject& ContainerObject, @@ -184,33 +206,46 @@ RemoteProjectStore::Result SaveOplogContainer( const std::function<void(const ChunkedInfo& Chunked)>& OnChunkedAttachment, JobContext* OptionalContext); -RemoteProjectStore::Result SaveOplog(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - ProjectStore::Project& Project, - ProjectStore::Oplog& Oplog, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - size_t MaxBlockSize, - size_t MaxChunksPerBlock, - size_t MaxChunkEmbedSize, - size_t ChunkFileSizeLimit, - bool EmbedLooseFiles, - bool ForceUpload, - bool IgnoreMissingAttachments, - JobContext* OptionalContext); - -RemoteProjectStore::Result LoadOplog(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - ProjectStore::Oplog& Oplog, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - bool ForceDownload, - bool IgnoreMissingAttachments, - bool CleanOplog, - EPartialBlockRequestMode PartialBlockRequestMode, - double HostLatencySec, - double CacheLatencySec, - JobContext* OptionalContext); +void SaveOplog(LoggerRef InLog, + CidStore& ChunkStore, + RemoteProjectStore& RemoteStore, + ProjectStore::Project& Project, + ProjectStore::Oplog& Oplog, + WorkerThreadPool& NetworkWorkerPool, + WorkerThreadPool& WorkerPool, + size_t MaxBlockSize, + size_t MaxChunksPerBlock, + size_t MaxChunkEmbedSize, + size_t ChunkFileSizeLimit, + bool EmbedLooseFiles, + bool ForceUpload, + bool IgnoreMissingAttachments, + JobContext* OptionalContext); + +struct LoadOplogContext +{ + LoggerRef Log; + CidStore& ChunkStore; + RemoteProjectStore& RemoteStore; + BuildStorageCache* OptionalCache = nullptr; + Oid CacheBuildId = Oid::Zero; + BuildStorageCache::Statistics* OptionalCacheStats = nullptr; + ProjectStore::Oplog& Oplog; + WorkerThreadPool& NetworkWorkerPool; 
+ WorkerThreadPool& WorkerPool; + bool ForceDownload = false; + bool IgnoreMissingAttachments = false; + bool CleanOplog = false; + EPartialBlockRequestMode PartialBlockRequestMode = EPartialBlockRequestMode::All; + bool PopulateCache = false; + double StoreLatencySec = -1.0; + uint64_t StoreMaxRangeCountPerRequest = 1; + double CacheLatencySec = -1.0; + uint64_t CacheMaxRangeCountPerRequest = 1; + JobContext* OptionalJobContext = nullptr; +}; + +void LoadOplog(LoadOplogContext&& Context); std::vector<IoHash> GetBlockHashesFromOplog(CbObjectView ContainerObject); std::vector<ThinChunkBlockDescription> GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> IncludeBlockHashes); diff --git a/src/zenremotestore/include/zenremotestore/transferthreadworkers.h b/src/zenremotestore/include/zenremotestore/transferthreadworkers.h index a7faacfd5..6b6584614 100644 --- a/src/zenremotestore/include/zenremotestore/transferthreadworkers.h +++ b/src/zenremotestore/include/zenremotestore/transferthreadworkers.h @@ -3,7 +3,6 @@ #pragma once #include <zenbase/refcount.h> -#include <zencore/timer.h> #include <zencore/zencore.h> #include <memory> diff --git a/src/zenremotestore/jupiter/jupiterhost.cpp b/src/zenremotestore/jupiter/jupiterhost.cpp index 2583cfc84..314aafc78 100644 --- a/src/zenremotestore/jupiter/jupiterhost.cpp +++ b/src/zenremotestore/jupiter/jupiterhost.cpp @@ -59,13 +59,22 @@ TestJupiterEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpClient::Response TestResponse = TestHttpClient.Get("/health/live"); if (TestResponse.IsSuccess()) { + // TODO: dan.engelbrecht 20260305 - replace this naive nginx detection with proper capabilites end point once it exists in Jupiter + uint64_t MaxRangeCountPerRequest = 1; + if (auto It = TestResponse.Header.Entries.find("Server"); It != TestResponse.Header.Entries.end()) + { + if (StrCaseCompare(It->second.c_str(), "nginx", 5) == 0) + { + MaxRangeCountPerRequest = 128u; // This leaves more than 
2k header space for auth token etc + } + } LatencyTestResult LatencyResult = MeasureLatency(TestHttpClient, "/health/ready"); if (!LatencyResult.Success) { return {.Success = false, .FailureReason = LatencyResult.FailureReason}; } - return {.Success = true, .LatencySeconds = LatencyResult.LatencySeconds}; + return {.Success = true, .LatencySeconds = LatencyResult.LatencySeconds, .MaxRangeCountPerRequest = MaxRangeCountPerRequest}; } return {.Success = false, .FailureReason = TestResponse.ErrorMessage("")}; } diff --git a/src/zenremotestore/jupiter/jupitersession.cpp b/src/zenremotestore/jupiter/jupitersession.cpp index 1bc6564ce..d610d1fc8 100644 --- a/src/zenremotestore/jupiter/jupitersession.cpp +++ b/src/zenremotestore/jupiter/jupitersession.cpp @@ -68,7 +68,7 @@ namespace detail { return {.SentBytes = gsl::narrow<uint64_t>(Response.UploadedBytes), .ReceivedBytes = gsl::narrow<uint64_t>(Response.DownloadedBytes), .ElapsedSeconds = Response.ElapsedSeconds, - .ErrorCode = Response.Error.value().ErrorCode, + .ErrorCode = static_cast<int32_t>(Response.Error.value().ErrorCode), .Reason = Response.ErrorMessage(ErrorPrefix), .Success = false}; } @@ -673,7 +673,7 @@ JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, size_t RetryPartIndex = PartNameToIndex.at(RetryPartId); const MultipartUploadResponse::Part& RetryPart = Workload->PartDescription.Parts[RetryPartIndex]; IoBuffer RetryPartPayload = - Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte - 1); + Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte); std::string RetryMultipartUploadResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/uploadMultipart{}&supportsRedirect={}", Namespace, @@ -852,6 +852,72 @@ JupiterSession::GetBuildBlob(std::string_view Namespace, return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv); } +BuildBlobRangesResult +JupiterSession::GetBuildBlob(std::string_view Namespace, 
+ std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + std::filesystem::path TempFolderPath, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) +{ + HttpClient::KeyValueMap Headers; + if (!Ranges.empty()) + { + ExtendableStringBuilder<512> SB; + for (const std::pair<uint64_t, uint64_t>& R : Ranges) + { + if (SB.Size() > 0) + { + SB << ", "; + } + SB << R.first << "-" << R.first + R.second - 1; + } + Headers.Entries.insert({"Range", fmt::format("bytes={}", SB.ToView())}); + } + std::string Url = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}?supportsRedirect={}", + Namespace, + BucketId, + BuildId, + Hash.ToHexString(), + m_AllowRedirect ? "true"sv : "false"sv); + + HttpClient::Response Response = m_HttpClient.Download(Url, TempFolderPath, Headers); + if ((Response.StatusCode == HttpResponseCode::RangeNotSatisfiable || Response.StatusCode == HttpResponseCode::NotImplemented) && + Ranges.size() > 1) + { + // Requests to Jupiter that is not served via nginx (content not stored locally in the file system) can not serve multi-range + // requests (asp.net limitation) This rejection is not implemented as of 2026-03-02, it is in the backlog (@joakim.lindqvist) + // If we encounter this error we fall back to a single range which covers all the requested ranges + uint64_t RangeStart = Ranges.front().first; + uint64_t RangeEnd = Ranges.back().first + Ranges.back().second - 1; + Headers.Entries.insert_or_assign("Range", fmt::format("bytes={}-{}", RangeStart, RangeEnd)); + Response = m_HttpClient.Download(Url, TempFolderPath, Headers); + } + if (Response.IsSuccess()) + { + // If we get a redirect to S3 or a non-Jupiter endpoint the content type will not be correct, validate it and set it + if (m_AllowRedirect && (Response.ResponsePayload.GetContentType() == HttpContentType::kBinary)) + { + IoHash ValidateRawHash; + uint64_t ValidateRawSize = 0; + if (!Headers.Entries.contains("Range")) + { + 
ZEN_ASSERT_SLOW(CompressedBuffer::ValidateCompressedHeader(Response.ResponsePayload, + ValidateRawHash, + ValidateRawSize, + /*OutOptionalTotalCompressedSize*/ nullptr)); + ZEN_ASSERT_SLOW(ValidateRawHash == Hash); + ZEN_ASSERT_SLOW(ValidateRawSize > 0); + ZEN_UNUSED(ValidateRawHash, ValidateRawSize); + Response.ResponsePayload.SetContentType(ZenContentType::kCompressedBinary); + } + } + } + BuildBlobRangesResult Result = {detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv)}; + Result.Ranges = Response.GetRanges(Ranges); + return Result; +} + JupiterResult JupiterSession::PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, diff --git a/src/zenremotestore/operationlogoutput.cpp b/src/zenremotestore/operationlogoutput.cpp deleted file mode 100644 index 7ed93c947..000000000 --- a/src/zenremotestore/operationlogoutput.cpp +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include <zenremotestore/operationlogoutput.h> - -#include <zencore/logging.h> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <gsl/gsl-lite.hpp> -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { - -class StandardLogOutput; - -class StandardLogOutputProgressBar : public OperationLogOutput::ProgressBar -{ -public: - StandardLogOutputProgressBar(StandardLogOutput& Output, std::string_view InSubTask) : m_Output(Output), m_SubTask(InSubTask) {} - - virtual void UpdateState(const State& NewState, bool DoLinebreak) override; - virtual void Finish() override; - -private: - StandardLogOutput& m_Output; - std::string m_SubTask; - State m_State; -}; - -class StandardLogOutput : public OperationLogOutput -{ -public: - StandardLogOutput(LoggerRef& Log) : m_Log(Log) {} - virtual void EmitLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args) override - { - if (m_Log.ShouldLog(LogLevel)) - { - fmt::basic_memory_buffer<char, 250> MessageBuffer; - fmt::vformat_to(fmt::appender(MessageBuffer), Format, Args); - ZEN_LOG(m_Log, LogLevel, 
"{}", std::string_view(MessageBuffer.data(), MessageBuffer.size())); - } - } - - virtual void SetLogOperationName(std::string_view Name) override - { - m_LogOperationName = Name; - ZEN_OPERATION_LOG_INFO(*this, "{}", m_LogOperationName); - } - virtual void SetLogOperationProgress(uint32_t StepIndex, uint32_t StepCount) override - { - const size_t PercentDone = StepCount > 0u ? gsl::narrow<uint8_t>((100 * StepIndex) / StepCount) : 0u; - ZEN_OPERATION_LOG_INFO(*this, "{}: {}%", m_LogOperationName, PercentDone); - } - virtual uint32_t GetProgressUpdateDelayMS() override { return 2000; } - virtual ProgressBar* CreateProgressBar(std::string_view InSubTask) override - { - return new StandardLogOutputProgressBar(*this, InSubTask); - } - -private: - LoggerRef m_Log; - std::string m_LogOperationName; -}; - -void -StandardLogOutputProgressBar::UpdateState(const State& NewState, bool DoLinebreak) -{ - ZEN_UNUSED(DoLinebreak); - const size_t PercentDone = - NewState.TotalCount > 0u ? gsl::narrow<uint8_t>((100 * (NewState.TotalCount - NewState.RemainingCount)) / NewState.TotalCount) : 0u; - std::string Task = NewState.Task; - switch (NewState.Status) - { - case State::EStatus::Aborted: - Task = "Aborting"; - break; - case State::EStatus::Paused: - Task = "Paused"; - break; - default: - break; - } - ZEN_OPERATION_LOG_INFO(m_Output, "{}: {}%{}", Task, PercentDone, NewState.Details.empty() ? 
"" : fmt::format(" {}", NewState.Details)); - m_State = NewState; -} -void -StandardLogOutputProgressBar::Finish() -{ - if (m_State.RemainingCount > 0) - { - State NewState = m_State; - NewState.RemainingCount = 0; - NewState.Details = ""; - UpdateState(NewState, /*DoLinebreak*/ true); - } -} - -OperationLogOutput* -CreateStandardLogOutput(LoggerRef Log) -{ - return new StandardLogOutput(Log); -} - -} // namespace zen diff --git a/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp b/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp index c42373e4d..d7596263b 100644 --- a/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp @@ -7,11 +7,8 @@ #include <zencore/fmtutils.h> #include <zencore/scopeguard.h> -#include <zenhttp/httpclientauth.h> -#include <zenremotestore/builds/buildstoragecache.h> #include <zenremotestore/builds/buildstorageutil.h> #include <zenremotestore/builds/jupiterbuildstorage.h> -#include <zenremotestore/operationlogoutput.h> #include <numeric> @@ -26,18 +23,14 @@ class BuildsRemoteStore : public RemoteProjectStore public: BuildsRemoteStore(LoggerRef InLog, const HttpClientSettings& ClientSettings, - HttpClientSettings* OptionalCacheClientSettings, std::string_view HostUrl, - std::string_view CacheUrl, const std::filesystem::path& TempFilePath, - WorkerThreadPool& CacheBackgroundWorkerPool, std::string_view Namespace, std::string_view Bucket, const Oid& BuildId, const IoBuffer& MetaData, bool ForceDisableBlocks, - bool ForceDisableTempBlocks, - bool PopulateCache) + bool ForceDisableTempBlocks) : m_Log(InLog) , m_BuildStorageHttp(HostUrl, ClientSettings) , m_BuildStorage(CreateJupiterBuildStorage(Log(), @@ -53,20 +46,8 @@ public: , m_MetaData(MetaData) , m_EnableBlocks(!ForceDisableBlocks) , m_UseTempBlocks(!ForceDisableTempBlocks) - , m_PopulateCache(PopulateCache) { m_MetaData.MakeOwned(); - if (OptionalCacheClientSettings) - { - 
ZEN_ASSERT(!CacheUrl.empty()); - m_BuildCacheStorageHttp = std::make_unique<HttpClient>(CacheUrl, *OptionalCacheClientSettings); - m_BuildCacheStorage = CreateZenBuildStorageCache(*m_BuildCacheStorageHttp, - m_StorageCacheStats, - Namespace, - Bucket, - TempFilePath, - CacheBackgroundWorkerPool); - } } virtual RemoteStoreInfo GetInfo() const override @@ -75,9 +56,8 @@ public: .UseTempBlockFiles = m_UseTempBlocks, .AllowChunking = true, .ContainerName = fmt::format("{}/{}/{}", m_Namespace, m_Bucket, m_BuildId), - .Description = fmt::format("[cloud] {}{}. SessionId: {}. {}/{}/{}"sv, + .Description = fmt::format("[cloud] {}. SessionId: {}. {}/{}/{}"sv, m_BuildStorageHttp.GetBaseUri(), - m_BuildCacheStorage ? fmt::format(" (Cache: {})", m_BuildCacheStorageHttp->GetBaseUri()) : ""sv, m_BuildStorageHttp.GetSessionId(), m_Namespace, m_Bucket, @@ -86,15 +66,13 @@ public: virtual Stats GetStats() const override { - return { - .m_SentBytes = m_BuildStorageStats.TotalBytesWritten.load() + m_StorageCacheStats.TotalBytesWritten.load(), - .m_ReceivedBytes = m_BuildStorageStats.TotalBytesRead.load() + m_StorageCacheStats.TotalBytesRead.load(), - .m_RequestTimeNS = m_BuildStorageStats.TotalRequestTimeUs.load() * 1000 + m_StorageCacheStats.TotalRequestTimeUs.load() * 1000, - .m_RequestCount = m_BuildStorageStats.TotalRequestCount.load() + m_StorageCacheStats.TotalRequestCount.load(), - .m_PeakSentBytes = Max(m_BuildStorageStats.PeakSentBytes.load(), m_StorageCacheStats.PeakSentBytes.load()), - .m_PeakReceivedBytes = Max(m_BuildStorageStats.PeakReceivedBytes.load(), m_StorageCacheStats.PeakReceivedBytes.load()), - .m_PeakBytesPerSec = Max(m_BuildStorageStats.PeakBytesPerSec.load(), m_StorageCacheStats.PeakBytesPerSec.load()), - }; + return {.m_SentBytes = m_BuildStorageStats.TotalBytesWritten.load(), + .m_ReceivedBytes = m_BuildStorageStats.TotalBytesRead.load(), + .m_RequestTimeNS = m_BuildStorageStats.TotalRequestTimeUs.load() * 1000, + .m_RequestCount = 
m_BuildStorageStats.TotalRequestCount.load(), + .m_PeakSentBytes = m_BuildStorageStats.PeakSentBytes.load(), + .m_PeakReceivedBytes = m_BuildStorageStats.PeakReceivedBytes.load(), + .m_PeakBytesPerSec = m_BuildStorageStats.PeakBytesPerSec.load()}; } virtual bool GetExtendedStats(ExtendedStats& OutStats) const override @@ -109,11 +87,6 @@ public: } Result = true; } - if (m_BuildCacheStorage) - { - OutStats.m_ReceivedBytesPerSource.insert_or_assign("Cache", m_StorageCacheStats.TotalBytesRead); - Result = true; - } return Result; } @@ -310,9 +283,7 @@ public: } catch (const HttpClientError& Ex) { - Result.ErrorCode = Ex.GetInternalErrorCode() != 0 ? Ex.GetInternalErrorCode() - : Ex.GetHttpResponseCode() != HttpResponseCode::ImATeapot ? (int)Ex.GetHttpResponseCode() - : 0; + Result.ErrorCode = MakeErrorCode(Ex); Result.Reason = fmt::format("Failed finalizing oplog container build part to {}/{}/{}/{}/{}. Reason: '{}'", m_BuildStorageHttp.GetBaseUri(), m_Namespace, @@ -341,9 +312,7 @@ public: } catch (const HttpClientError& Ex) { - Result.ErrorCode = Ex.GetInternalErrorCode() != 0 ? Ex.GetInternalErrorCode() - : Ex.GetHttpResponseCode() != HttpResponseCode::ImATeapot ? (int)Ex.GetHttpResponseCode() - : 0; + Result.ErrorCode = MakeErrorCode(Ex); Result.Reason = fmt::format("Failed finalizing oplog container build to {}/{}/{}/{}. 
Reason: '{}'", m_BuildStorageHttp.GetBaseUri(), m_Namespace, @@ -462,11 +431,12 @@ public: return Result; } - virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes) override + virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes, + BuildStorageCache* OptionalCache, + const Oid& CacheBuildId) override { - std::unique_ptr<OperationLogOutput> Output(CreateStandardLogOutput(Log())); - ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); + ZEN_ASSERT(OptionalCache == nullptr || CacheBuildId == m_BuildId); GetBlockDescriptionsResult Result; Stopwatch Timer; @@ -474,11 +444,10 @@ public: try { - Result.Blocks = zen::GetBlockDescriptions(*Output, + Result.Blocks = zen::GetBlockDescriptions(Log(), *m_BuildStorage, - m_BuildCacheStorage.get(), + OptionalCache, m_BuildId, - m_OplogBuildPartId, BlockHashes, /*AttemptFallback*/ false, /*IsQuiet*/ false, @@ -507,99 +476,83 @@ public: return Result; } - virtual AttachmentExistsInCacheResult AttachmentExistsInCache(std::span<const IoHash> RawHashes) override + virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override { - AttachmentExistsInCacheResult Result; - Stopwatch Timer; - auto _ = MakeGuard([&Timer, &Result]() { Result.ElapsedSeconds = Timer.GetElapsedTimeUs() / 1000000.0; }); + ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); + + LoadAttachmentResult Result; + Stopwatch Timer; + auto _ = MakeGuard([&Timer, &Result]() { Result.ElapsedSeconds = Timer.GetElapsedTimeUs() / 1000000.0; }); + try { - const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = - m_BuildCacheStorage->BlobsExists(m_BuildId, RawHashes); - - if (CacheExistsResult.size() == RawHashes.size()) - { - Result.HasBody.reserve(CacheExistsResult.size()); - for (size_t BlobIndex = 0; BlobIndex < CacheExistsResult.size(); BlobIndex++) - { - Result.HasBody.push_back(CacheExistsResult[BlobIndex].HasBody); - } - } + Result.Bytes = 
m_BuildStorage->GetBuildBlob(m_BuildId, RawHash); } catch (const HttpClientError& Ex) { Result.ErrorCode = MakeErrorCode(Ex); - Result.Reason = fmt::format("Remote cache: Failed finding known blobs for {}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed getting blob {}/{}/{}/{}/{}. Reason: '{}'", m_BuildStorageHttp.GetBaseUri(), m_Namespace, m_Bucket, m_BuildId, + RawHash, Ex.what()); } catch (const std::exception& Ex) { Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("Remote cache: Failed finding known blobs for {}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed getting blob {}/{}/{}/{}/{}. Reason: '{}'", m_BuildStorageHttp.GetBaseUri(), m_Namespace, m_Bucket, m_BuildId, + RawHash, Ex.what()); } + return Result; } - virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash, const AttachmentRange& Range) override + virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override { - ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); - - LoadAttachmentResult Result; - Stopwatch Timer; - auto _ = MakeGuard([&Timer, &Result]() { Result.ElapsedSeconds = Timer.GetElapsedTimeUs() / 1000000.0; }); + ZEN_ASSERT(!Ranges.empty()); + LoadAttachmentRangesResult Result; + Stopwatch Timer; + auto _ = MakeGuard([&Timer, &Result]() { Result.ElapsedSeconds = Timer.GetElapsedTimeUs() / 1000000.0; }); try { - if (m_BuildCacheStorage) + BuildStorageBase::BuildBlobRanges BlobRanges = m_BuildStorage->GetBuildBlobRanges(m_BuildId, RawHash, Ranges); + if (BlobRanges.PayloadBuffer) { - IoBuffer CachedBlob = m_BuildCacheStorage->GetBuildBlob(m_BuildId, RawHash, Range.Offset, Range.Bytes); - if (CachedBlob) - { - Result.Bytes = std::move(CachedBlob); - } - } - if (!Result.Bytes) - { - Result.Bytes = m_BuildStorage->GetBuildBlob(m_BuildId, RawHash, Range.Offset, Range.Bytes); - if (m_BuildCacheStorage && Result.Bytes && 
m_PopulateCache) - { - if (!Range) - { - m_BuildCacheStorage->PutBuildBlob(m_BuildId, - RawHash, - Result.Bytes.GetContentType(), - CompositeBuffer(SharedBuffer(Result.Bytes))); - } - } + Result.Bytes = std::move(BlobRanges.PayloadBuffer); + Result.Ranges = std::move(BlobRanges.Ranges); } } catch (const HttpClientError& Ex) { Result.ErrorCode = MakeErrorCode(Ex); - Result.Reason = fmt::format("Failed listing known blocks for {}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed getting {} ranges for blob {}/{}/{}/{}/{}. Reason: '{}'", + Ranges.size(), m_BuildStorageHttp.GetBaseUri(), m_Namespace, m_Bucket, m_BuildId, + RawHash, Ex.what()); } catch (const std::exception& Ex) { Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("Failed listing known blocks for {}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed getting {} ranges for blob {}/{}/{}/{}/{}. Reason: '{}'", + Ranges.size(), m_BuildStorageHttp.GetBaseUri(), m_Namespace, m_Bucket, m_BuildId, + RawHash, Ex.what()); } @@ -614,72 +567,25 @@ public: std::vector<IoHash> AttachmentsLeftToFind = RawHashes; - if (m_BuildCacheStorage) - { - std::vector<BuildStorageCache::BlobExistsResult> ExistCheck = m_BuildCacheStorage->BlobsExists(m_BuildId, RawHashes); - if (ExistCheck.size() == RawHashes.size()) - { - AttachmentsLeftToFind.clear(); - for (size_t BlobIndex = 0; BlobIndex < RawHashes.size(); BlobIndex++) - { - const IoHash& Hash = RawHashes[BlobIndex]; - const BuildStorageCache::BlobExistsResult& BlobExists = ExistCheck[BlobIndex]; - if (BlobExists.HasBody) - { - IoBuffer CachedPayload = m_BuildCacheStorage->GetBuildBlob(m_BuildId, Hash); - if (CachedPayload) - { - Result.Chunks.emplace_back( - std::pair<IoHash, CompressedBuffer>{Hash, - CompressedBuffer::FromCompressedNoValidate(std::move(CachedPayload))}); - } - else - { - AttachmentsLeftToFind.push_back(Hash); - } - } - else - { - AttachmentsLeftToFind.push_back(Hash); - } - } 
- } - } - for (const IoHash& Hash : AttachmentsLeftToFind) { - LoadAttachmentResult ChunkResult = LoadAttachment(Hash, {}); + LoadAttachmentResult ChunkResult = LoadAttachment(Hash); if (ChunkResult.ErrorCode) { return LoadAttachmentsResult{ChunkResult}; } ZEN_DEBUG("Loaded attachment in {}", NiceTimeSpanMs(static_cast<uint64_t>(ChunkResult.ElapsedSeconds * 1000))); - if (m_BuildCacheStorage && ChunkResult.Bytes && m_PopulateCache) - { - m_BuildCacheStorage->PutBuildBlob(m_BuildId, - Hash, - ChunkResult.Bytes.GetContentType(), - CompositeBuffer(SharedBuffer(ChunkResult.Bytes))); - } Result.Chunks.emplace_back( std::pair<IoHash, CompressedBuffer>{Hash, CompressedBuffer::FromCompressedNoValidate(std::move(ChunkResult.Bytes))}); } return Result; } - virtual void Flush() override - { - if (m_BuildCacheStorage) - { - m_BuildCacheStorage->Flush(100, [](intptr_t) { return false; }); - } - } - private: static int MakeErrorCode(const HttpClientError& Ex) { - return Ex.GetInternalErrorCode() != 0 ? Ex.GetInternalErrorCode() - : Ex.GetHttpResponseCode() != HttpResponseCode::ImATeapot ? (int)Ex.GetHttpResponseCode() + return Ex.GetInternalErrorCode() != HttpClientErrorCode::kOK ? static_cast<int>(Ex.GetInternalErrorCode()) + : Ex.GetHttpResponseCode() != HttpResponseCode::ImATeapot ? 
static_cast<int>(Ex.GetHttpResponseCode()) : 0; } @@ -691,10 +597,6 @@ private: HttpClient m_BuildStorageHttp; std::unique_ptr<BuildStorageBase> m_BuildStorage; - BuildStorageCache::Statistics m_StorageCacheStats; - std::unique_ptr<HttpClient> m_BuildCacheStorageHttp; - std::unique_ptr<BuildStorageCache> m_BuildCacheStorage; - const std::string m_Namespace; const std::string m_Bucket; const Oid m_BuildId; @@ -703,125 +605,34 @@ private: const bool m_EnableBlocks = true; const bool m_UseTempBlocks = true; const bool m_AllowRedirect = false; - const bool m_PopulateCache = true; }; std::shared_ptr<RemoteProjectStore> -CreateJupiterBuildsRemoteStore(LoggerRef InLog, - const BuildsRemoteStoreOptions& Options, - const std::filesystem::path& TempFilePath, - bool Quiet, - bool Unattended, - bool Hidden, - WorkerThreadPool& CacheBackgroundWorkerPool, - double& OutHostLatencySec, - double& OutCacheLatencySec) +CreateJupiterBuildsRemoteStore(LoggerRef InLog, + const BuildStorageResolveResult& ResolveResult, + std::function<HttpClientAccessToken()>&& TokenProvider, + const BuildsRemoteStoreOptions& Options, + const std::filesystem::path& TempFilePath) { - std::string Host = Options.Host; - if (!Host.empty() && Host.find("://"sv) == std::string::npos) - { - // Assume https URL - Host = fmt::format("https://{}"sv, Host); - } - std::string OverrideUrl = Options.OverrideHost; - if (!OverrideUrl.empty() && OverrideUrl.find("://"sv) == std::string::npos) - { - // Assume https URL - OverrideUrl = fmt::format("https://{}"sv, OverrideUrl); - } - std::string ZenHost = Options.ZenHost; - if (!ZenHost.empty() && ZenHost.find("://"sv) == std::string::npos) - { - // Assume https URL - ZenHost = fmt::format("https://{}"sv, ZenHost); - } - - // 1) openid-provider if given (assumes oidctoken.exe -Zen true has been run with matching Options.OpenIdProvider - // 2) Access token as parameter in request - // 3) Environment variable (different win vs linux/mac) - // 4) Default openid-provider 
(assumes oidctoken.exe -Zen true has been run with matching Options.OpenIdProvider - - std::function<HttpClientAccessToken()> TokenProvider; - if (!Options.OpenIdProvider.empty()) - { - TokenProvider = httpclientauth::CreateFromOpenIdProvider(Options.AuthManager, Options.OpenIdProvider); - } - else if (!Options.AccessToken.empty()) - { - TokenProvider = httpclientauth::CreateFromStaticToken(Options.AccessToken); - } - else if (!Options.OidcExePath.empty()) - { - if (auto TokenProviderMaybe = httpclientauth::CreateFromOidcTokenExecutable(Options.OidcExePath, - Host.empty() ? OverrideUrl : Host, - Quiet, - Unattended, - Hidden); - TokenProviderMaybe) - { - TokenProvider = TokenProviderMaybe.value(); - } - } - - if (!TokenProvider) - { - TokenProvider = httpclientauth::CreateFromDefaultOpenIdProvider(Options.AuthManager); - } - - BuildStorageResolveResult ResolveRes; - { - HttpClientSettings ClientSettings{.LogCategory = "httpbuildsclient", - .AccessTokenProvider = TokenProvider, - .AssumeHttp2 = Options.AssumeHttp2, - .AllowResume = true, - .RetryCount = 2}; - - std::unique_ptr<OperationLogOutput> Output(CreateStandardLogOutput(InLog)); - - ResolveRes = - ResolveBuildStorage(*Output, ClientSettings, Host, OverrideUrl, ZenHost, ZenCacheResolveMode::Discovery, /*Verbose*/ false); - } - HttpClientSettings ClientSettings{.LogCategory = "httpbuildsclient", .ConnectTimeout = std::chrono::milliseconds(3000), .Timeout = std::chrono::milliseconds(1800000), .AccessTokenProvider = std::move(TokenProvider), - .AssumeHttp2 = ResolveRes.HostAssumeHttp2, + .AssumeHttp2 = ResolveResult.Cloud.AssumeHttp2, .AllowResume = true, .RetryCount = 4, .MaximumInMemoryDownloadSize = Options.MaximumInMemoryDownloadSize}; - std::unique_ptr<HttpClientSettings> CacheClientSettings; - - if (!ResolveRes.CacheUrl.empty()) - { - CacheClientSettings = - std::make_unique<HttpClientSettings>(HttpClientSettings{.LogCategory = "httpcacheclient", - .ConnectTimeout = std::chrono::milliseconds{3000}, - 
.Timeout = std::chrono::milliseconds{30000}, - .AssumeHttp2 = ResolveRes.CacheAssumeHttp2, - .AllowResume = true, - .RetryCount = 0, - .MaximumInMemoryDownloadSize = Options.MaximumInMemoryDownloadSize}); - } - std::shared_ptr<RemoteProjectStore> RemoteStore = std::make_shared<BuildsRemoteStore>(InLog, ClientSettings, - CacheClientSettings.get(), - ResolveRes.HostUrl, - ResolveRes.CacheUrl, + ResolveResult.Cloud.Address, TempFilePath, - CacheBackgroundWorkerPool, Options.Namespace, Options.Bucket, Options.BuildId, Options.MetaData, Options.ForceDisableBlocks, - Options.ForceDisableTempBlocks, - Options.PopulateCache); - - OutHostLatencySec = ResolveRes.HostLatencySec; - OutCacheLatencySec = ResolveRes.CacheLatencySec; + Options.ForceDisableTempBlocks); return RemoteStore; } diff --git a/src/zenremotestore/projectstore/fileremoteprojectstore.cpp b/src/zenremotestore/projectstore/fileremoteprojectstore.cpp index ec7fb7bbc..bb21de12c 100644 --- a/src/zenremotestore/projectstore/fileremoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/fileremoteprojectstore.cpp @@ -7,8 +7,12 @@ #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> +#include <zencore/scopeguard.h> #include <zencore/timer.h> #include <zenhttp/httpcommon.h> +#include <zenremotestore/builds/buildstoragecache.h> + +#include <numeric> namespace zen { @@ -74,9 +78,11 @@ public: virtual SaveResult SaveContainer(const IoBuffer& Payload) override { - Stopwatch Timer; SaveResult Result; + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + { CbObject ContainerObject = LoadCompactBinaryObject(Payload); @@ -87,6 +93,10 @@ public: { Result.Needs.insert(AttachmentHash); } + else if (std::filesystem::path AttachmentMetaPath = GetAttachmentMetaPath(AttachmentHash); IsFile(AttachmentMetaPath)) + { + BasicFile TouchIt(AttachmentMetaPath, BasicFile::Mode::kWrite); + } }); } @@ -112,14 +122,18 @@ public: 
Result.Reason = fmt::format("Failed saving oplog container to '{}'. Reason: {}", ContainerPath, Ex.what()); } AddStats(Payload.GetSize(), 0, Timer.GetElapsedTimeUs() * 1000); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, + const IoHash& RawHash, + ChunkBlockDescription&& BlockDescription) override { - Stopwatch Timer; - SaveAttachmentResult Result; + SaveAttachmentResult Result; + + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + std::filesystem::path ChunkPath = GetAttachmentPath(RawHash); if (!IsFile(ChunkPath)) { @@ -142,14 +156,33 @@ public: Result.Reason = fmt::format("Failed saving oplog attachment to '{}'. Reason: {}", ChunkPath, Ex.what()); } } + if (!Result.ErrorCode && BlockDescription.BlockHash != IoHash::Zero) + { + try + { + std::filesystem::path MetaPath = GetAttachmentMetaPath(RawHash); + CbObject MetaData = BuildChunkBlockDescription(BlockDescription, {}); + SharedBuffer MetaBuffer = MetaData.GetBuffer(); + BasicFile MetaFile; + MetaFile.Open(MetaPath, BasicFile::Mode::kTruncate); + MetaFile.Write(MetaBuffer.GetView(), 0); + } + catch (const std::exception& Ex) + { + Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); + Result.Reason = fmt::format("Failed saving block description to '{}'. 
Reason: {}", RawHash, Ex.what()); + } + } AddStats(Payload.GetSize(), 0, Timer.GetElapsedTimeUs() * 1000); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } virtual SaveAttachmentsResult SaveAttachments(const std::vector<SharedBuffer>& Chunks) override { + SaveAttachmentsResult Result; + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); for (const SharedBuffer& Chunk : Chunks) { @@ -157,12 +190,10 @@ public: SaveAttachmentResult ChunkResult = SaveAttachment(Compressed.GetCompressed(), Compressed.DecodeRawHash(), {}); if (ChunkResult.ErrorCode) { - ChunkResult.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; - return SaveAttachmentsResult{ChunkResult}; + Result = SaveAttachmentsResult{ChunkResult}; + break; } } - SaveAttachmentsResult Result; - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } @@ -172,21 +203,60 @@ public: virtual GetKnownBlocksResult GetKnownBlocks() override { + Stopwatch Timer; if (m_OptionalBaseName.empty()) { - return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent)}}; + size_t MaxBlockCount = 10000; + + GetKnownBlocksResult Result; + + DirectoryContent Content; + GetDirectoryContent( + m_OutputPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeModificationTick, + Content); + std::vector<size_t> RecentOrder(Content.Files.size()); + std::iota(RecentOrder.begin(), RecentOrder.end(), 0u); + std::sort(RecentOrder.begin(), RecentOrder.end(), [&Content](size_t Lhs, size_t Rhs) { + return Content.FileModificationTicks[Lhs] > Content.FileModificationTicks[Rhs]; + }); + + for (size_t FileIndex : RecentOrder) + { + std::filesystem::path MetaPath = Content.Files[FileIndex]; + if (MetaPath.extension() == MetaExtension) + { + IoBuffer MetaFile = ReadFile(MetaPath).Flatten(); + CbValidateError Err; + CbObject ValidatedObject = 
ValidateAndReadCompactBinaryObject(std::move(MetaFile), Err); + if (Err == CbValidateError::None) + { + ChunkBlockDescription Description = ParseChunkBlockDescription(ValidatedObject); + if (Description.BlockHash != IoHash::Zero) + { + Result.Blocks.emplace_back(std::move(Description)); + if (Result.Blocks.size() == MaxBlockCount) + { + break; + } + } + } + } + } + + Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; + return Result; } LoadContainerResult LoadResult = LoadContainer(m_OptionalBaseName); if (LoadResult.ErrorCode) { return GetKnownBlocksResult{LoadResult}; } - Stopwatch Timer; std::vector<IoHash> BlockHashes = GetBlockHashesFromOplog(LoadResult.ContainerObject); if (BlockHashes.empty()) { return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent), - .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; + .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeMs() / 1000.0}}; } std::vector<IoHash> ExistingBlockHashes; for (const IoHash& RawHash : BlockHashes) @@ -200,15 +270,15 @@ public: if (ExistingBlockHashes.empty()) { return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent), - .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; + .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeMs() / 1000.0}}; } std::vector<ThinChunkBlockDescription> ThinKnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); - const size_t KnowBlockCount = ThinKnownBlocks.size(); + const size_t KnownBlockCount = ThinKnownBlocks.size(); - GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; - Result.Blocks.resize(KnowBlockCount); - for (size_t BlockIndex = 0; BlockIndex < KnowBlockCount; BlockIndex++) + GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeMs() / 1000.0}}; + 
Result.Blocks.resize(KnownBlockCount); + for (size_t BlockIndex = 0; BlockIndex < KnownBlockCount; BlockIndex++) { Result.Blocks[BlockIndex].BlockHash = ThinKnownBlocks[BlockIndex].BlockHash; Result.Blocks[BlockIndex].ChunkRawHashes = std::move(ThinKnownBlocks[BlockIndex].ChunkRawHashes); @@ -217,43 +287,131 @@ public: return Result; } - virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes) override + virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes, + BuildStorageCache* OptionalCache, + const Oid& CacheBuildId) override { - ZEN_UNUSED(BlockHashes); - return GetBlockDescriptionsResult{Result{.ErrorCode = int(HttpResponseCode::NotFound)}}; - } + GetBlockDescriptionsResult Result; - virtual AttachmentExistsInCacheResult AttachmentExistsInCache(std::span<const IoHash> RawHashes) override - { - return AttachmentExistsInCacheResult{Result{.ErrorCode = 0}, std::vector<bool>(RawHashes.size(), false)}; + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + + Result.Blocks.reserve(BlockHashes.size()); + + uint64_t ByteCount = 0; + + std::vector<ChunkBlockDescription> UnorderedList; + { + if (OptionalCache) + { + std::vector<CbObject> CacheBlockMetadatas = OptionalCache->GetBlobMetadatas(CacheBuildId, BlockHashes); + for (const CbObject& BlockObject : CacheBlockMetadatas) + { + ByteCount += BlockObject.GetSize(); + } + UnorderedList = ParseBlockMetadatas(CacheBlockMetadatas); + } + + tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockDescriptionLookup; + BlockDescriptionLookup.reserve(BlockHashes.size()); + for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++) + { + const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex]; + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex); + } + + if (UnorderedList.size() < BlockHashes.size()) + { + 
for (const IoHash& RawHash : BlockHashes) + { + if (!BlockDescriptionLookup.contains(RawHash)) + { + std::filesystem::path MetaPath = GetAttachmentMetaPath(RawHash); + IoBuffer MetaFile = ReadFile(MetaPath).Flatten(); + ByteCount += MetaFile.GetSize(); + CbValidateError Err; + CbObject ValidatedObject = ValidateAndReadCompactBinaryObject(std::move(MetaFile), Err); + if (Err == CbValidateError::None) + { + ChunkBlockDescription Description = ParseChunkBlockDescription(ValidatedObject); + if (Description.BlockHash != IoHash::Zero) + { + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, UnorderedList.size()); + UnorderedList.emplace_back(std::move(Description)); + } + } + } + } + } + + Result.Blocks.reserve(UnorderedList.size()); + for (const IoHash& RawHash : BlockHashes) + { + if (auto It = BlockDescriptionLookup.find(RawHash); It != BlockDescriptionLookup.end()) + { + Result.Blocks.emplace_back(std::move(UnorderedList[It->second])); + } + } + } + AddStats(0, ByteCount, Timer.GetElapsedTimeUs() * 1000); + return Result; } - virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash, const AttachmentRange& Range) override + virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override { - Stopwatch Timer; - LoadAttachmentResult Result; + LoadAttachmentResult Result; + + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + std::filesystem::path ChunkPath = GetAttachmentPath(RawHash); if (!IsFile(ChunkPath)) { Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound); Result.Reason = fmt::format("Failed loading oplog attachment from '{}'. 
Reason: 'The file does not exist'", ChunkPath.string()); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } { BasicFile ChunkFile; ChunkFile.Open(ChunkPath, BasicFile::Mode::kRead); - if (Range) - { - Result.Bytes = ChunkFile.ReadRange(Range.Offset, Range.Bytes); - } - else + Result.Bytes = ChunkFile.ReadAll(); + } + AddStats(0, Result.Bytes.GetSize(), Timer.GetElapsedTimeUs() * 1000); + return Result; + } + + virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_ASSERT(!Ranges.empty()); + LoadAttachmentRangesResult Result; + + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + + std::filesystem::path ChunkPath = GetAttachmentPath(RawHash); + if (!IsFile(ChunkPath)) + { + Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound); + Result.Reason = fmt::format("Failed loading oplog attachment from '{}'. 
Reason: 'The file does not exist'", ChunkPath.string()); + return Result; + } + { + uint64_t Start = Ranges.front().first; + uint64_t Length = Ranges.back().first + Ranges.back().second - Ranges.front().first; + Result.Bytes = IoBufferBuilder::MakeFromFile(ChunkPath, Start, Length); + Result.Ranges.reserve(Ranges.size()); + for (const std::pair<uint64_t, uint64_t>& Range : Ranges) { - Result.Bytes = ChunkFile.ReadAll(); + Result.Ranges.push_back(std::make_pair(Range.first - Start, Range.second)); } } - AddStats(0, Result.Bytes.GetSize(), Timer.GetElapsedTimeUs() * 1000); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; + AddStats(0, + std::accumulate(Result.Ranges.begin(), + Result.Ranges.end(), + uint64_t(0), + [](uint64_t Current, const std::pair<uint64_t, uint64_t>& Value) { return Current + Value.second; }), + Timer.GetElapsedTimeUs() * 1000); return Result; } @@ -263,7 +421,7 @@ public: LoadAttachmentsResult Result; for (const IoHash& Hash : RawHashes) { - LoadAttachmentResult ChunkResult = LoadAttachment(Hash, {}); + LoadAttachmentResult ChunkResult = LoadAttachment(Hash); if (ChunkResult.ErrorCode) { ChunkResult.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; @@ -276,20 +434,20 @@ public: return Result; } - virtual void Flush() override {} - private: LoadContainerResult LoadContainer(const std::string& Name) { - Stopwatch Timer; - LoadContainerResult Result; + LoadContainerResult Result; + + Stopwatch Timer; + auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; }); + std::filesystem::path SourcePath = m_OutputPath; SourcePath.append(Name); if (!IsFile(SourcePath)) { Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound); Result.Reason = fmt::format("Failed loading oplog container from '{}'. 
Reason: 'The file does not exist'", SourcePath.string()); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } IoBuffer ContainerPayload; @@ -303,18 +461,16 @@ private: if (Result.ContainerObject = ValidateAndReadCompactBinaryObject(std::move(ContainerPayload), ValidateResult); ValidateResult != CbValidateError::None || !Result.ContainerObject) { - Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("The file {} is not formatted as a compact binary object ('{}')", - SourcePath.string(), - ToString(ValidateResult)); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; + Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); + Result.Reason = fmt::format("The file {} is not formatted as a compact binary object ('{}')", + SourcePath.string(), + ToString(ValidateResult)); return Result; } - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; return Result; } - std::filesystem::path GetAttachmentPath(const IoHash& RawHash) const + std::filesystem::path GetAttachmentBasePath(const IoHash& RawHash) const { ExtendablePathBuilder<128> ShardedPath; ShardedPath.Append(m_OutputPath.c_str()); @@ -333,6 +489,19 @@ private: return ShardedPath.ToPath(); } + static constexpr std::string_view BlobExtension = ".blob"; + static constexpr std::string_view MetaExtension = ".meta"; + + std::filesystem::path GetAttachmentPath(const IoHash& RawHash) + { + return GetAttachmentBasePath(RawHash).replace_extension(BlobExtension); + } + + std::filesystem::path GetAttachmentMetaPath(const IoHash& RawHash) + { + return GetAttachmentBasePath(RawHash).replace_extension(MetaExtension); + } + void AddStats(uint64_t UploadedBytes, uint64_t DownloadedBytes, uint64_t ElapsedNS) { m_SentBytes.fetch_add(UploadedBytes); diff --git a/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp b/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp index f8179831c..5b456cb4c 
100644 --- a/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp @@ -212,24 +212,43 @@ public: return Result; } - virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes) override + virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes, + BuildStorageCache* OptionalCache, + const Oid& CacheBuildId) override { - ZEN_UNUSED(BlockHashes); + ZEN_UNUSED(BlockHashes, OptionalCache, CacheBuildId); return GetBlockDescriptionsResult{Result{.ErrorCode = int(HttpResponseCode::NotFound)}}; } - virtual AttachmentExistsInCacheResult AttachmentExistsInCache(std::span<const IoHash> RawHashes) override + virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override { - return AttachmentExistsInCacheResult{Result{.ErrorCode = 0}, std::vector<bool>(RawHashes.size(), false)}; + LoadAttachmentResult Result; + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); + JupiterResult GetResult = Session.GetCompressedBlob(m_Namespace, RawHash, m_TempFilePath); + AddStats(GetResult); + + Result = {ConvertResult(GetResult), std::move(GetResult.Response)}; + if (GetResult.ErrorCode) + { + Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}. 
Reason: '{}'", + m_JupiterClient->ServiceUrl(), + m_Namespace, + RawHash, + Result.Reason); + } + return Result; } - virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash, const AttachmentRange& Range) override + virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override { - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); - JupiterResult GetResult = Session.GetCompressedBlob(m_Namespace, RawHash, m_TempFilePath); + ZEN_ASSERT(!Ranges.empty()); + LoadAttachmentRangesResult Result; + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); + JupiterResult GetResult = Session.GetCompressedBlob(m_Namespace, RawHash, m_TempFilePath); AddStats(GetResult); - LoadAttachmentResult Result{ConvertResult(GetResult), std::move(GetResult.Response)}; + Result = LoadAttachmentRangesResult{ConvertResult(GetResult), std::move(GetResult.Response)}; if (GetResult.ErrorCode) { Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}. 
Reason: '{}'", @@ -238,9 +257,9 @@ public: RawHash, Result.Reason); } - if (!Result.ErrorCode && Range) + else { - Result.Bytes = IoBuffer(Result.Bytes, Range.Offset, Range.Bytes); + Result.Ranges = std::vector<std::pair<uint64_t, uint64_t>>(Ranges.begin(), Ranges.end()); } return Result; } @@ -250,7 +269,7 @@ public: LoadAttachmentsResult Result; for (const IoHash& Hash : RawHashes) { - LoadAttachmentResult ChunkResult = LoadAttachment(Hash, {}); + LoadAttachmentResult ChunkResult = LoadAttachment(Hash); if (ChunkResult.ErrorCode) { return LoadAttachmentsResult{ChunkResult}; @@ -262,8 +281,6 @@ public: return Result; } - virtual void Flush() override {} - private: LoadContainerResult LoadContainer(const IoHash& Key) { diff --git a/src/zenremotestore/projectstore/projectstoreoperations.cpp b/src/zenremotestore/projectstore/projectstoreoperations.cpp index becac3d4c..ba4b74825 100644 --- a/src/zenremotestore/projectstore/projectstoreoperations.cpp +++ b/src/zenremotestore/projectstore/projectstoreoperations.cpp @@ -3,13 +3,14 @@ #include <zenremotestore/projectstore/projectstoreoperations.h> #include <zencore/compactbinaryutil.h> +#include <zencore/fmtutils.h> #include <zencore/parallelwork.h> #include <zencore/scopeguard.h> #include <zencore/timer.h> #include <zenremotestore/builds/buildstorageutil.h> #include <zenremotestore/chunking/chunkedfile.h> -#include <zenremotestore/operationlogoutput.h> #include <zenremotestore/projectstore/remoteprojectstore.h> +#include <zenutil/progress.h> namespace zen { @@ -17,11 +18,11 @@ using namespace std::literals; //////////////////////////// ProjectStoreOperationOplogState -ProjectStoreOperationOplogState::ProjectStoreOperationOplogState(OperationLogOutput& OperationLogOutput, - StorageInstance& Storage, - const Oid& BuildId, - const Options& Options) -: m_LogOutput(OperationLogOutput) +ProjectStoreOperationOplogState::ProjectStoreOperationOplogState(LoggerRef Log, + StorageInstance& Storage, + const Oid& BuildId, + const 
Options& Options) +: m_Log(Log) , m_Storage(Storage) , m_BuildId(BuildId) , m_Options(Options) @@ -48,10 +49,7 @@ ProjectStoreOperationOplogState::LoadBuildObject() { if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Read build {} from locally cached file in {}", - m_BuildId, - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("Read build {} from locally cached file in {}", m_BuildId, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } return m_BuildObject; } @@ -61,11 +59,10 @@ ProjectStoreOperationOplogState::LoadBuildObject() m_BuildObject = m_Storage.BuildStorage->GetBuild(m_BuildId); if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Fetched build {} from {} in {}", - m_BuildId, - m_Storage.BuildStorageHttp->GetBaseUri(), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("Fetched build {} from {} in {}", + m_BuildId, + m_Storage.BuildStorageHttp->GetBaseUri(), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } CreateDirectories(CachedBuildObjectPath.parent_path()); TemporaryFile::SafeWriteFile(CachedBuildObjectPath, m_BuildObject.GetBuffer().GetView()); @@ -122,11 +119,10 @@ ProjectStoreOperationOplogState::LoadBuildPartsObject() { if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Read build part {}/{} from locally cached file in {}", - m_BuildId, - BuildPartId, - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("Read build part {}/{} from locally cached file in {}", + m_BuildId, + BuildPartId, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } return m_BuildPartsObject; } @@ -136,12 +132,11 @@ ProjectStoreOperationOplogState::LoadBuildPartsObject() m_BuildPartsObject = m_Storage.BuildStorage->GetBuildPart(m_BuildId, BuildPartId); if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Fetched build part {}/{} from {} in {}", - m_BuildId, - BuildPartId, - m_Storage.BuildStorageHttp->GetBaseUri(), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("Fetched build part {}/{} from {} in {}", + 
m_BuildId, + BuildPartId, + m_Storage.BuildStorageHttp->GetBaseUri(), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } CreateDirectories(CachedBuildPartObjectPath.parent_path()); TemporaryFile::SafeWriteFile(CachedBuildPartObjectPath, m_BuildPartsObject.GetBuffer().GetView()); @@ -168,11 +163,7 @@ ProjectStoreOperationOplogState::LoadOpsSectionObject() } else if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Read {}/{}/ops from locally cached file in {}", - BuildPartId, - m_BuildId, - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("Read {}/{}/ops from locally cached file in {}", BuildPartId, m_BuildId, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); return m_OpsSectionObject; } } @@ -193,11 +184,10 @@ ProjectStoreOperationOplogState::LoadOpsSectionObject() } if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Decompressed and validated oplog payload {} -> {} in {}", - NiceBytes(OpsSection.GetSize()), - NiceBytes(m_OpsSectionObject.GetSize()), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("Decompressed and validated oplog payload {} -> {} in {}", + NiceBytes(OpsSection.GetSize()), + NiceBytes(m_OpsSectionObject.GetSize()), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } if (m_OpsSectionObject) { @@ -226,12 +216,11 @@ ProjectStoreOperationOplogState::LoadArrayFromBuildPart(std::string_view ArrayNa { if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Read {}/{}/{} from locally cached file in {}", - BuildPartId, - m_BuildId, - ArrayName, - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("Read {}/{}/{} from locally cached file in {}", + BuildPartId, + m_BuildId, + ArrayName, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } CbArray Result = CbArray(SharedBuffer(std::move(Payload))); return Result; @@ -290,7 +279,8 @@ ProjectStoreOperationOplogState::LoadChunksArray() //////////////////////////// ProjectStoreOperationDownloadAttachments 
-ProjectStoreOperationDownloadAttachments::ProjectStoreOperationDownloadAttachments(OperationLogOutput& OperationLogOutput, +ProjectStoreOperationDownloadAttachments::ProjectStoreOperationDownloadAttachments(LoggerRef Log, + ProgressBase& Progress, StorageInstance& Storage, std::atomic<bool>& AbortFlag, std::atomic<bool>& PauseFlag, @@ -299,7 +289,8 @@ ProjectStoreOperationDownloadAttachments::ProjectStoreOperationDownloadAttachmen ProjectStoreOperationOplogState& State, std::span<const IoHash> AttachmentHashes, const Options& Options) -: m_LogOutput(OperationLogOutput) +: m_Log(Log) +, m_Progress(Progress) , m_Storage(Storage) , m_AbortFlag(AbortFlag) , m_PauseFlag(PauseFlag) @@ -325,9 +316,9 @@ ProjectStoreOperationDownloadAttachments::Execute() }; auto EndProgress = - MakeGuard([&]() { m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); }); + MakeGuard([&]() { m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); }); - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::ReadAttachmentData, (uint32_t)TaskSteps::StepCount); + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::ReadAttachmentData, (uint32_t)TaskSteps::StepCount); Stopwatch Timer; tsl::robin_map<IoHash, uint64_t, IoHash::Hasher> ChunkSizes; @@ -415,30 +406,29 @@ ProjectStoreOperationDownloadAttachments::Execute() FilesToDechunk.size() > 0 ? fmt::format("\n{} file{} needs to be dechunked", FilesToDechunk.size(), FilesToDechunk.size() == 1 ? "" : "s") : ""; - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Need to download {} block{} and {} chunk{}{}", - BlocksToDownload.size(), - BlocksToDownload.size() == 1 ? "" : "s", - LooseChunksToDownload.size(), - LooseChunksToDownload.size() == 1 ? "" : "s", - DechunkInfo); + ZEN_INFO("Need to download {} block{} and {} chunk{}{}", + BlocksToDownload.size(), + BlocksToDownload.size() == 1 ? 
"" : "s", + LooseChunksToDownload.size(), + LooseChunksToDownload.size() == 1 ? "" : "s", + DechunkInfo); } auto GetBuildBlob = [this](const IoHash& RawHash, const std::filesystem::path& OutputPath) { IoBuffer Payload; - if (m_Storage.BuildCacheStorage) + if (m_Storage.CacheStorage) { - Payload = m_Storage.BuildCacheStorage->GetBuildBlob(m_State.GetBuildId(), RawHash); + Payload = m_Storage.CacheStorage->GetBuildBlob(m_State.GetBuildId(), RawHash); } if (!Payload) { Payload = m_Storage.BuildStorage->GetBuildBlob(m_State.GetBuildId(), RawHash); - if (m_Storage.BuildCacheStorage && m_Options.PopulateCache) + if (m_Storage.CacheStorage && m_Options.PopulateCache) { - m_Storage.BuildCacheStorage->PutBuildBlob(m_State.GetBuildId(), - RawHash, - Payload.GetContentType(), - CompositeBuffer(SharedBuffer(Payload))); + m_Storage.CacheStorage->PutBuildBlob(m_State.GetBuildId(), + RawHash, + Payload.GetContentType(), + CompositeBuffer(SharedBuffer(Payload))); } } uint64_t PayloadSize = Payload.GetSize(); @@ -470,18 +460,15 @@ ProjectStoreOperationDownloadAttachments::Execute() std::filesystem::path TempAttachmentPath = MakeSafeAbsolutePath(m_Options.AttachmentOutputPath) / ".tmp"; CreateDirectories(TempAttachmentPath); auto _0 = MakeGuard([this, &TempAttachmentPath]() { - if (true) + if (!m_Options.IsQuiet) { - if (!m_Options.IsQuiet) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, "Cleaning up temporary directory"); - } - CleanDirectory(TempAttachmentPath, true); - RemoveDir(TempAttachmentPath); + ZEN_INFO("Cleaning up temporary directory"); } + CleanDirectory(TempAttachmentPath, true); + RemoveDir(TempAttachmentPath); }); - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Download, (uint32_t)TaskSteps::StepCount); + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::Download, (uint32_t)TaskSteps::StepCount); std::filesystem::path BlocksPath = TempAttachmentPath / "blocks"; CreateDirectories(BlocksPath); @@ -492,11 +479,9 @@ 
ProjectStoreOperationDownloadAttachments::Execute() std::filesystem::path LooseChunksPath = TempAttachmentPath / "loosechunks"; CreateDirectories(LooseChunksPath); - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Downloading")); - OperationLogOutput::ProgressBar& DownloadProgressBar(*ProgressBarPtr); + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Downloading"); - std::atomic<bool> PauseFlag; - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); std::atomic<size_t> LooseChunksCompleted; std::atomic<size_t> BlocksCompleted; @@ -511,7 +496,7 @@ ProjectStoreOperationDownloadAttachments::Execute() if (m_Options.ForceDownload || !IsFile(LooseChunkOutputPath)) { GetBuildBlob(RawHash, LooseChunkOutputPath); - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Downloaded chunk {}", RawHash); + ZEN_DEBUG("Downloaded chunk {}", RawHash); } Work.ScheduleWork(m_IOWorkerPool, [&, LooseChunkIndex, LooseChunkOutputPath](std::atomic<bool>&) { @@ -547,7 +532,7 @@ ProjectStoreOperationDownloadAttachments::Execute() { ChunkOutput.Close(); RemoveFile(ChunkOutputPath); - throw std::runtime_error(fmt::format("Failed to decompress chunk {} to ", RawHash, ChunkOutputPath)); + throw std::runtime_error(fmt::format("Failed to decompress chunk {} to '{}'", RawHash, ChunkOutputPath)); } } else @@ -555,7 +540,7 @@ ProjectStoreOperationDownloadAttachments::Execute() TemporaryFile::SafeWriteFile(ChunkOutputPath, CompressedChunk.GetCompressed()); } - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Wrote loose chunk {} to '{}'", RawHash, ChunkOutputPath); + ZEN_DEBUG("Wrote loose chunk {} to '{}'", RawHash, ChunkOutputPath); LooseChunksCompleted++; }); }); @@ -572,7 +557,7 @@ ProjectStoreOperationDownloadAttachments::Execute() if (m_Options.ForceDownload || !IsFile(BlockOutputPath)) { GetBuildBlob(RawHash, 
BlockOutputPath); - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Downloaded block {}", RawHash); + ZEN_DEBUG("Downloaded block {}", RawHash); } Work.ScheduleWork(m_IOWorkerPool, [&, BlockIndex, BlockOutputPath](std::atomic<bool>&) { @@ -607,7 +592,7 @@ ProjectStoreOperationDownloadAttachments::Execute() ChunkOutput.Close(); RemoveFile(ChunkOutputPath); throw std::runtime_error( - fmt::format("Failed to decompress chunk {} to ", ChunkHash, ChunkOutputPath)); + fmt::format("Failed to decompress chunk {} to '{}'", ChunkHash, ChunkOutputPath)); } } else @@ -615,7 +600,7 @@ ProjectStoreOperationDownloadAttachments::Execute() TemporaryFile::SafeWriteFile(ChunkOutputPath, CompressedChunk.GetCompressed()); } - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Wrote block chunk {} to '{}'", ChunkHash, ChunkOutputPath); + ZEN_DEBUG("Wrote block chunk {} to '{}'", ChunkHash, ChunkOutputPath); } if (ChunkedFileRawHashes.contains(ChunkHash)) { @@ -635,7 +620,7 @@ ProjectStoreOperationDownloadAttachments::Execute() }); } - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { ZEN_UNUSED(IsAborted, IsPaused, PendingWork); std::string Details = fmt::format("{}/{} blocks, {}/{} chunks downloaded", @@ -643,39 +628,37 @@ ProjectStoreOperationDownloadAttachments::Execute() BlocksToDownload.size(), LooseChunksCompleted.load(), LooseChunksToDownload.size()); - DownloadProgressBar.UpdateState({.Task = "Downloading", - .Details = Details, - .TotalCount = BlocksToDownload.size() + LooseChunksToDownload.size(), - .RemainingCount = BlocksToDownload.size() + LooseChunksToDownload.size() - - (BlocksCompleted.load() + LooseChunksCompleted.load()), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); + ProgressBar->UpdateState({.Task = "Downloading", + .Details = Details, + .TotalCount = 
BlocksToDownload.size() + LooseChunksToDownload.size(), + .RemainingCount = BlocksToDownload.size() + LooseChunksToDownload.size() - + (BlocksCompleted.load() + LooseChunksCompleted.load()), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); }); - DownloadProgressBar.Finish(); + ProgressBar->Finish(); if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "{} block{} downloaded, {} loose chunk{} downloaded in {}", - BlocksToDownload.size(), - BlocksToDownload.size() == 1 ? "" : "s", - LooseChunksToDownload.size(), - LooseChunksToDownload.size() == 1 ? "" : "s", - NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); + ZEN_INFO("{} block{} downloaded, {} loose chunk{} downloaded in {}", + BlocksToDownload.size(), + BlocksToDownload.size() == 1 ? "" : "s", + LooseChunksToDownload.size(), + LooseChunksToDownload.size() == 1 ? "" : "s", + NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); } } if (!ChunkedFileInfos.empty()) { - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::AnalyzeDechunk, (uint32_t)TaskSteps::StepCount); + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::AnalyzeDechunk, (uint32_t)TaskSteps::StepCount); std::filesystem::path ChunkedFilesPath = TempAttachmentPath / "chunkedfiles"; CreateDirectories(ChunkedFilesPath); try { - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Dechunking")); - OperationLogOutput::ProgressBar& DechunkingProgressBar(*ProgressBarPtr); + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Dechunking"); std::atomic<uint64_t> ChunksWritten; @@ -729,7 +712,7 @@ ProjectStoreOperationDownloadAttachments::Execute() PrepareFileForScatteredWrite(OpenChunkedFiles.back()->Handle(), ChunkedFileInfo.RawSize); } - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Dechunk, (uint32_t)TaskSteps::StepCount); + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::Dechunk, 
(uint32_t)TaskSteps::StepCount); std::vector<std::atomic<uint8_t>> ChunkWrittenFlags(ChunkOpenFileTargets.size()); @@ -755,7 +738,7 @@ ProjectStoreOperationDownloadAttachments::Execute() })) { std::error_code DummyEc; - throw std::runtime_error(fmt::format("Failed to decompress chunk {} at offset {} to {}", + throw std::runtime_error(fmt::format("Failed to decompress chunk {} at offset {} to '{}'", CompressedChunkBuffer.DecodeRawHash(), ChunkTarget.Offset, PathFromHandle(OutputFile.Handle(), DummyEc))); @@ -768,8 +751,7 @@ ProjectStoreOperationDownloadAttachments::Execute() { Stopwatch DechunkTimer; - std::atomic<bool> PauseFlag; - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); std::vector<IoHash> LooseChunks(LooseChunksToDownload.begin(), LooseChunksToDownload.end()); @@ -819,26 +801,24 @@ ProjectStoreOperationDownloadAttachments::Execute() } }); } - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { ZEN_UNUSED(IsAborted, IsPaused, PendingWork); std::string Details = fmt::format("{}/{} chunks written", ChunksWritten.load(), ChunkOpenFileTargets.size()); - DechunkingProgressBar.UpdateState( - {.Task = "Dechunking ", - .Details = Details, - .TotalCount = ChunkOpenFileTargets.size(), - .RemainingCount = ChunkOpenFileTargets.size() - ChunksWritten.load(), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); + ProgressBar->UpdateState({.Task = "Dechunking ", + .Details = Details, + .TotalCount = ChunkOpenFileTargets.size(), + .RemainingCount = ChunkOpenFileTargets.size() - ChunksWritten.load(), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); }); - 
DechunkingProgressBar.Finish(); + ProgressBar->Finish(); if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "{} file{} dechunked in {}", - ChunkedFileInfos.size(), - ChunkedFileInfos.size() == 1 ? "" : "s", - NiceTimeSpanMs(DechunkTimer.GetElapsedTimeMs())); + ZEN_INFO("{} file{} dechunked in {}", + ChunkedFileInfos.size(), + ChunkedFileInfos.size() == 1 ? "" : "s", + NiceTimeSpanMs(DechunkTimer.GetElapsedTimeMs())); } } } @@ -853,12 +833,10 @@ ProjectStoreOperationDownloadAttachments::Execute() throw; } { - Stopwatch VerifyTimer; - std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Verifying")); - OperationLogOutput::ProgressBar& VerifyProgressBar(*ProgressBarPtr); + Stopwatch VerifyTimer; + std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Verifying"); - std::atomic<bool> PauseFlag; - ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog); std::atomic<size_t> DechunkedFilesMoved; @@ -875,43 +853,41 @@ ProjectStoreOperationDownloadAttachments::Execute() } std::filesystem::path ChunkOutputPath = m_Options.AttachmentOutputPath / fmt::format("{}", ChunkedFileInfo.RawHash); RenameFile(ChunkedFilePath, ChunkOutputPath); - ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Moved dechunked file {} to '{}'", ChunkedFileInfo.RawHash, ChunkOutputPath); + ZEN_DEBUG("Moved dechunked file {} to '{}'", ChunkedFileInfo.RawHash, ChunkOutputPath); DechunkedFilesMoved++; }); } - Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { ZEN_UNUSED(IsAborted, IsPaused, PendingWork); std::string Details = fmt::format("{}/{} files verified", DechunkedFilesMoved.load(), ChunkedFileInfos.size()); - 
VerifyProgressBar.UpdateState({.Task = "Verifying ", - .Details = Details, - .TotalCount = ChunkedFileInfos.size(), - .RemainingCount = ChunkedFileInfos.size() - DechunkedFilesMoved.load(), - .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, - false); + ProgressBar->UpdateState({.Task = "Verifying ", + .Details = Details, + .TotalCount = ChunkedFileInfos.size(), + .RemainingCount = ChunkedFileInfos.size() - DechunkedFilesMoved.load(), + .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); }); - VerifyProgressBar.Finish(); + ProgressBar->Finish(); if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Verified {} chunked file{} in {}", - ChunkedFileInfos.size(), - ChunkedFileInfos.size() == 1 ? "" : "s", - NiceTimeSpanMs(VerifyTimer.GetElapsedTimeMs())); + ZEN_INFO("Verified {} chunked file{} in {}", + ChunkedFileInfos.size(), + ChunkedFileInfos.size() == 1 ? "" : "s", + NiceTimeSpanMs(VerifyTimer.GetElapsedTimeMs())); } } } if (!m_Options.IsQuiet) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Downloaded {} attachment{} to '{}' in {}", - m_AttachmentHashes.size(), - m_AttachmentHashes.size() == 1 ? "" : "s", - m_Options.AttachmentOutputPath, - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + ZEN_INFO("Downloaded {} attachment{} to '{}' in {}", + m_AttachmentHashes.size(), + m_AttachmentHashes.size() == 1 ? 
"" : "s", + m_Options.AttachmentOutputPath, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } - m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount); + m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount); } } // namespace zen diff --git a/src/zenremotestore/projectstore/remoteprojectstore.cpp b/src/zenremotestore/projectstore/remoteprojectstore.cpp index 2a9da6f58..f43f0813a 100644 --- a/src/zenremotestore/projectstore/remoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/remoteprojectstore.cpp @@ -8,16 +8,21 @@ #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> +#include <zencore/logging/broadcastsink.h> +#include <zencore/logging/logger.h> +#include <zencore/parallelwork.h> #include <zencore/scopeguard.h> #include <zencore/stream.h> #include <zencore/timer.h> #include <zencore/trace.h> #include <zencore/workthreadpool.h> #include <zenhttp/httpcommon.h> +#include <zenremotestore/builds/buildstoragecache.h> #include <zenremotestore/chunking/chunkedcontent.h> #include <zenremotestore/chunking/chunkedfile.h> -#include <zenremotestore/operationlogoutput.h> #include <zenstore/cidstore.h> +#include <zenutil/logging.h> +#include <zenutil/progress.h> #include <numeric> #include <unordered_map> @@ -65,44 +70,19 @@ namespace zen { } */ namespace remotestore_impl { - ////////////////////////////// AsyncRemoteResult - - struct AsyncRemoteResult - { - void SetError(int32_t ErrorCode, const std::string& ErrorReason, const std::string ErrorText) - { - int32_t Expected = 0; - if (m_ErrorCode.compare_exchange_weak(Expected, ErrorCode ? 
ErrorCode : -1)) - { - m_ErrorReason = ErrorReason; - m_ErrorText = ErrorText; - } - } - bool IsError() const { return m_ErrorCode.load() != 0; } - int GetError() const { return m_ErrorCode.load(); }; - const std::string& GetErrorReason() const { return m_ErrorReason; }; - const std::string& GetErrorText() const { return m_ErrorText; }; - RemoteProjectStore::Result ConvertResult(double ElapsedSeconds = 0.0) const - { - return RemoteProjectStore::Result{m_ErrorCode, ElapsedSeconds, m_ErrorReason, m_ErrorText}; - } - - private: - std::atomic<int32_t> m_ErrorCode = 0; - std::string m_ErrorReason; - std::string m_ErrorText; - }; + using namespace std::literals; void ReportProgress(JobContext* OptionalContext, std::string_view CurrentOp, std::string_view Details, ptrdiff_t Total, - ptrdiff_t Remaining) + ptrdiff_t Remaining, + uint64_t ElapsedTimeMS) { if (OptionalContext) { ZEN_ASSERT(Total > 0); - OptionalContext->ReportProgress(CurrentOp, Details, Total, Remaining); + OptionalContext->ReportProgress(CurrentOp, Details, Total, Remaining, ElapsedTimeMS); } } @@ -124,14 +104,17 @@ namespace remotestore_impl { return OptionalContext->IsCancelled(); } - std::string GetStats(const RemoteProjectStore::Stats& Stats, uint64_t ElapsedWallTimeMS) + std::string GetStats(const RemoteProjectStore::Stats& Stats, + const BuildStorageCache::Statistics* OptionalCacheStats, + uint64_t ElapsedWallTimeMS) { - return fmt::format( - "Sent: {} ({}bits/s) Recv: {} ({}bits/s)", - NiceBytes(Stats.m_SentBytes), - NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((Stats.m_SentBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u), - NiceBytes(Stats.m_ReceivedBytes), - NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((Stats.m_ReceivedBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u)); + uint64_t SentBytes = Stats.m_SentBytes + (OptionalCacheStats ? OptionalCacheStats->TotalBytesWritten.load() : 0); + uint64_t ReceivedBytes = Stats.m_ReceivedBytes + (OptionalCacheStats ? 
OptionalCacheStats->TotalBytesRead.load() : 0); + return fmt::format("Sent: {} ({}bits/s) Recv: {} ({}bits/s)", + NiceBytes(SentBytes), + NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((SentBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u), + NiceBytes(ReceivedBytes), + NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((ReceivedBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u)); } void LogRemoteStoreStatsDetails(const RemoteProjectStore::Stats& Stats) @@ -165,6 +148,925 @@ namespace remotestore_impl { return BlockIndex; } + // BlockComposer packs attachment chunks (each identified by an IoHash and a byte size) into + // fixed-size blocks subject to two constraints: + // - The total encoded content of a block must not exceed UsableBlockSize bytes. + // - A block may contain at most MaxChunksPerBlock chunk entries. + // + // Chunks belonging to the same op key (Oid) are kept together in one block whenever possible, + // so that a single block fetch can satisfy an entire op without needing to read multiple blocks. + // + // When a block is complete the OnNewBlock callback is invoked with ownership of the chunk-hash + // vector for that block. The callback is also invoked for any partially-filled pending block + // that remains after all attachments have been processed. + class BlockComposer + { + public: + struct Configuration + { + uint64_t MaxBlockSize = 0; // Total encoded block size limit in bytes (includes header overhead). + uint64_t MaxChunksPerBlock = 0; // Maximum number of chunk entries allowed in a single block. + uint64_t MaxChunkEmbedSize = 0; // Maximum size of one embeddable chunk; used to calculate worst-case header size. + std::function<bool()> + IsCancelledFunc; // Optional: if set and returns true, Compose returns early without emitting remaining blocks. 
+ }; + + explicit BlockComposer(const Configuration& Config) : m_Config(Config), m_UsableBlockSize(CalculateUsableBlockSize(m_Config)) {} + + // Compose distributes AttachmentHashes into blocks via a two-phase algorithm. + // + // Phase 1 - Gather (inner while loop): + // Starting from the current index, collect all consecutive attachments that share the same + // op key (Oid) into CurrentOpRawHashes / CurrentOpChunkSizes. Collection stops (with + // CurrentOpFillFullBlock = false) when a different op key is encountered. Collection also + // stops early (with CurrentOpFillFullBlock = true) if adding the next same-key attachment + // would exceed m_UsableBlockSize by bytes OR would reach MaxChunksPerBlock by count - + // meaning the gathered chunks exactly saturate one block and must be emitted immediately. + // + // Phase 2 - Place (while loop over CurrentOpChunkSizes): + // Decides where the gathered chunks go. Exactly one of four mutually exclusive paths runs + // per iteration; after each path the loop re-evaluates with whatever chunks remain: + // + // Path A: CurrentOpFillFullBlock == true + // The gathered set exactly fills one block. Emit it immediately as a standalone block + // and clear CurrentOpChunkSizes. The pending block is left untouched. + // + // Path B: All gathered chunks fit in the pending block (both size and count constraints met) + // Merge the gathered chunks into PendingChunkHashes/PendingBlockSize and clear the + // current-op buffers. If the pending block is now exactly full, flush it immediately. + // + // Path C: Gathered chunks don't fit AND pending block is >75% full by bytes + // The pending block is already well-utilised; flush it now and loop so that the gathered + // chunks are re-evaluated against the freshly emptied pending block. + // + // Path D: Gathered chunks don't fit AND pending block is <=75% full by bytes + // The binding constraint is chunk count, not bytes. 
Greedily fill the pending block with + // as many gathered chunks as fit (stopping at the first chunk that would violate either + // size or count), flush the pending block, remove the added chunks from the current-op + // buffers, and loop so the remaining gathered chunks are re-evaluated. + // + // Final flush: after all attachments have been processed, any non-empty pending block is + // emitted. + void Compose(std::span<const IoHash> AttachmentHashes, + std::span<const uint64_t> AttachmentSizes, + std::span<const Oid> AttachmentKeys, + const std::function<void(std::vector<IoHash>&& ChunkRawHashes)>& OnNewBlock) + { + std::vector<IoHash> PendingChunkHashes; + uint64_t PendingBlockSize = 0; + + size_t SortedUploadAttachmentsIndex = 0; + + Stopwatch AssembleBlocksProgressTimer; + while (SortedUploadAttachmentsIndex < AttachmentHashes.size()) + { + if (m_Config.IsCancelledFunc && m_Config.IsCancelledFunc()) + { + return; + } + + const IoHash& FirstAttachmentHash = AttachmentHashes[SortedUploadAttachmentsIndex]; + const Oid FirstAttachmentOpKey = AttachmentKeys[SortedUploadAttachmentsIndex]; + uint64_t CurrentOpAttachmentsSize = AttachmentSizes[SortedUploadAttachmentsIndex]; + ZEN_ASSERT(CurrentOpAttachmentsSize <= m_Config.MaxChunkEmbedSize); + + std::vector<IoHash> CurrentOpRawHashes; + CurrentOpRawHashes.push_back(FirstAttachmentHash); + + std::vector<uint64_t> CurrentOpChunkSizes; + CurrentOpChunkSizes.push_back(CurrentOpAttachmentsSize); + + bool CurrentOpFillFullBlock = false; + + while (SortedUploadAttachmentsIndex + CurrentOpRawHashes.size() < AttachmentHashes.size()) + { + size_t NextSortedUploadAttachmentsIndex = SortedUploadAttachmentsIndex + CurrentOpChunkSizes.size(); + const Oid NextAttachmentOpKey = AttachmentKeys[NextSortedUploadAttachmentsIndex]; + if (NextAttachmentOpKey != FirstAttachmentOpKey) + { + break; + } + const IoHash& NextAttachmentHash = AttachmentHashes[NextSortedUploadAttachmentsIndex]; + uint64_t NextOpAttachmentSize = 
AttachmentSizes[NextSortedUploadAttachmentsIndex]; + ZEN_ASSERT(NextOpAttachmentSize <= m_Config.MaxChunkEmbedSize); + + if (CurrentOpAttachmentsSize + NextOpAttachmentSize > m_UsableBlockSize) + { + CurrentOpFillFullBlock = true; + break; + } + CurrentOpRawHashes.push_back(NextAttachmentHash); + CurrentOpChunkSizes.push_back(NextOpAttachmentSize); + CurrentOpAttachmentsSize += NextOpAttachmentSize; + + if (CurrentOpRawHashes.size() == m_Config.MaxChunksPerBlock) + { + CurrentOpFillFullBlock = true; + break; + } + } + SortedUploadAttachmentsIndex += CurrentOpChunkSizes.size(); + + while (!CurrentOpChunkSizes.empty()) + { + size_t CurrentOpAttachmentCount = CurrentOpChunkSizes.size(); + + ZEN_ASSERT(CurrentOpRawHashes.size() == CurrentOpChunkSizes.size()); + ZEN_ASSERT(CurrentOpAttachmentsSize <= m_UsableBlockSize); + ZEN_ASSERT(CurrentOpAttachmentCount <= m_Config.MaxChunksPerBlock); + + // Path A: gathered chunks exactly fill one block -- emit as a standalone block immediately. + if (CurrentOpFillFullBlock) + { + OnNewBlock(std::move(CurrentOpRawHashes)); + CurrentOpChunkSizes.clear(); + CurrentOpAttachmentsSize = 0; + CurrentOpFillFullBlock = false; + } + else if ((PendingBlockSize + CurrentOpAttachmentsSize) <= m_UsableBlockSize && + (PendingChunkHashes.size() + CurrentOpAttachmentCount) <= m_Config.MaxChunksPerBlock) + { + // Path B: all gathered chunks fit in the pending block -- merge them in. 
+ PendingChunkHashes.insert(PendingChunkHashes.end(), CurrentOpRawHashes.begin(), CurrentOpRawHashes.end()); + PendingBlockSize += CurrentOpAttachmentsSize; + ZEN_ASSERT(PendingBlockSize <= m_UsableBlockSize); + ZEN_ASSERT(PendingChunkHashes.size() <= m_Config.MaxChunksPerBlock); + + CurrentOpRawHashes.clear(); + CurrentOpChunkSizes.clear(); + CurrentOpAttachmentsSize = 0; + + if (PendingBlockSize == m_UsableBlockSize || PendingChunkHashes.size() == m_Config.MaxChunksPerBlock) + { + OnNewBlock(std::move(PendingChunkHashes)); + PendingChunkHashes.clear(); + PendingBlockSize = 0; + } + } + else if (PendingBlockSize > (m_UsableBlockSize * 3) / 4) + { + // Path C: gathered chunks don't fit AND pending block is >75% full by bytes -- flush pending + // block now; loop to re-evaluate gathered chunks against the freshly emptied pending block. + OnNewBlock(std::move(PendingChunkHashes)); + PendingChunkHashes.clear(); + PendingBlockSize = 0; + } + else + { + // Path D: gathered chunks don't fit AND pending block is <=75% full by bytes -- the + // binding constraint is chunk count. Greedily fill the pending block with as many + // chunks as fit, flush it, remove them from the current-op buffers, and loop with the + // remaining gathered chunks in the next iteration. 
+ + size_t AddedChunkCount = 0; + uint64_t AddedChunkSize = 0; + + for (size_t CurrentChunkIndex = 0; CurrentChunkIndex < CurrentOpRawHashes.size(); CurrentChunkIndex++) + { + uint64_t ChunkSize = CurrentOpChunkSizes[CurrentChunkIndex]; + if (PendingBlockSize + ChunkSize > m_UsableBlockSize) + { + break; + } + if (PendingChunkHashes.size() == m_Config.MaxChunksPerBlock) + { + break; + } + PendingBlockSize += ChunkSize; + PendingChunkHashes.push_back(CurrentOpRawHashes[CurrentChunkIndex]); + AddedChunkSize += ChunkSize; + AddedChunkCount++; + + ZEN_ASSERT(PendingBlockSize <= m_UsableBlockSize); + ZEN_ASSERT(PendingChunkHashes.size() <= m_Config.MaxChunksPerBlock); + } + ZEN_ASSERT(AddedChunkSize <= CurrentOpAttachmentsSize); + + ZEN_ASSERT(PendingBlockSize <= m_UsableBlockSize); + ZEN_ASSERT(PendingChunkHashes.size() <= m_Config.MaxChunksPerBlock); + ZEN_ASSERT(AddedChunkCount < CurrentOpRawHashes.size()); + + OnNewBlock(std::move(PendingChunkHashes)); + PendingChunkHashes.clear(); + PendingBlockSize = 0; + + CurrentOpRawHashes.erase(CurrentOpRawHashes.begin(), CurrentOpRawHashes.begin() + AddedChunkCount); + CurrentOpChunkSizes.erase(CurrentOpChunkSizes.begin(), CurrentOpChunkSizes.begin() + AddedChunkCount); + CurrentOpAttachmentsSize -= AddedChunkSize; + } + } + } + if (!PendingChunkHashes.empty()) + { + ZEN_ASSERT(PendingBlockSize < m_UsableBlockSize); + ZEN_ASSERT(PendingChunkHashes.size() < m_Config.MaxChunksPerBlock); + OnNewBlock(std::move(PendingChunkHashes)); + PendingChunkHashes.clear(); + } + } + + private: + // CalculateUsableBlockSize computes the maximum bytes available for chunk content in one + // block. The block header encodes: + // - A CompressedBuffer header of fixed size. + // - One VarUInt field encoding MaxChunksPerBlock. + // - MaxChunksPerBlock VarUInt entries each encoding one chunk size (bounded by + // MaxChunkEmbedSize, which determines the worst-case VarUInt width). 
+ // MaxHeaderSize is the worst-case total header size, so + // UsableBlockSize = MaxBlockSize - MaxHeaderSize is a conservative bound that guarantees + // chunk content always fits within the encoded block. + static uint64_t CalculateUsableBlockSize(const Configuration& Config) + { + ZEN_ASSERT(Config.MaxChunksPerBlock > 0); + ZEN_ASSERT(Config.MaxChunkEmbedSize > 0); + uint64_t MaxHeaderSize = CompressedBuffer::GetHeaderSizeForNoneEncoder() + MeasureVarUInt(Config.MaxChunksPerBlock) + + MeasureVarUInt(Config.MaxChunkEmbedSize) * Config.MaxChunksPerBlock; + ZEN_ASSERT(Config.MaxBlockSize > MaxHeaderSize); + return Config.MaxBlockSize - MaxHeaderSize; + } + + const Configuration m_Config; + const uint64_t m_UsableBlockSize = 0; + }; + + IoBuffer CompressToTempFile(const IoHash& RawHash, + const IoBuffer& RawData, + const std::filesystem::path& AttachmentPath, + OodleCompressor Compressor, + OodleCompressionLevel CompressionLevel) + { + if (IsFile(AttachmentPath)) + { + ZEN_WARN("Temp attachment file already exists at '{}', truncating", AttachmentPath); + } + BasicFile CompressedFile; + std::error_code Ec; + CompressedFile.Open(AttachmentPath, BasicFile::Mode::kTruncateDelete, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to create temp file for blob {} at '{}'", RawHash, AttachmentPath)); + } + + if (RawData.GetSize() < 512u * 1024u) + { + CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(RawData)), Compressor, CompressionLevel); + if (!CompressedBlob) + { + throw std::runtime_error(fmt::format("Failed to compress blob {}", RawHash)); + } + CompressedFile.Write(CompressedBlob.GetCompressed(), 0); + } + else + { + bool CouldCompress = CompressedBuffer::CompressToStream( + CompositeBuffer(SharedBuffer(RawData)), + [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset, SourceSize); + CompressedFile.Write(RangeBuffer, Offset); + }, + 
Compressor, + CompressionLevel); + if (!CouldCompress) + { + // Compressed is larger than source data... + CompressedBuffer CompressedBlob = + CompressedBuffer::Compress(SharedBuffer(std::move(RawData)), OodleCompressor::Mermaid, OodleCompressionLevel::None); + if (!CompressedBlob) + { + throw std::runtime_error(fmt::format("Failed to compress blob {}", RawHash)); + } + CompressedFile.SetFileSize(0); + CompressedFile.Write(CompressedBlob.GetCompressed(), 0); + } + } + IoBuffer TempAttachmentBuffer = IoBufferBuilder::MakeFromFile(AttachmentPath); + CompressedFile.Close(); + TempAttachmentBuffer.SetDeleteOnClose(true); + ZEN_ASSERT_SLOW(CompressedBuffer::FromCompressedNoValidate(IoBuffer(TempAttachmentBuffer)).CompressedBuffer::Decompress()); + return TempAttachmentBuffer; + } + + struct FoundAttachment + { + std::filesystem::path RawPath; // If not stored in cid + uint64_t Size = 0; + Oid Key = Oid::Zero; + }; + + CbObject RewriteOplog( + LoggerRef InLog, + ProjectStore::Project& Project, + ProjectStore::Oplog& Oplog, + bool IgnoreMissingAttachments, + bool EmbedLooseFiles, + const std::filesystem::path& AttachmentTempPath, + std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher>& UploadAttachments, // TODO: Rename to OutUploadAttachments + JobContext* OptionalContext) + { + ZEN_SCOPED_LOG(InLog); + size_t OpCount = 0; + CreateDirectories(AttachmentTempPath); + + auto RewriteOp = [&](const Oid& Key, CbObjectView Op, const std::function<void(CbObjectView)>& CB) { + bool OpRewritten = false; + CbArrayView Files = Op["files"sv].AsArrayView(); + if (Files.Num() == 0) + { + CB(Op); + return; + } + + CbWriter Cbo; + Cbo.BeginArray("files"sv); + + for (CbFieldView& Field : Files) + { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return; + } + + bool CopyField = true; + + if (CbObjectView View = Field.AsObjectView()) + { + IoHash DataHash = View["data"sv].AsHash(); + + if (DataHash == IoHash::Zero) + { + std::string_view ServerPath = 
View["serverpath"sv].AsString(); + std::filesystem::path FilePath = (Project.RootDir / ServerPath).make_preferred(); + MakeSafeAbsolutePathInPlace(FilePath); + if (!IsFile(FilePath)) + { + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Missing attachment '{}' for op '{}'", FilePath, View["id"sv].AsObjectId())); + if (IgnoreMissingAttachments) + { + continue; + } + else + { + ExtendableStringBuilder<1024> Sb; + Sb.Append("Failed to find attachment '"); + Sb.Append(FilePath.string()); + Sb.Append("' for op: \n"); + View.ToJson(Sb); + throw std::runtime_error(Sb.ToString()); + } + } + + { + Stopwatch HashTimer; + SharedBuffer DataBuffer(IoBufferBuilder::MakeFromFile(FilePath)); + DataHash = IoHash::HashBuffer(CompositeBuffer(DataBuffer)); + ZEN_INFO("Hashed loose file '{}' {}: {} in {}", + FilePath, + NiceBytes(DataBuffer.GetSize()), + DataHash, + NiceTimeSpanMs(HashTimer.GetElapsedTimeMs())); + } + + // Rewrite file array entry with new data reference + CbObjectWriter Writer; + RewriteCbObject(Writer, View, [&](CbObjectWriter&, CbFieldView Field) -> bool { + if (Field.GetName() == "data"sv) + { + // omit this field as we will write it explicitly ourselves + return true; + } + return false; + }); + Writer.AddBinaryAttachment("data"sv, DataHash); + UploadAttachments.insert_or_assign(DataHash, FoundAttachment{.RawPath = FilePath, .Key = Key}); + + CbObject RewrittenOp = Writer.Save(); + Cbo.AddObject(std::move(RewrittenOp)); + CopyField = false; + } + } + + if (CopyField) + { + Cbo.AddField(Field); + } + else + { + OpRewritten = true; + } + } + + if (!OpRewritten) + { + CB(Op); + return; + } + + Cbo.EndArray(); + CbArray FilesArray = Cbo.Save().AsArray(); + + CbObject RewrittenOp = RewriteCbObject(Op, [&](CbObjectWriter& NewWriter, CbFieldView Field) -> bool { + if (Field.GetName() == "files"sv) + { + NewWriter.AddArray("files"sv, FilesArray); + + return true; + } + + return false; + }); + CB(RewrittenOp); + }; + + 
remotestore_impl::ReportMessage(OptionalContext, "Building exported oplog and collecting attachments"); + + Stopwatch Timer; + + size_t TotalOpCount = Oplog.GetOplogEntryCount(); + Stopwatch RewriteOplogTimer; + CbObjectWriter SectionOpsWriter; + SectionOpsWriter.BeginArray("ops"sv); + { + Stopwatch BuildingOplogProgressTimer; + Oplog.IterateOplogWithKey([&](int, const Oid& Key, CbObjectView Op) { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return; + } + Op.IterateAttachments([&](CbFieldView FieldView) { + UploadAttachments.insert_or_assign(FieldView.AsAttachment(), FoundAttachment{.Key = Key}); + }); + if (EmbedLooseFiles) + { + RewriteOp(Key, Op, [&SectionOpsWriter](CbObjectView Op) { SectionOpsWriter << Op; }); + } + else + { + SectionOpsWriter << Op; + } + OpCount++; + + if (OpCount % 1000 == 0) + { + remotestore_impl::ReportProgress(OptionalContext, + "Building oplog"sv, + fmt::format("{} ops processed", OpCount), + TotalOpCount, + TotalOpCount - OpCount, + BuildingOplogProgressTimer.GetElapsedTimeMs()); + } + }); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } + if (TotalOpCount > 0) + { + remotestore_impl::ReportProgress(OptionalContext, + "Building oplog"sv, + fmt::format("{} ops processed", OpCount), + TotalOpCount, + 0, + BuildingOplogProgressTimer.GetElapsedTimeMs()); + } + } + SectionOpsWriter.EndArray(); // "ops" + + return SectionOpsWriter.Save(); + } + + struct FoundChunkedFile + { + IoHash RawHash = IoHash::Zero; + IoBuffer Source; + uint64_t Offset = 0; + uint64_t Size = 0; + }; + + void FindChunkSizes(CidStore& ChunkStore, + WorkerThreadPool& WorkerPool, + size_t MaxChunkEmbedSize, + size_t ChunkFileSizeLimit, + bool AllowChunking, + const std::filesystem::path& AttachmentTempPath, + std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher>& UploadAttachments, + std::unordered_set<IoHash, IoHash::Hasher>& MissingHashes, + std::vector<FoundChunkedFile>& AttachmentsToChunk, + JobContext* OptionalContext) 
+ { + if (UploadAttachments.empty()) + { + return; + } + Stopwatch FindChunkSizesTimer; + + RwLock FindChunkSizesLock; + + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + for (auto& It : UploadAttachments) + { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + break; + } + Work.ScheduleWork( + WorkerPool, + [&ChunkStore, + UploadAttachment = &It.second, + RawHash = It.first, + &FindChunkSizesLock, + &MissingHashes, + AttachmentTempPath, + MaxChunkEmbedSize, + ChunkFileSizeLimit, + AllowChunking, + &AttachmentsToChunk, + OptionalContext](std::atomic<bool>& AbortFlag) { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + if (AbortFlag) + { + return; + } + if (!UploadAttachment->RawPath.empty()) + { + const std::filesystem::path& FilePath = UploadAttachment->RawPath; + IoBuffer RawData = IoBufferBuilder::MakeFromFile(FilePath); + if (RawData) + { + UploadAttachment->Size = RawData.GetSize(); + if (AllowChunking && UploadAttachment->Size > ChunkFileSizeLimit) + { + FindChunkSizesLock.WithExclusiveLock([&]() { + AttachmentsToChunk.push_back( + FoundChunkedFile{.RawHash = RawHash, .Source = RawData, .Offset = 0, .Size = RawData.GetSize()}); + }); + } + } + else + { + FindChunkSizesLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); }); + } + } + else + { + IoBuffer Data = ChunkStore.FindChunkByCid(RawHash); + if (Data) + { + UploadAttachment->Size = Data.GetSize(); + if (AllowChunking && Data.IsWholeFile()) + { + IoHash VerifyRawHash; + uint64_t VerifyRawSize; + CompressedBuffer Compressed = + CompressedBuffer::FromCompressed(SharedBuffer(Data), VerifyRawHash, VerifyRawSize); + if (Compressed) + { + if (VerifyRawSize > ChunkFileSizeLimit) + { + OodleCompressor Compressor; + OodleCompressionLevel CompressionLevel; + uint64_t BlockSize; + if 
(Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize)) + { + if (CompressionLevel == OodleCompressionLevel::None) + { + CompositeBuffer Decompressed = Compressed.DecompressToComposite(); + if (Decompressed) + { + std::span<const SharedBuffer> Segments = Decompressed.GetSegments(); + if (Segments.size() == 1) + { + IoBuffer DecompressedData = Segments[0].AsIoBuffer(); + IoBufferFileReference DecompressedFileRef; + if (DecompressedData.GetFileReference(DecompressedFileRef)) + { + // Are we still pointing to disk? + FindChunkSizesLock.WithExclusiveLock([&]() { + AttachmentsToChunk.push_back( + FoundChunkedFile{.RawHash = RawHash, + .Source = Data, + .Offset = DecompressedFileRef.FileChunkOffset, + .Size = DecompressedFileRef.FileChunkSize}); + }); + } + } + } + } + } + } + } + } + } + else + { + FindChunkSizesLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); }); + } + } + }); + } + + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, IsPaused); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + remotestore_impl::ReportProgress(OptionalContext, + "Finding attachments"sv, + fmt::format("{}{} remaining...", AbortFlag.load() ? 
"Aborting, " : "", PendingWork), + UploadAttachments.size(), + PendingWork, + FindChunkSizesTimer.GetElapsedTimeMs()); + }); + + if (!AbortFlag.load()) + { + remotestore_impl::ReportProgress(OptionalContext, + "Finding attachments"sv, + "", + UploadAttachments.size(), + 0, + FindChunkSizesTimer.GetElapsedTimeMs()); + } + } + + struct ChunkedFile + { + IoBuffer Source; + ChunkedInfoWithSource Chunked; + }; + + std::vector<ChunkedFile> ChunkAttachments(WorkerThreadPool& WorkerPool, + const std::vector<remotestore_impl::FoundChunkedFile>& AttachmentsToChunk, + JobContext* OptionalContext) + { + if (AttachmentsToChunk.empty()) + { + return {}; + } + Stopwatch ChunkAttachmentsTimer; + + std::vector<ChunkedFile> ChunkedFiles(AttachmentsToChunk.size()); + + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + for (size_t ChunkFileIndexToChunk = 0; ChunkFileIndexToChunk < AttachmentsToChunk.size(); ChunkFileIndexToChunk++) + { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + break; + } + Work.ScheduleWork(WorkerPool, + [&AttachmentsToChunk, ChunkFileIndexToChunk, &ChunkedFiles, OptionalContext](std::atomic<bool>& AbortFlag) { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + if (AbortFlag) + { + return; + } + const remotestore_impl::FoundChunkedFile& AttachmentToChunk = AttachmentsToChunk[ChunkFileIndexToChunk]; + const IoHash& RawHash = AttachmentToChunk.RawHash; + + const IoBuffer& Buffer = AttachmentToChunk.Source; + IoBufferFileReference FileRef; + bool IsFile = Buffer.GetFileReference(FileRef); + ZEN_ASSERT(IsFile); + + Stopwatch ChunkOneTimer; + + uint64_t Offset = AttachmentToChunk.Offset; + uint64_t Size = AttachmentToChunk.Size; + + BasicFile SourceFile; + SourceFile.Attach(FileRef.FileHandle); + auto __ = MakeGuard([&SourceFile]() { SourceFile.Detach(); }); + + ChunkedFile& Chunked = 
ChunkedFiles[ChunkFileIndexToChunk]; + Chunked.Source = Buffer; + Chunked.Chunked = ChunkData(SourceFile, Offset, Size, UShaderByteCodeParams); + ZEN_ASSERT(Chunked.Chunked.Info.RawHash == RawHash); + + ZEN_INFO("Chunked large attachment '{}' {} into {} chunks in {}", + RawHash, + NiceBytes(Chunked.Chunked.Info.RawSize), + Chunked.Chunked.Info.ChunkHashes.size(), + NiceTimeSpanMs(ChunkOneTimer.GetElapsedTimeMs())); + }); + } + + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, IsPaused); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + remotestore_impl::ReportProgress(OptionalContext, + "Chunking attachments"sv, + fmt::format("{}{} remaining...", AbortFlag.load() ? "Aborting, " : "", PendingWork), + AttachmentsToChunk.size(), + PendingWork, + ChunkAttachmentsTimer.GetElapsedTimeMs()); + }); + + if (!AbortFlag.load()) + { + remotestore_impl::ReportProgress(OptionalContext, + "Chunking attachments"sv, + "", + AttachmentsToChunk.size(), + 0, + ChunkAttachmentsTimer.GetElapsedTimeMs()); + } + return ChunkedFiles; + } + + void ResolveAttachments(CidStore& ChunkStore, + WorkerThreadPool& WorkerPool, + uint64_t MaxChunkEmbedSize, + const std::filesystem::path& AttachmentTempPath, + std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher>& UploadAttachments, + std::unordered_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher>& LargeChunkAttachments, + std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher>& LooseUploadAttachments, + JobContext* OptionalContext) + { + ZEN_ASSERT(!UploadAttachments.empty()); + Stopwatch UploadAttachmentsTimer; + + RwLock ResolveLock; + + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + for (auto& It : UploadAttachments) + { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + break; 
+ } + Work.ScheduleWork( + WorkerPool, + [&ChunkStore, + MaxChunkEmbedSize, + &AttachmentTempPath, + &ResolveLock, + &LargeChunkAttachments, + &LooseUploadAttachments, + UploadAttachment = &It.second, + RawHash = It.first, + OptionalContext](std::atomic<bool>& AbortFlag) { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + if (AbortFlag) + { + return; + } + if (!UploadAttachment->RawPath.empty()) + { + if (UploadAttachment->Size > (MaxChunkEmbedSize * 2)) + { + // Assume the compressed file is going to be larger than MaxChunkEmbedSize, even if it isn't + // it will be a loose attachment instead of going into a block + + TGetAttachmentBufferFunc FetchFunc = + [RawPath = UploadAttachment->RawPath, AttachmentTempPath, RawSize = UploadAttachment->Size]( + const IoHash& RawHash) -> CompositeBuffer { + IoBuffer RawData = IoBufferBuilder::MakeFromFile(RawPath); + if (!RawData) + { + throw std::runtime_error( + fmt::format("Failed to read source file for blob {} from '{}'", RawHash, RawPath)); + } + + std::filesystem::path AttachmentPath = AttachmentTempPath; + AttachmentPath.append(RawHash.ToHexString()); + + IoBuffer TempAttachmentBuffer = remotestore_impl::CompressToTempFile(RawHash, + RawData, + AttachmentPath, + OodleCompressor::Mermaid, + OodleCompressionLevel::VeryFast); + if (!TempAttachmentBuffer) + { + throw std::runtime_error(fmt::format("Failed to compressed source file for blob {} from '{}' to '{}'", + RawHash, + RawPath, + AttachmentPath)); + } + TempAttachmentBuffer.SetDeleteOnClose(true); + + ZEN_INFO("Saved temp attachment to '{}', {} ({})", + AttachmentPath, + NiceBytes(RawSize), + NiceBytes(TempAttachmentBuffer.GetSize())); + return CompositeBuffer(SharedBuffer(std::move(TempAttachmentBuffer))); + }; + + RwLock::ExclusiveLockScope _(ResolveLock); + LargeChunkAttachments.insert_or_assign(RawHash, std::move(FetchFunc)); + } + else + { + // Compress inline - check compressed size to see if it should go into a block or 
not + IoBuffer RawData = IoBufferBuilder::MakeFromFile(UploadAttachment->RawPath); + if (!RawData) + { + throw std::runtime_error( + fmt::format("Failed to read source file for blob {} from '{}'", RawHash, UploadAttachment->RawPath)); + } + + std::filesystem::path TempFilePath = AttachmentTempPath; + TempFilePath.append(RawHash.ToHexString()); + + IoBuffer TempAttachmentBuffer = remotestore_impl::CompressToTempFile(RawHash, + RawData, + TempFilePath, + OodleCompressor::Mermaid, + OodleCompressionLevel::VeryFast); + TempAttachmentBuffer.SetDeleteOnClose(true); + + uint64_t CompressedSize = TempAttachmentBuffer.GetSize(); + + ZEN_INFO("Saved temp attachment to '{}', {} ({})", + TempFilePath, + NiceBytes(UploadAttachment->Size), + NiceBytes(CompressedSize)); + + if (CompressedSize > MaxChunkEmbedSize) + { + TGetAttachmentBufferFunc FetchFunc = [Data = std::move(TempAttachmentBuffer)](const IoHash&) mutable { + return CompositeBuffer(SharedBuffer(std::move(Data))); + }; + + RwLock::ExclusiveLockScope _(ResolveLock); + LargeChunkAttachments.insert_or_assign(RawHash, std::move(FetchFunc)); + } + else + { + UploadAttachment->Size = CompressedSize; + + std::pair<uint64_t, IoBuffer> LooseAttachment(RawData.GetSize(), std::move(TempAttachmentBuffer)); + + RwLock::ExclusiveLockScope _(ResolveLock); + LooseUploadAttachments.insert_or_assign(RawHash, std::move(LooseAttachment)); + } + } + } + else + { + if (UploadAttachment->Size > MaxChunkEmbedSize) + { + TGetAttachmentBufferFunc FetchFunc = [&ChunkStore](const IoHash& RawHash) { + return CompositeBuffer(SharedBuffer(ChunkStore.FindChunkByCid(RawHash))); + }; + RwLock::ExclusiveLockScope _(ResolveLock); + LargeChunkAttachments.insert_or_assign(RawHash, std::move(FetchFunc)); + } + } + }); + } + + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, IsPaused); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + 
remotestore_impl::ReportProgress(OptionalContext, + "Resolving attachments"sv, + fmt::format("{}{} remaining...", AbortFlag.load() ? "Aborting, " : "", PendingWork), + UploadAttachments.size(), + PendingWork, + UploadAttachmentsTimer.GetElapsedTimeMs()); + }); + + if (!AbortFlag.load()) + { + remotestore_impl::ReportProgress(OptionalContext, + "Resolving attachments"sv, + "", + UploadAttachments.size(), + 0, + UploadAttachmentsTimer.GetElapsedTimeMs()); + } + } + RemoteProjectStore::Result WriteOplogSection(ProjectStore::Oplog& Oplog, const CbObjectView& SectionObject, JobContext* OptionalContext) { using namespace std::literals; @@ -198,7 +1100,8 @@ namespace remotestore_impl { "Writing oplog"sv, fmt::format("{} remaining...", OpCount - OpsCompleteCount), OpCount, - OpCount - OpsCompleteCount); + OpCount - OpsCompleteCount, + Timer.GetElapsedTimeMs()); }; BinaryWriter Writer; @@ -222,7 +1125,7 @@ namespace remotestore_impl { if (OpCount > 0) { - ReportProgress(OptionalContext, "Writing oplog"sv, ""sv, OpCount, 0); + ReportProgress(OptionalContext, "Writing oplog"sv, ""sv, OpCount, 0, Timer.GetElapsedTimeMs()); } return RemoteProjectStore::Result{.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0}; @@ -240,70 +1143,78 @@ namespace remotestore_impl { std::atomic<uint64_t> AttachmentsStored = 0; std::atomic<uint64_t> AttachmentBytesStored = 0; std::atomic_size_t MissingAttachmentCount = 0; + + std::atomic<uint64_t> ChunksCompleteCount = 0; }; - class JobContextLogOutput : public OperationLogOutput + class JobContextSink : public logging::Sink { public: - JobContextLogOutput(JobContext* OptionalContext) : m_OptionalContext(OptionalContext) {} - virtual void EmitLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args) override + explicit JobContextSink(JobContext* Context) : m_Context(Context) {} + + void Log(const logging::LogMessage& Msg) override { - ZEN_UNUSED(LogLevel); - if (m_OptionalContext) + if (m_Context) { - fmt::basic_memory_buffer<char, 
250> MessageBuffer; - fmt::vformat_to(fmt::appender(MessageBuffer), Format, Args); - remotestore_impl::ReportMessage(m_OptionalContext, std::string_view(MessageBuffer.data(), MessageBuffer.size())); + m_Context->ReportMessage(Msg.GetPayload()); } } - virtual void SetLogOperationName(std::string_view Name) override { ZEN_UNUSED(Name); } - virtual void SetLogOperationProgress(uint32_t StepIndex, uint32_t StepCount) override { ZEN_UNUSED(StepIndex, StepCount); } - virtual uint32_t GetProgressUpdateDelayMS() override { return 0; } - virtual ProgressBar* CreateProgressBar(std::string_view InSubTask) override + void Flush() override {} + void SetFormatter(std::unique_ptr<logging::Formatter>) override {} + + private: + JobContext* m_Context; + }; + + class JobContextLogger + { + public: + explicit JobContextLogger(JobContext* OptionalContext) { - ZEN_UNUSED(InSubTask); - return nullptr; + if (!OptionalContext) + { + return; + } + logging::SinkPtr ContextSink(new JobContextSink(OptionalContext)); + Ref<logging::BroadcastSink> DefaultSink = GetDefaultBroadcastSink(); + std::vector<logging::SinkPtr> Sinks; + if (DefaultSink) + { + Sinks.push_back(DefaultSink); + } + Sinks.push_back(std::move(ContextSink)); + Ref<logging::BroadcastSink> Broadcast(new logging::BroadcastSink(std::move(Sinks))); + m_Log = Ref<logging::Logger>(new logging::Logger("jobcontext", Broadcast)); } + LoggerRef Log() const { return m_Log ? 
LoggerRef(*m_Log) : zen::Log(); } + private: - JobContext* m_OptionalContext; + Ref<logging::Logger> m_Log; }; - void DownloadAndSaveBlockChunks(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - bool IgnoreMissingAttachments, - JobContext* OptionalContext, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - Latch& AttachmentsDownloadLatch, - Latch& AttachmentsWriteLatch, - AsyncRemoteResult& RemoteResult, + void DownloadAndSaveBlockChunks(LoadOplogContext& Context, + ParallelWork& AttachmentWork, DownloadInfo& Info, Stopwatch& LoadAttachmentsTimer, std::atomic_uint64_t& DownloadStartMS, ThinChunkBlockDescription&& ThinBlockDescription, std::vector<uint32_t>&& NeededChunkIndexes) { - AttachmentsDownloadLatch.AddCount(1); - NetworkWorkerPool.ScheduleWork( - [&RemoteStore, - &ChunkStore, - &WorkerPool, - &AttachmentsDownloadLatch, - &AttachmentsWriteLatch, - &RemoteResult, + AttachmentWork.ScheduleWork( + Context.NetworkWorkerPool, + [&Context, + &AttachmentWork, ThinBlockDescription = std::move(ThinBlockDescription), NeededChunkIndexes = std::move(NeededChunkIndexes), &Info, &LoadAttachmentsTimer, - &DownloadStartMS, - IgnoreMissingAttachments, - OptionalContext]() { + &DownloadStartMS](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("DownloadBlockChunks"); + ZEN_SCOPED_LOG(Context.Log); - auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); - if (RemoteResult.IsError()) + if (AbortFlag) { return; } @@ -318,18 +1229,18 @@ namespace remotestore_impl { uint64_t Unset = (std::uint64_t)-1; DownloadStartMS.compare_exchange_strong(Unset, LoadAttachmentsTimer.GetElapsedTimeMs()); - RemoteProjectStore::LoadAttachmentsResult Result = RemoteStore.LoadAttachments(Chunks); + RemoteProjectStore::LoadAttachmentsResult Result = Context.RemoteStore.LoadAttachments(Chunks); if (Result.ErrorCode) { - ReportMessage(OptionalContext, + ReportMessage(Context.OptionalJobContext, fmt::format("Failed to load attachments with 
{} chunks ({}): {}", Chunks.size(), - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); + Result.ErrorCode, + Result.Reason)); Info.MissingAttachmentCount.fetch_add(1); - if (IgnoreMissingAttachments) + if (!Context.IgnoreMissingAttachments) { - RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); + throw RemoteStoreError(Result.Reason, Result.ErrorCode, Result.Text); } return; } @@ -339,76 +1250,64 @@ namespace remotestore_impl { uint64_t ChunkSize = It.second.GetCompressedSize(); Info.AttachmentBytesDownloaded.fetch_add(ChunkSize); } - ZEN_INFO("Loaded {} bulk attachments in {}", - Chunks.size(), - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000))); - if (RemoteResult.IsError()) + remotestore_impl::ReportMessage(Context.OptionalJobContext, + fmt::format("Loaded {} bulk attachments in {}", + Chunks.size(), + NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)))); + if (AbortFlag) { return; } - AttachmentsWriteLatch.AddCount(1); - WorkerPool.ScheduleWork( - [&AttachmentsWriteLatch, &RemoteResult, &Info, &ChunkStore, Chunks = std::move(Result.Chunks)]() { - auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); - if (RemoteResult.IsError()) + AttachmentWork.ScheduleWork( + Context.WorkerPool, + [&Info, &Context, Chunks = std::move(Result.Chunks)](std::atomic<bool>& AbortFlag) { + if (AbortFlag) { return; } if (!Chunks.empty()) { - try - { - std::vector<IoBuffer> WriteAttachmentBuffers; - std::vector<IoHash> WriteRawHashes; - WriteAttachmentBuffers.reserve(Chunks.size()); - WriteRawHashes.reserve(Chunks.size()); + std::vector<IoBuffer> WriteAttachmentBuffers; + std::vector<IoHash> WriteRawHashes; + WriteAttachmentBuffers.reserve(Chunks.size()); + WriteRawHashes.reserve(Chunks.size()); - for (const auto& It : Chunks) - { - WriteAttachmentBuffers.push_back(It.second.GetCompressed().Flatten().AsIoBuffer()); - WriteRawHashes.push_back(It.first); - } - 
std::vector<CidStore::InsertResult> InsertResults = - ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes, CidStore::InsertMode::kCopyOnly); + for (const auto& It : Chunks) + { + WriteAttachmentBuffers.push_back(It.second.GetCompressed().Flatten().AsIoBuffer()); + WriteRawHashes.push_back(It.first); + } + std::vector<CidStore::InsertResult> InsertResults = + Context.ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes, CidStore::InsertMode::kCopyOnly); - for (size_t Index = 0; Index < InsertResults.size(); Index++) + for (size_t Index = 0; Index < InsertResults.size(); Index++) + { + if (InsertResults[Index].New) { - if (InsertResults[Index].New) - { - Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); - Info.AttachmentsStored.fetch_add(1); - } + Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); + Info.AttachmentsStored.fetch_add(1); } } - catch (const std::exception& Ex) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to bulk save {} attachments", Chunks.size()), - Ex.what()); - } } }, WorkerThreadPool::EMode::EnableBacklog); } + catch (const RemoteStoreError&) + { + throw; + } catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to bulk load {} attachments", NeededChunkIndexes.size()), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed to bulk load {} attachments", NeededChunkIndexes.size()), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); }; - void DownloadAndSaveBlock(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - bool IgnoreMissingAttachments, - JobContext* OptionalContext, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - Latch& AttachmentsDownloadLatch, - Latch& AttachmentsWriteLatch, - AsyncRemoteResult& RemoteResult, + void 
DownloadAndSaveBlock(LoadOplogContext& Context, + ParallelWork& AttachmentWork, DownloadInfo& Info, Stopwatch& LoadAttachmentsTimer, std::atomic_uint64_t& DownloadStartMS, @@ -417,28 +1316,21 @@ namespace remotestore_impl { std::span<std::atomic<bool>> ChunkDownloadedFlags, uint32_t RetriesLeft) { - AttachmentsDownloadLatch.AddCount(1); - NetworkWorkerPool.ScheduleWork( - [&AttachmentsDownloadLatch, - &AttachmentsWriteLatch, - &ChunkStore, - &RemoteStore, - &NetworkWorkerPool, - &WorkerPool, - &RemoteResult, + AttachmentWork.ScheduleWork( + Context.NetworkWorkerPool, + [&AttachmentWork, + &Context, &Info, &LoadAttachmentsTimer, &DownloadStartMS, - IgnoreMissingAttachments, - OptionalContext, RetriesLeft, BlockHash = IoHash(BlockHash), &AllNeededPartialChunkHashesLookup, - ChunkDownloadedFlags]() { + ChunkDownloadedFlags](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("DownloadBlock"); + ZEN_SCOPED_LOG(Context.Log); - auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); - if (RemoteResult.IsError()) + if (AbortFlag) { return; } @@ -446,54 +1338,65 @@ namespace remotestore_impl { { uint64_t Unset = (std::uint64_t)-1; DownloadStartMS.compare_exchange_strong(Unset, LoadAttachmentsTimer.GetElapsedTimeMs()); - RemoteProjectStore::LoadAttachmentResult BlockResult = RemoteStore.LoadAttachment(BlockHash, {}); - if (BlockResult.ErrorCode) + + IoBuffer BlobBuffer; + if (Context.OptionalCache) { - ReportMessage(OptionalContext, - fmt::format("Failed to download block attachment {} ({}): {}", - BlockHash, - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) - { - RemoteResult.SetError(BlockResult.ErrorCode, BlockResult.Reason, BlockResult.Text); - } - return; + BlobBuffer = Context.OptionalCache->GetBuildBlob(Context.CacheBuildId, BlockHash); } - if (RemoteResult.IsError()) + + if (!BlobBuffer) { - return; + RemoteProjectStore::LoadAttachmentResult 
BlockResult = Context.RemoteStore.LoadAttachment(BlockHash); + if (BlockResult.ErrorCode) + { + ReportMessage(Context.OptionalJobContext, + fmt::format("Failed to download block attachment {} ({}): {}", + BlockHash, + BlockResult.Reason, + BlockResult.Text)); + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) + { + throw RemoteStoreError(BlockResult.Reason, BlockResult.ErrorCode, BlockResult.Text); + } + return; + } + if (AbortFlag) + { + return; + } + BlobBuffer = std::move(BlockResult.Bytes); + ZEN_DEBUG("Loaded block attachment '{}' in {} ({})", + BlockHash, + NiceTimeSpanMs(static_cast<uint64_t>(BlockResult.ElapsedSeconds * 1000)), + NiceBytes(BlobBuffer.Size())); + if (Context.OptionalCache && Context.PopulateCache) + { + Context.OptionalCache->PutBuildBlob(Context.CacheBuildId, + BlockHash, + BlobBuffer.GetContentType(), + CompositeBuffer(SharedBuffer(BlobBuffer))); + } } - uint64_t BlockSize = BlockResult.Bytes.GetSize(); + uint64_t BlockSize = BlobBuffer.GetSize(); Info.AttachmentBlocksDownloaded.fetch_add(1); - ZEN_DEBUG("Loaded block attachment '{}' in {} ({})", - BlockHash, - NiceTimeSpanMs(static_cast<uint64_t>(BlockResult.ElapsedSeconds * 1000)), - NiceBytes(BlockSize)); Info.AttachmentBlockBytesDownloaded.fetch_add(BlockSize); - AttachmentsWriteLatch.AddCount(1); - WorkerPool.ScheduleWork( - [&AttachmentsDownloadLatch, - &AttachmentsWriteLatch, - &ChunkStore, - &RemoteStore, - &NetworkWorkerPool, - &WorkerPool, - &RemoteResult, + AttachmentWork.ScheduleWork( + Context.WorkerPool, + [&AttachmentWork, + &Context, &Info, &LoadAttachmentsTimer, &DownloadStartMS, - IgnoreMissingAttachments, - OptionalContext, RetriesLeft, BlockHash = IoHash(BlockHash), &AllNeededPartialChunkHashesLookup, ChunkDownloadedFlags, - Bytes = std::move(BlockResult.Bytes)]() { - auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); - if (RemoteResult.IsError()) + Bytes = std::move(BlobBuffer)](std::atomic<bool>& 
AbortFlag) { + ZEN_SCOPED_LOG(Context.Log); + if (AbortFlag) { return; } @@ -506,59 +1409,103 @@ namespace remotestore_impl { IoHash RawHash; uint64_t RawSize; CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Bytes), RawHash, RawSize); + + std::string ErrorString; + if (!Compressed) { - if (RetriesLeft > 0) + ErrorString = + fmt::format("Block attachment {} is malformed, can't parse as compressed binary", BlockHash); + } + else if (RawHash != BlockHash) + { + ErrorString = fmt::format("Block attachment {} has mismatching raw hash ({})", BlockHash, RawHash); + } + else if (CompositeBuffer BlockPayload = Compressed.DecompressToComposite(); !BlockPayload) + { + ErrorString = fmt::format("Block attachment {} is malformed, can't decompress payload", BlockHash); + } + else + { + uint64_t PotentialSize = 0; + uint64_t UsedSize = 0; + uint64_t BlockSize = BlockPayload.GetSize(); + + uint64_t BlockHeaderSize = 0; + + bool StoreChunksOK = IterateChunkBlock( + BlockPayload.Flatten(), + [&AllNeededPartialChunkHashesLookup, + &ChunkDownloadedFlags, + &WriteAttachmentBuffers, + &WriteRawHashes, + &Info, + &PotentialSize](CompressedBuffer&& Chunk, const IoHash& AttachmentRawHash) { + auto ChunkIndexIt = AllNeededPartialChunkHashesLookup.find(AttachmentRawHash); + if (ChunkIndexIt != AllNeededPartialChunkHashesLookup.end()) + { + bool Expected = false; + if (ChunkDownloadedFlags[ChunkIndexIt->second].compare_exchange_strong(Expected, true)) + { + WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer()); + IoHash RawHash; + uint64_t RawSize; + ZEN_ASSERT(CompressedBuffer::ValidateCompressedHeader( + WriteAttachmentBuffers.back(), + RawHash, + RawSize, + /*OutOptionalTotalCompressedSize*/ nullptr)); + ZEN_ASSERT(RawHash == AttachmentRawHash); + WriteRawHashes.emplace_back(AttachmentRawHash); + PotentialSize += WriteAttachmentBuffers.back().GetSize(); + } + } + }, + BlockHeaderSize); + + if (!StoreChunksOK) { - ReportMessage( - 
OptionalContext, - fmt::format( - "Block attachment {} is malformed, can't parse as compressed binary, retrying download", - BlockHash)); - return DownloadAndSaveBlock(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, - Info, - LoadAttachmentsTimer, - DownloadStartMS, - BlockHash, - AllNeededPartialChunkHashesLookup, - ChunkDownloadedFlags, - RetriesLeft - 1); + ErrorString = fmt::format("Invalid format for block {}", BlockHash); + } + else + { + if (!WriteAttachmentBuffers.empty()) + { + std::vector<CidStore::InsertResult> Results = + Context.ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes); + for (size_t Index = 0; Index < Results.size(); Index++) + { + const CidStore::InsertResult& Result = Results[Index]; + if (Result.New) + { + Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); + Info.AttachmentsStored.fetch_add(1); + UsedSize += WriteAttachmentBuffers[Index].GetSize(); + } + } + Info.ChunksCompleteCount += WriteAttachmentBuffers.size(); + if (UsedSize < BlockSize) + { + ZEN_DEBUG("Used {} (skipping {}) out of {} for block {} ({} %) (use of matching {}%)", + NiceBytes(UsedSize), + NiceBytes(BlockSize - UsedSize), + NiceBytes(BlockSize), + BlockHash, + (100 * UsedSize) / BlockSize, + PotentialSize > 0 ? 
(UsedSize * 100) / PotentialSize : 0); + } + } } - ReportMessage( - OptionalContext, - fmt::format("Block attachment {} is malformed, can't parse as compressed binary", BlockHash)); - RemoteResult.SetError( - gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - fmt::format("Block attachment {} is malformed, can't parse as compressed binary", BlockHash), - {}); - return; } - CompositeBuffer BlockPayload = Compressed.DecompressToComposite(); - if (!BlockPayload) + + if (!ErrorString.empty()) { if (RetriesLeft > 0) { - ReportMessage( - OptionalContext, - fmt::format("Block attachment {} is malformed, can't decompress payload, retrying download", - BlockHash)); - return DownloadAndSaveBlock(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, + ReportMessage(Context.OptionalJobContext, fmt::format("{}, retrying download", ErrorString)); + + return DownloadAndSaveBlock(Context, + AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, @@ -567,128 +1514,214 @@ namespace remotestore_impl { ChunkDownloadedFlags, RetriesLeft - 1); } - ReportMessage(OptionalContext, - fmt::format("Block attachment {} is malformed, can't decompress payload", BlockHash)); - RemoteResult.SetError( - gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - fmt::format("Block attachment {} is malformed, can't decompress payload", BlockHash), - {}); - return; - } - if (RawHash != BlockHash) - { - ReportMessage(OptionalContext, - fmt::format("Block attachment {} has mismatching raw hash ({})", BlockHash, RawHash)); - RemoteResult.SetError( - gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - fmt::format("Block attachment {} has mismatching raw hash ({})", BlockHash, RawHash), - {}); - return; - } - - uint64_t PotentialSize = 0; - uint64_t UsedSize = 0; - uint64_t BlockSize = BlockPayload.GetSize(); - - uint64_t BlockHeaderSize = 0; - - bool 
StoreChunksOK = IterateChunkBlock( - BlockPayload.Flatten(), - [&AllNeededPartialChunkHashesLookup, - &ChunkDownloadedFlags, - &WriteAttachmentBuffers, - &WriteRawHashes, - &Info, - &PotentialSize](CompressedBuffer&& Chunk, const IoHash& AttachmentRawHash) { - auto ChunkIndexIt = AllNeededPartialChunkHashesLookup.find(AttachmentRawHash); - if (ChunkIndexIt != AllNeededPartialChunkHashesLookup.end()) - { - bool Expected = false; - if (ChunkDownloadedFlags[ChunkIndexIt->second].compare_exchange_strong(Expected, true)) - { - WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer()); - IoHash RawHash; - uint64_t RawSize; - ZEN_ASSERT( - CompressedBuffer::ValidateCompressedHeader(WriteAttachmentBuffers.back(), - RawHash, - RawSize, - /*OutOptionalTotalCompressedSize*/ nullptr)); - ZEN_ASSERT(RawHash == AttachmentRawHash); - WriteRawHashes.emplace_back(AttachmentRawHash); - PotentialSize += WriteAttachmentBuffers.back().GetSize(); - } - } - }, - BlockHeaderSize); - - if (!StoreChunksOK) - { - ReportMessage(OptionalContext, - fmt::format("Block attachment {} has invalid format ({}): {}", - BlockHash, - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - fmt::format("Invalid format for block {}", BlockHash), - {}); - return; - } - - if (!WriteAttachmentBuffers.empty()) - { - auto Results = ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes); - for (size_t Index = 0; Index < Results.size(); Index++) + else { - const auto& Result = Results[Index]; - if (Result.New) - { - Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); - Info.AttachmentsStored.fetch_add(1); - UsedSize += WriteAttachmentBuffers[Index].GetSize(); - } + ReportMessage(Context.OptionalJobContext, ErrorString); + throw RemoteStoreError(ErrorString, + gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), + {}); } - ZEN_DEBUG("Used {} (matching {}) out 
of {} for block {} ({} %) (use of matching {}%)", - NiceBytes(UsedSize), - NiceBytes(PotentialSize), - NiceBytes(BlockSize), - BlockHash, - (100 * UsedSize) / BlockSize, - PotentialSize > 0 ? (UsedSize * 100) / PotentialSize : 0); } } + catch (const RemoteStoreError&) + { + throw; + } catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed save block attachment {}", BlockHash), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed to save block attachment {}", BlockHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); } + catch (const RemoteStoreError&) + { + throw; + } catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to block attachment {}", BlockHash), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed to download block attachment {}", BlockHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); }; - void DownloadAndSavePartialBlock(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - bool IgnoreMissingAttachments, - JobContext* OptionalContext, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - Latch& AttachmentsDownloadLatch, - Latch& AttachmentsWriteLatch, - AsyncRemoteResult& RemoteResult, + void DownloadPartialBlock(LoadOplogContext& Context, + std::atomic<bool>& AbortFlag, + DownloadInfo& Info, + double& DownloadTimeSeconds, + const ChunkBlockDescription& BlockDescription, + bool BlockExistsInCache, + std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRangeDescriptors, + size_t BlockRangeIndexStart, + size_t BlockRangeCount, + std::function<void(IoBuffer&& Buffer, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>&& OnDownloaded) + { + 
ZEN_ASSERT(Context.StoreMaxRangeCountPerRequest != 0); + ZEN_ASSERT(BlockExistsInCache == false || Context.CacheMaxRangeCountPerRequest != 0); + + std::vector<std::pair<uint64_t, uint64_t>> Ranges; + Ranges.reserve(BlockRangeDescriptors.size()); + for (size_t BlockRangeIndex = BlockRangeIndexStart; BlockRangeIndex < BlockRangeIndexStart + BlockRangeCount; BlockRangeIndex++) + { + const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = BlockRangeDescriptors[BlockRangeIndex]; + Ranges.push_back(std::make_pair(BlockRange.RangeStart, BlockRange.RangeLength)); + } + + size_t SubBlockRangeCount = BlockRangeCount; + size_t SubRangeCountComplete = 0; + std::span<const std::pair<uint64_t, uint64_t>> RangesSpan(Ranges); + + while (SubRangeCountComplete < SubBlockRangeCount) + { + if (AbortFlag.load()) + { + break; + } + + size_t SubRangeStartIndex = BlockRangeIndexStart + SubRangeCountComplete; + if (BlockExistsInCache) + { + ZEN_ASSERT(Context.OptionalCache); + size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, Context.CacheMaxRangeCountPerRequest); + + if (SubRangeCount == 1) + { + // Legacy single-range path, prefer that for max compatibility + + const std::pair<uint64_t, uint64_t> SubRange = RangesSpan[SubRangeCountComplete]; + Stopwatch CacheTimer; + IoBuffer PayloadBuffer = Context.OptionalCache->GetBuildBlob(Context.CacheBuildId, + BlockDescription.BlockHash, + SubRange.first, + SubRange.second); + DownloadTimeSeconds += CacheTimer.GetElapsedTimeMs() / 1000.0; + if (AbortFlag.load()) + { + break; + } + if (PayloadBuffer) + { + OnDownloaded(std::move(PayloadBuffer), + SubRangeStartIndex, + std::vector<std::pair<uint64_t, uint64_t>>{std::make_pair(0u, SubRange.second)}); + SubRangeCountComplete += SubRangeCount; + continue; + } + } + else + { + auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount); + + Stopwatch CacheTimer; + BuildStorageCache::BuildBlobRanges RangeBuffers = + 
Context.OptionalCache->GetBuildBlobRanges(Context.CacheBuildId, BlockDescription.BlockHash, SubRanges); + DownloadTimeSeconds += CacheTimer.GetElapsedTimeMs() / 1000.0; + if (AbortFlag.load()) + { + break; + } + if (RangeBuffers.PayloadBuffer) + { + if (RangeBuffers.Ranges.empty()) + { + SubRangeCount = Ranges.size() - SubRangeCountComplete; + OnDownloaded(std::move(RangeBuffers.PayloadBuffer), + SubRangeStartIndex, + RangesSpan.subspan(SubRangeCountComplete, SubRangeCount)); + SubRangeCountComplete += SubRangeCount; + continue; + } + else if (RangeBuffers.Ranges.size() == SubRangeCount) + { + OnDownloaded(std::move(RangeBuffers.PayloadBuffer), SubRangeStartIndex, RangeBuffers.Ranges); + SubRangeCountComplete += SubRangeCount; + continue; + } + } + } + } + + size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, Context.StoreMaxRangeCountPerRequest); + + auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount); + + RemoteProjectStore::LoadAttachmentRangesResult BlockResult = + Context.RemoteStore.LoadAttachmentRanges(BlockDescription.BlockHash, SubRanges); + DownloadTimeSeconds += BlockResult.ElapsedSeconds; + if (AbortFlag.load()) + { + break; + } + if (BlockResult.ErrorCode || !BlockResult.Bytes) + { + ReportMessage(Context.OptionalJobContext, + fmt::format("Failed to download {} ranges from block attachment '{}' ({}): {}", + SubRanges.size(), + BlockDescription.BlockHash, + BlockResult.ErrorCode, + BlockResult.Reason)); + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) + { + throw RemoteStoreError(BlockResult.Reason, BlockResult.ErrorCode, BlockResult.Text); + } + } + else + { + if (BlockResult.Ranges.empty()) + { + // Jupiter will ignore the ranges and send the whole payload if it fetches the payload from S3 + // Use the whole payload for the remaining ranges + + if (Context.OptionalCache && Context.PopulateCache) + { + Context.OptionalCache->PutBuildBlob(Context.CacheBuildId, + 
BlockDescription.BlockHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(std::vector<IoBuffer>{BlockResult.Bytes})); + if (AbortFlag.load()) + { + break; + } + } + SubRangeCount = Ranges.size() - SubRangeCountComplete; + OnDownloaded(std::move(BlockResult.Bytes), + SubRangeStartIndex, + RangesSpan.subspan(SubRangeCountComplete, SubRangeCount)); + } + else + { + if (BlockResult.Ranges.size() != SubRanges.size()) + { + throw RemoteStoreError(fmt::format("Range response for block {} contains {} ranges, expected {} ranges", + BlockDescription.BlockHash, + BlockResult.Ranges.size(), + SubRanges.size()), + gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), + ""); + } + OnDownloaded(std::move(BlockResult.Bytes), SubRangeStartIndex, BlockResult.Ranges); + } + } + + SubRangeCountComplete += SubRangeCount; + } + } + + void DownloadAndSavePartialBlock(LoadOplogContext& Context, + ParallelWork& AttachmentWork, DownloadInfo& Info, Stopwatch& LoadAttachmentsTimer, std::atomic_uint64_t& DownloadStartMS, const ChunkBlockDescription& BlockDescription, + bool BlockExistsInCache, std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRangeDescriptors, size_t BlockRangeIndexStart, size_t BlockRangeCount, @@ -696,30 +1729,23 @@ namespace remotestore_impl { std::span<std::atomic<bool>> ChunkDownloadedFlags, uint32_t RetriesLeft) { - AttachmentsDownloadLatch.AddCount(1); - NetworkWorkerPool.ScheduleWork( - [&AttachmentsDownloadLatch, - &AttachmentsWriteLatch, - &ChunkStore, - &RemoteStore, - &NetworkWorkerPool, - &WorkerPool, - &RemoteResult, + AttachmentWork.ScheduleWork( + Context.NetworkWorkerPool, + [&AttachmentWork, + &Context, &Info, &LoadAttachmentsTimer, &DownloadStartMS, - IgnoreMissingAttachments, - OptionalContext, - RetriesLeft, BlockDescription, + BlockExistsInCache, BlockRangeDescriptors, BlockRangeIndexStart, BlockRangeCount, &AllNeededPartialChunkHashesLookup, - ChunkDownloadedFlags]() { + ChunkDownloadedFlags, + RetriesLeft](std::atomic<bool>& 
AbortFlag) { ZEN_TRACE_CPU("DownloadBlockRanges"); - - auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); + ZEN_SCOPED_LOG(Context.Log); try { uint64_t Unset = (std::uint64_t)-1; @@ -728,273 +1754,259 @@ namespace remotestore_impl { double DownloadElapsedSeconds = 0; uint64_t DownloadedBytes = 0; - for (size_t BlockRangeIndex = BlockRangeIndexStart; BlockRangeIndex < BlockRangeIndexStart + BlockRangeCount; - BlockRangeIndex++) - { - if (RemoteResult.IsError()) - { - return; - } + DownloadPartialBlock( + Context, + AbortFlag, + Info, + DownloadElapsedSeconds, + BlockDescription, + BlockExistsInCache, + BlockRangeDescriptors, + BlockRangeIndexStart, + BlockRangeCount, + [&](IoBuffer&& Buffer, + size_t BlockRangeStartIndex, + std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths) { + uint64_t BlockPartSize = Buffer.GetSize(); + DownloadedBytes += BlockPartSize; + + Info.AttachmentBlockRangeBytesDownloaded.fetch_add(BlockPartSize); + Info.AttachmentBlocksRangesDownloaded++; + + AttachmentWork.ScheduleWork( + Context.WorkerPool, + [&AttachmentWork, + &Context, + &Info, + &LoadAttachmentsTimer, + &DownloadStartMS, + BlockDescription, + BlockExistsInCache, + BlockRangeDescriptors, + BlockRangeStartIndex, + &AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + RetriesLeft, + BlockPayload = std::move(Buffer), + OffsetAndLengths = + std::vector<std::pair<uint64_t, uint64_t>>(OffsetAndLengths.begin(), OffsetAndLengths.end())]( + std::atomic<bool>& AbortFlag) { + ZEN_SCOPED_LOG(Context.Log); + try + { + ZEN_ASSERT(BlockPayload.Size() > 0); - const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = BlockRangeDescriptors[BlockRangeIndex]; + size_t RangeCount = OffsetAndLengths.size(); + for (size_t RangeOffset = 0; RangeOffset < RangeCount; RangeOffset++) + { + if (AbortFlag) + { + return; + } - RemoteProjectStore::LoadAttachmentResult BlockResult = - RemoteStore.LoadAttachment(BlockDescription.BlockHash, - {.Offset = 
BlockRange.RangeStart, .Bytes = BlockRange.RangeLength}); - if (BlockResult.ErrorCode) - { - ReportMessage(OptionalContext, - fmt::format("Failed to download block attachment '{}' range {},{} ({}): {}", - BlockDescription.BlockHash, - BlockRange.RangeStart, - BlockRange.RangeLength, - BlockResult.ErrorCode, - BlockResult.Reason)); - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) - { - RemoteResult.SetError(BlockResult.ErrorCode, BlockResult.Reason, BlockResult.Text); - } - return; - } - if (RemoteResult.IsError()) - { - return; - } - uint64_t BlockPartSize = BlockResult.Bytes.GetSize(); - if (BlockPartSize != BlockRange.RangeLength) - { - std::string ErrorString = - fmt::format("Failed to download block attachment '{}' range {},{}, got {} bytes ({}): {}", - BlockDescription.BlockHash, - BlockRange.RangeStart, - BlockRange.RangeLength, - BlockPartSize, - RemoteResult.GetError(), - RemoteResult.GetErrorReason()); - - ReportMessage(OptionalContext, ErrorString); - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) - { - RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::NotFound), - "Mismatching block part range received", - ErrorString); - } - return; - } - Info.AttachmentBlocksRangesDownloaded.fetch_add(1); - - DownloadElapsedSeconds += BlockResult.ElapsedSeconds; - DownloadedBytes += BlockPartSize; - - Info.AttachmentBlockRangeBytesDownloaded.fetch_add(BlockPartSize); - - AttachmentsWriteLatch.AddCount(1); - WorkerPool.ScheduleWork( - [&AttachmentsDownloadLatch, - &AttachmentsWriteLatch, - &ChunkStore, - &RemoteStore, - &NetworkWorkerPool, - &WorkerPool, - &RemoteResult, - &Info, - &LoadAttachmentsTimer, - &DownloadStartMS, - IgnoreMissingAttachments, - OptionalContext, - RetriesLeft, - BlockDescription, - BlockRange, - &AllNeededPartialChunkHashesLookup, - ChunkDownloadedFlags, - BlockPayload = std::move(BlockResult.Bytes)]() { - auto _ = MakeGuard([&AttachmentsWriteLatch] { 
AttachmentsWriteLatch.CountDown(); }); - if (RemoteResult.IsError()) - { - return; - } - try - { - ZEN_ASSERT(BlockPayload.Size() > 0); - std::vector<IoBuffer> WriteAttachmentBuffers; - std::vector<IoHash> WriteRawHashes; + const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = + BlockRangeDescriptors[BlockRangeStartIndex + RangeOffset]; + const std::pair<uint64_t, uint64_t>& OffsetAndLength = OffsetAndLengths[RangeOffset]; + IoBuffer BlockRangeBuffer(BlockPayload, OffsetAndLength.first, OffsetAndLength.second); - uint64_t PotentialSize = 0; - uint64_t UsedSize = 0; - uint64_t BlockPartSize = BlockPayload.GetSize(); + std::vector<IoBuffer> WriteAttachmentBuffers; + std::vector<IoHash> WriteRawHashes; - uint32_t OffsetInBlock = 0; - for (uint32_t ChunkBlockIndex = BlockRange.ChunkBlockIndexStart; - ChunkBlockIndex < BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount; - ChunkBlockIndex++) - { - const uint32_t ChunkCompressedSize = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; - const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + uint64_t PotentialSize = 0; + uint64_t UsedSize = 0; + uint64_t BlockPartSize = BlockRangeBuffer.GetSize(); - if (auto ChunkIndexIt = AllNeededPartialChunkHashesLookup.find(ChunkHash); - ChunkIndexIt != AllNeededPartialChunkHashesLookup.end()) - { - bool Expected = false; - if (ChunkDownloadedFlags[ChunkIndexIt->second].compare_exchange_strong(Expected, true)) + uint32_t OffsetInBlock = 0; + for (uint32_t ChunkBlockIndex = BlockRange.ChunkBlockIndexStart; + ChunkBlockIndex < BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount; + ChunkBlockIndex++) { - IoHash VerifyChunkHash; - uint64_t VerifyChunkSize; - CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed( - SharedBuffer(IoBuffer(BlockPayload, OffsetInBlock, ChunkCompressedSize)), - VerifyChunkHash, - VerifyChunkSize); - if (!CompressedChunk) + if (AbortFlag) { - std::string ErrorString = fmt::format( - 
"Chunk at {},{} in block attachment '{}' is not a valid compressed buffer", - OffsetInBlock, - ChunkCompressedSize, - BlockDescription.BlockHash); - ReportMessage(OptionalContext, ErrorString); - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) + break; + } + + const uint32_t ChunkCompressedSize = + BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + + if (auto ChunkIndexIt = AllNeededPartialChunkHashesLookup.find(ChunkHash); + ChunkIndexIt != AllNeededPartialChunkHashesLookup.end()) + { + if (!ChunkDownloadedFlags[ChunkIndexIt->second]) { - RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::NotFound), - "Malformed chunk block", - ErrorString); + IoHash VerifyChunkHash; + uint64_t VerifyChunkSize; + CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed( + SharedBuffer(IoBuffer(BlockRangeBuffer, OffsetInBlock, ChunkCompressedSize)), + VerifyChunkHash, + VerifyChunkSize); + + std::string ErrorString; + + if (!CompressedChunk) + { + ErrorString = fmt::format( + "Chunk at {},{} in block attachment '{}' is not a valid compressed buffer", + OffsetInBlock, + ChunkCompressedSize, + BlockDescription.BlockHash); + } + else if (VerifyChunkHash != ChunkHash) + { + ErrorString = fmt::format( + "Chunk at {},{} in block attachment '{}' has mismatching hash, expected " + "{}, got {}", + OffsetInBlock, + ChunkCompressedSize, + BlockDescription.BlockHash, + ChunkHash, + VerifyChunkHash); + } + else if (VerifyChunkSize != BlockDescription.ChunkRawLengths[ChunkBlockIndex]) + { + ErrorString = fmt::format( + "Chunk at {},{} in block attachment '{}' has mismatching raw size, " + "expected {}, " + "got {}", + OffsetInBlock, + ChunkCompressedSize, + BlockDescription.BlockHash, + BlockDescription.ChunkRawLengths[ChunkBlockIndex], + VerifyChunkSize); + } + + if (!ErrorString.empty()) + { + if (RetriesLeft > 0) + { + ReportMessage(Context.OptionalJobContext, 
+ fmt::format("{}, retrying download", ErrorString)); + return DownloadAndSavePartialBlock(Context, + AttachmentWork, + Info, + LoadAttachmentsTimer, + DownloadStartMS, + BlockDescription, + BlockExistsInCache, + BlockRangeDescriptors, + BlockRangeStartIndex, + RangeCount, + AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + RetriesLeft - 1); + } + + ReportMessage(Context.OptionalJobContext, ErrorString); + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) + { + throw RemoteStoreError("Malformed chunk block", + gsl::narrow<int32_t>(HttpResponseCode::NotFound), + ErrorString); + } + } + else + { + bool Expected = false; + if (ChunkDownloadedFlags[ChunkIndexIt->second].compare_exchange_strong(Expected, + true)) + { + WriteAttachmentBuffers.emplace_back( + CompressedChunk.GetCompressed().Flatten().AsIoBuffer()); + WriteRawHashes.emplace_back(ChunkHash); + PotentialSize += WriteAttachmentBuffers.back().GetSize(); + } + } } - continue; } - if (VerifyChunkHash != ChunkHash) + OffsetInBlock += ChunkCompressedSize; + } + + if (!WriteAttachmentBuffers.empty()) + { + std::vector<CidStore::InsertResult> Results = + Context.ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes); + for (size_t Index = 0; Index < Results.size(); Index++) { - std::string ErrorString = fmt::format( - "Chunk at {},{} in block attachment '{}' has mismatching hash, expected {}, got {}", - OffsetInBlock, - ChunkCompressedSize, - BlockDescription.BlockHash, - ChunkHash, - VerifyChunkHash); - ReportMessage(OptionalContext, ErrorString); - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) + const CidStore::InsertResult& Result = Results[Index]; + if (Result.New) { - RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::NotFound), - "Malformed chunk block", - ErrorString); + Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); + Info.AttachmentsStored.fetch_add(1); + UsedSize += 
WriteAttachmentBuffers[Index].GetSize(); } - continue; } - if (VerifyChunkSize != BlockDescription.ChunkRawLengths[ChunkBlockIndex]) + Info.ChunksCompleteCount += WriteAttachmentBuffers.size(); + if (UsedSize < BlockPartSize) { - std::string ErrorString = fmt::format( - "Chunk at {},{} in block attachment '{}' has mismatching raw size, expected {}, " - "got {}", - OffsetInBlock, - ChunkCompressedSize, + ZEN_DEBUG( + "Used {} (skipping {}) out of {} for block {} range {}, {} ({} %) (use of matching " + "{}%)", + NiceBytes(UsedSize), + NiceBytes(BlockPartSize - UsedSize), + NiceBytes(BlockPartSize), BlockDescription.BlockHash, - BlockDescription.ChunkRawLengths[ChunkBlockIndex], - VerifyChunkSize); - ReportMessage(OptionalContext, ErrorString); - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) - { - RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::NotFound), - "Malformed chunk block", - ErrorString); - } - continue; + BlockRange.RangeStart, + BlockRange.RangeLength, + (100 * UsedSize) / BlockPartSize, + PotentialSize > 0 ? 
(UsedSize * 100) / PotentialSize : 0); } - - WriteAttachmentBuffers.emplace_back(CompressedChunk.GetCompressed().Flatten().AsIoBuffer()); - WriteRawHashes.emplace_back(ChunkHash); - PotentialSize += WriteAttachmentBuffers.back().GetSize(); } } - OffsetInBlock += ChunkCompressedSize; } - - if (!WriteAttachmentBuffers.empty()) + catch (const RemoteStoreError&) { - auto Results = ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes); - for (size_t Index = 0; Index < Results.size(); Index++) - { - const auto& Result = Results[Index]; - if (Result.New) - { - Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); - Info.AttachmentsStored.fetch_add(1); - UsedSize += WriteAttachmentBuffers[Index].GetSize(); - } - } - ZEN_DEBUG("Used {} (matching {}) out of {} for block {} range {}, {} ({} %) (use of matching {}%)", - NiceBytes(UsedSize), - NiceBytes(PotentialSize), - NiceBytes(BlockPartSize), - BlockDescription.BlockHash, - BlockRange.RangeStart, - BlockRange.RangeLength, - (100 * UsedSize) / BlockPartSize, - PotentialSize > 0 ? 
(UsedSize * 100) / PotentialSize : 0); + throw; } - } - catch (const std::exception& Ex) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed save block attachment {} range {}, {}", - BlockDescription.BlockHash, - BlockRange.RangeStart, - BlockRange.RangeLength), - Ex.what()); - } - }, - WorkerThreadPool::EMode::EnableBacklog); + catch (const std::exception& Ex) + { + throw RemoteStoreError(fmt::format("Failed saving {} ranges from block attachment {}", + OffsetAndLengths.size(), + BlockDescription.BlockHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); + } + }, + WorkerThreadPool::EMode::EnableBacklog); + }); + if (!AbortFlag) + { + ZEN_DEBUG("Loaded {} ranges from block attachment '{}' in {} ({})", + BlockRangeCount, + BlockDescription.BlockHash, + NiceTimeSpanMs(static_cast<uint64_t>(DownloadElapsedSeconds * 1000)), + NiceBytes(DownloadedBytes)); } - - ZEN_DEBUG("Loaded {} ranges from block attachment '{}' in {} ({})", - BlockRangeCount, - BlockDescription.BlockHash, - NiceTimeSpanMs(static_cast<uint64_t>(DownloadElapsedSeconds * 1000)), - NiceBytes(DownloadedBytes)); + } + catch (const RemoteStoreError&) + { + throw; } catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to download block attachment {} ranges", BlockDescription.BlockHash), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed to download block attachment {} ranges", BlockDescription.BlockHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); }; - void DownloadAndSaveAttachment(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - bool IgnoreMissingAttachments, - JobContext* OptionalContext, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - Latch& AttachmentsDownloadLatch, - Latch& AttachmentsWriteLatch, - AsyncRemoteResult& 
RemoteResult, + void DownloadAndSaveAttachment(LoadOplogContext& Context, + ParallelWork& AttachmentWork, DownloadInfo& Info, Stopwatch& LoadAttachmentsTimer, std::atomic_uint64_t& DownloadStartMS, const IoHash& RawHash) { - AttachmentsDownloadLatch.AddCount(1); - NetworkWorkerPool.ScheduleWork( - [&RemoteStore, - &ChunkStore, - &WorkerPool, - &RemoteResult, - &AttachmentsDownloadLatch, - &AttachmentsWriteLatch, - RawHash, - &LoadAttachmentsTimer, - &DownloadStartMS, - &Info, - IgnoreMissingAttachments, - OptionalContext]() { + AttachmentWork.ScheduleWork( + Context.NetworkWorkerPool, + [&Context, &AttachmentWork, RawHash, &LoadAttachmentsTimer, &DownloadStartMS, &Info](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("DownloadAttachment"); + ZEN_SCOPED_LOG(Context.Log); - auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); - if (RemoteResult.IsError()) + if (AbortFlag) { return; } @@ -1002,132 +2014,127 @@ namespace remotestore_impl { { uint64_t Unset = (std::uint64_t)-1; DownloadStartMS.compare_exchange_strong(Unset, LoadAttachmentsTimer.GetElapsedTimeMs()); - RemoteProjectStore::LoadAttachmentResult AttachmentResult = RemoteStore.LoadAttachment(RawHash, {}); - if (AttachmentResult.ErrorCode) + IoBuffer BlobBuffer; + if (Context.OptionalCache) { - ReportMessage(OptionalContext, - fmt::format("Failed to download large attachment {}: '{}', error code : {}", - RawHash, - AttachmentResult.Reason, - AttachmentResult.ErrorCode)); - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) + BlobBuffer = Context.OptionalCache->GetBuildBlob(Context.CacheBuildId, RawHash); + } + if (!BlobBuffer) + { + RemoteProjectStore::LoadAttachmentResult AttachmentResult = Context.RemoteStore.LoadAttachment(RawHash); + if (AttachmentResult.ErrorCode) { - RemoteResult.SetError(AttachmentResult.ErrorCode, AttachmentResult.Reason, AttachmentResult.Text); + ReportMessage(Context.OptionalJobContext, + fmt::format("Failed to download 
large attachment {}: '{}', error code: {}", + RawHash, + AttachmentResult.Reason, + AttachmentResult.ErrorCode)); + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) + { + throw RemoteStoreError(AttachmentResult.Reason, AttachmentResult.ErrorCode, AttachmentResult.Text); + } + return; + } + ZEN_ASSERT(AttachmentResult.Bytes); + BlobBuffer = std::move(AttachmentResult.Bytes); + ZEN_DEBUG("Loaded large attachment '{}' in {} ({})", + RawHash, + NiceTimeSpanMs(static_cast<uint64_t>(AttachmentResult.ElapsedSeconds * 1000)), + NiceBytes(BlobBuffer.GetSize())); + if (Context.OptionalCache && Context.PopulateCache) + { + Context.OptionalCache->PutBuildBlob(Context.CacheBuildId, + RawHash, + BlobBuffer.GetContentType(), + CompositeBuffer(SharedBuffer(BlobBuffer))); } - return; } - uint64_t AttachmentSize = AttachmentResult.Bytes.GetSize(); - ZEN_DEBUG("Loaded large attachment '{}' in {} ({})", - RawHash, - NiceTimeSpanMs(static_cast<uint64_t>(AttachmentResult.ElapsedSeconds * 1000)), - NiceBytes(AttachmentSize)); - Info.AttachmentsDownloaded.fetch_add(1); - if (RemoteResult.IsError()) + if (AbortFlag) { return; } + uint64_t AttachmentSize = BlobBuffer.GetSize(); + Info.AttachmentsDownloaded.fetch_add(1); Info.AttachmentBytesDownloaded.fetch_add(AttachmentSize); + ZEN_ASSERT(BlobBuffer); - AttachmentsWriteLatch.AddCount(1); - WorkerPool.ScheduleWork( - [&AttachmentsWriteLatch, - &RemoteResult, - &Info, - &ChunkStore, - RawHash, - AttachmentSize, - Bytes = std::move(AttachmentResult.Bytes), - OptionalContext]() { + AttachmentWork.ScheduleWork( + Context.WorkerPool, + [&Context, &Info, RawHash, AttachmentSize, Bytes = std::move(BlobBuffer)](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("WriteAttachment"); - auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); - if (RemoteResult.IsError()) + ZEN_ASSERT(Bytes); + + if (AbortFlag) { return; } - try - { - CidStore::InsertResult InsertResult = 
ChunkStore.AddChunk(Bytes, RawHash); - if (InsertResult.New) - { - Info.AttachmentBytesStored.fetch_add(AttachmentSize); - Info.AttachmentsStored.fetch_add(1); - } - } - catch (const std::exception& Ex) + CidStore::InsertResult InsertResult = Context.ChunkStore.AddChunk(Bytes, RawHash); + if (InsertResult.New) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Saving attachment {} failed", RawHash), - Ex.what()); + Info.AttachmentBytesStored.fetch_add(AttachmentSize); + Info.AttachmentsStored.fetch_add(1); } + Info.ChunksCompleteCount++; }, WorkerThreadPool::EMode::EnableBacklog); } - catch (const std::exception& Ex) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Loading attachment {} failed", RawHash), - Ex.what()); - } - }, - WorkerThreadPool::EMode::EnableBacklog); - }; - - void CreateBlock(WorkerThreadPool& WorkerPool, - Latch& OpSectionsLatch, - std::vector<std::pair<IoHash, FetchChunkFunc>>&& ChunksInBlock, - RwLock& SectionsLock, - std::vector<ChunkBlockDescription>& Blocks, - size_t BlockIndex, - const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, - AsyncRemoteResult& RemoteResult) - { - OpSectionsLatch.AddCount(1); - WorkerPool.ScheduleWork( - [&Blocks, - &SectionsLock, - &OpSectionsLatch, - BlockIndex, - Chunks = std::move(ChunksInBlock), - &AsyncOnBlock, - &RemoteResult]() mutable { - ZEN_TRACE_CPU("CreateBlock"); - - auto _ = MakeGuard([&OpSectionsLatch] { OpSectionsLatch.CountDown(); }); - if (RemoteResult.IsError()) - { - return; - } - size_t ChunkCount = Chunks.size(); - try + catch (const RemoteStoreError&) { - ZEN_ASSERT(ChunkCount > 0); - Stopwatch Timer; - ChunkBlockDescription Block; - CompressedBuffer CompressedBlock = GenerateChunkBlock(std::move(Chunks), Block); - IoHash BlockHash = CompressedBlock.DecodeRawHash(); - ZEN_UNUSED(BlockHash); - { - // We can share the lock as we are not resizing the vector and 
only touch BlockHash at our own index - RwLock::SharedLockScope __(SectionsLock); - Blocks[BlockIndex] = Block; - } - uint64_t BlockSize = CompressedBlock.GetCompressedSize(); - AsyncOnBlock(std::move(CompressedBlock), std::move(Block)); - ZEN_INFO("Generated block with {} attachments in {} ({})", - ChunkCount, - NiceTimeSpanMs(Timer.GetElapsedTimeMs()), - NiceBytes(BlockSize)); + throw; } catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed creating block {} with {} chunks", BlockIndex, ChunkCount), - Ex.what()); + throw RemoteStoreError(fmt::format("Loading attachment {} failed", RawHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); + }; + + void AsyncCreateBlock(LoggerRef InLog, + ParallelWork& Work, + WorkerThreadPool& WorkerPool, + std::vector<std::pair<IoHash, FetchChunkFunc>>&& ChunksInBlock, + RwLock& SectionsLock, + std::vector<ChunkBlockDescription>& Blocks, + size_t BlockIndex, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, + JobContext* OptionalContext) + { + Work.ScheduleWork(WorkerPool, + [InLog, &Blocks, &SectionsLock, BlockIndex, Chunks = std::move(ChunksInBlock), &AsyncOnBlock, OptionalContext]( + std::atomic<bool>& AbortFlag) mutable { + ZEN_TRACE_CPU("CreateBlock"); + ZEN_SCOPED_LOG(InLog); + + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + if (AbortFlag) + { + return; + } + size_t ChunkCount = Chunks.size(); + ZEN_ASSERT(ChunkCount > 0); + Stopwatch Timer; + ChunkBlockDescription Block; + CompressedBuffer CompressedBlock = GenerateChunkBlock(std::move(Chunks), Block); + uint64_t BlockSize = CompressedBlock.GetCompressedSize(); + { + // We can share the lock as we are not resizing the vector and only touch our own index + RwLock::SharedLockScope __(SectionsLock); + Blocks[BlockIndex] = Block; + } + 
AsyncOnBlock(std::move(CompressedBlock), std::move(Block)); + ZEN_INFO("Generated block with {} attachments in {} ({})", + ChunkCount, + NiceTimeSpanMs(Timer.GetElapsedTimeMs()), + NiceBytes(BlockSize)); + }); } struct UploadInfo @@ -1141,7 +2148,7 @@ namespace remotestore_impl { struct CreatedBlock { - IoBuffer Payload; + CompositeBuffer Payload; ChunkBlockDescription Block; }; @@ -1155,7 +2162,6 @@ namespace remotestore_impl { const std::unordered_set<IoHash, IoHash::Hasher>& Needs, bool ForceAll, UploadInfo& Info, - AsyncRemoteResult& RemoteResult, JobContext* OptionalContext) { using namespace std::literals; @@ -1216,22 +2222,15 @@ namespace remotestore_impl { if (!UnknownAttachments.empty()) { - RemoteResult.SetError( - gsl::narrow<int>(HttpResponseCode::NotFound), + throw RemoteStoreError( fmt::format("Upload requested of {} missing attachments, the base container referenced blocks that are no longer available", UnknownAttachments.size()), + gsl::narrow<int>(HttpResponseCode::NotFound), ""); - ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return; } if (IsCancelled(OptionalContext)) { - if (!RemoteResult.IsError()) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - } return; } @@ -1244,122 +2243,91 @@ namespace remotestore_impl { Stopwatch Timer; - ptrdiff_t AttachmentsToSave(0); - Latch SaveAttachmentsLatch(1); + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + ptrdiff_t AttachmentsToSave(0); for (const IoHash& RawHash : AttachmentsToUpload) { - if (RemoteResult.IsError()) + if (AbortFlag.load()) { break; } - SaveAttachmentsLatch.AddCount(1); AttachmentsToSave++; - WorkerPool.ScheduleWork( - [&ChunkStore, - 
&RemoteStore, - &SaveAttachmentsLatch, - &RemoteResult, - RawHash, - &CreatedBlocks, - &LooseFileAttachments, - &Info, - OptionalContext]() { + Work.ScheduleWork( + WorkerPool, + [&ChunkStore, &RemoteStore, RawHash, &CreatedBlocks, &LooseFileAttachments, &Info, OptionalContext]( + std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("UploadAttachment"); - auto _ = MakeGuard([&SaveAttachmentsLatch] { SaveAttachmentsLatch.CountDown(); }); - if (RemoteResult.IsError()) + if (AbortFlag.load()) { return; } - try + CompositeBuffer Payload; + ChunkBlockDescription Block; + if (auto BlockIt = CreatedBlocks.find(RawHash); BlockIt != CreatedBlocks.end()) { - IoBuffer Payload; - ChunkBlockDescription Block; - if (auto BlockIt = CreatedBlocks.find(RawHash); BlockIt != CreatedBlocks.end()) - { - Payload = BlockIt->second.Payload; - Block = BlockIt->second.Block; - } - else if (auto LooseTmpFileIt = LooseFileAttachments.find(RawHash); LooseTmpFileIt != LooseFileAttachments.end()) - { - Payload = LooseTmpFileIt->second(RawHash); - } - else - { - Payload = ChunkStore.FindChunkByCid(RawHash); - } - if (!Payload) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::NotFound), - fmt::format("Failed to find attachment {}", RawHash), - {}); - ZEN_WARN("Failed to save attachment '{}' ({}): {}", - RawHash, - RemoteResult.GetError(), - RemoteResult.GetErrorReason()); - return; - } - const bool IsBlock = Block.BlockHash == RawHash; - size_t PayloadSize = Payload.GetSize(); - RemoteProjectStore::SaveAttachmentResult Result = - RemoteStore.SaveAttachment(CompositeBuffer(SharedBuffer(std::move(Payload))), RawHash, std::move(Block)); - if (Result.ErrorCode) - { - RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); - ReportMessage(OptionalContext, - fmt::format("Failed to save attachment '{}', {} ({}): {}", - RawHash, - NiceBytes(PayloadSize), - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - return; - } - if (IsBlock) - { - 
Info.AttachmentBlocksUploaded.fetch_add(1); - Info.AttachmentBlockBytesUploaded.fetch_add(PayloadSize); - ZEN_INFO("Saved block attachment '{}' in {} ({})", - RawHash, - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), - NiceBytes(PayloadSize)); - } - else - { - Info.AttachmentsUploaded.fetch_add(1); - Info.AttachmentBytesUploaded.fetch_add(PayloadSize); - ZEN_INFO("Saved large attachment '{}' in {} ({})", - RawHash, - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), - NiceBytes(PayloadSize)); - } + Payload = BlockIt->second.Payload; + Block = BlockIt->second.Block; } - catch (const std::exception& Ex) + else if (auto LooseTmpFileIt = LooseFileAttachments.find(RawHash); LooseTmpFileIt != LooseFileAttachments.end()) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("To upload attachment {}", RawHash), - Ex.what()); + Payload = LooseTmpFileIt->second(RawHash); } - }, - WorkerThreadPool::EMode::EnableBacklog); + else + { + Payload = CompositeBuffer(SharedBuffer(ChunkStore.FindChunkByCid(RawHash))); + } + if (!Payload) + { + throw RemoteStoreError(fmt::format("Failed to find attachment {}", RawHash), + gsl::narrow<int>(HttpResponseCode::NotFound), + {}); + } + const bool IsBlock = Block.BlockHash == RawHash; + size_t PayloadSize = Payload.GetSize(); + RemoteProjectStore::SaveAttachmentResult Result = + RemoteStore.SaveAttachment(std::move(Payload), RawHash, std::move(Block)); + if (Result.ErrorCode) + { + throw RemoteStoreError(fmt::format("Failed to save attachment '{}', {}", RawHash, NiceBytes(PayloadSize)), + Result.ErrorCode, + Result.Text); + } + if (IsBlock) + { + Info.AttachmentBlocksUploaded.fetch_add(1); + Info.AttachmentBlockBytesUploaded.fetch_add(PayloadSize); + ZEN_INFO("Saved block attachment '{}' in {} ({})", + RawHash, + NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), + NiceBytes(PayloadSize)); + } + else + { + 
Info.AttachmentsUploaded.fetch_add(1); + Info.AttachmentBytesUploaded.fetch_add(PayloadSize); + ZEN_INFO("Saved large attachment '{}' in {} ({})", + RawHash, + NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), + NiceBytes(PayloadSize)); + } + }); } if (IsCancelled(OptionalContext)) { - if (!RemoteResult.IsError()) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - } - return; + AbortFlag = true; } if (!BulkBlockAttachmentsToUpload.empty()) { for (const std::vector<std::pair<IoHash, FetchChunkFunc>>& Chunks : BlockChunks) { - if (RemoteResult.IsError()) + if (AbortFlag.load()) { break; } @@ -1379,103 +2347,82 @@ namespace remotestore_impl { continue; } - SaveAttachmentsLatch.AddCount(1); AttachmentsToSave++; - WorkerPool.ScheduleWork( + Work.ScheduleWork( + WorkerPool, [&RemoteStore, &ChunkStore, - &SaveAttachmentsLatch, - &RemoteResult, NeededChunks = std::move(NeededChunks), &BulkBlockAttachmentsToUpload, &Info, - OptionalContext]() { + OptionalContext](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("UploadChunk"); - auto _ = MakeGuard([&SaveAttachmentsLatch] { SaveAttachmentsLatch.CountDown(); }); - if (RemoteResult.IsError()) + if (AbortFlag.load()) { return; } - try + size_t ChunksSize = 0; + std::vector<SharedBuffer> ChunkBuffers; + ChunkBuffers.reserve(NeededChunks.size()); + for (const IoHash& Chunk : NeededChunks) { - size_t ChunksSize = 0; - std::vector<SharedBuffer> ChunkBuffers; - ChunkBuffers.reserve(NeededChunks.size()); - for (const IoHash& Chunk : NeededChunks) - { - auto It = BulkBlockAttachmentsToUpload.find(Chunk); - ZEN_ASSERT(It != BulkBlockAttachmentsToUpload.end()); - CompressedBuffer ChunkPayload = It->second(It->first).second; - if (!ChunkPayload) - { - RemoteResult.SetError(static_cast<int32_t>(HttpResponseCode::NotFound), - fmt::format("Missing chunk 
{}"sv, Chunk), - fmt::format("Unable to fetch attachment {} required by the oplog"sv, Chunk)); - ChunkBuffers.clear(); - break; - } - ChunksSize += ChunkPayload.GetCompressedSize(); - ChunkBuffers.emplace_back(SharedBuffer(std::move(ChunkPayload).GetCompressed().Flatten().AsIoBuffer())); - } - RemoteProjectStore::SaveAttachmentsResult Result = RemoteStore.SaveAttachments(ChunkBuffers); - if (Result.ErrorCode) + auto It = BulkBlockAttachmentsToUpload.find(Chunk); + ZEN_ASSERT(It != BulkBlockAttachmentsToUpload.end()); + CompositeBuffer ChunkPayload = It->second(It->first).second; + if (!ChunkPayload) { - RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); - ReportMessage(OptionalContext, - fmt::format("Failed to save attachments with {} chunks ({}): {}", - NeededChunks.size(), - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - return; + throw RemoteStoreError(fmt::format("Missing chunk {}"sv, Chunk), + static_cast<int32_t>(HttpResponseCode::NotFound), + fmt::format("Unable to fetch attachment {} required by the oplog"sv, Chunk)); } - Info.AttachmentsUploaded.fetch_add(ChunkBuffers.size()); - Info.AttachmentBytesUploaded.fetch_add(ChunksSize); - - ZEN_INFO("Saved {} bulk attachments in {} ({})", - NeededChunks.size(), - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), - NiceBytes(ChunksSize)); + ChunksSize += ChunkPayload.GetSize(); + ChunkBuffers.emplace_back(SharedBuffer(ChunkPayload.Flatten().AsIoBuffer())); } - catch (const std::exception& Ex) + RemoteProjectStore::SaveAttachmentsResult Result = RemoteStore.SaveAttachments(ChunkBuffers); + if (Result.ErrorCode) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to buck upload {} attachments", NeededChunks.size()), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed to save attachments with {} chunks", NeededChunks.size()), + Result.ErrorCode, + Result.Text); } - }, - 
WorkerThreadPool::EMode::EnableBacklog); + Info.AttachmentsUploaded.fetch_add(ChunkBuffers.size()); + Info.AttachmentBytesUploaded.fetch_add(ChunksSize); + + ZEN_INFO("Saved {} bulk attachments in {} ({})", + NeededChunks.size(), + NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), + NiceBytes(ChunksSize)); + }); } } - SaveAttachmentsLatch.CountDown(); - while (!SaveAttachmentsLatch.Wait(1000)) - { - ptrdiff_t Remaining = SaveAttachmentsLatch.Remaining(); - if (IsCancelled(OptionalContext)) + Stopwatch SaveAttachmentsProgressTimer; + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t Remaining) { + ZEN_UNUSED(IsAborted, IsPaused); + if (IsCancelled(OptionalContext) && !AbortFlag.load()) { - if (!RemoteResult.IsError()) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - } + AbortFlag = true; } uint64_t PartialTransferWallTimeMS = Timer.GetElapsedTimeMs(); ReportProgress(OptionalContext, "Saving attachments"sv, - fmt::format("{} remaining... {}", Remaining, GetStats(RemoteStore.GetStats(), PartialTransferWallTimeMS)), + fmt::format("{} remaining... 
{}", + Remaining, + GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, PartialTransferWallTimeMS)), AttachmentsToSave, - Remaining); - } + Remaining, + SaveAttachmentsProgressTimer.GetElapsedTimeMs()); + }); uint64_t ElapsedTimeMS = Timer.GetElapsedTimeMs(); if (AttachmentsToSave > 0) { ReportProgress(OptionalContext, "Saving attachments"sv, - fmt::format("{}", GetStats(RemoteStore.GetStats(), ElapsedTimeMS)), + fmt::format("{}", GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, ElapsedTimeMS)), AttachmentsToSave, - 0); + 0, + SaveAttachmentsProgressTimer.GetElapsedTimeMs()); } ReportMessage(OptionalContext, fmt::format("Saved {} attachments ({} blocks, {} attachments, {} bulk attachments) in {} {}", @@ -1484,7 +2431,7 @@ namespace remotestore_impl { LargeAttachmentCountToUpload, BulkAttachmentCountToUpload, NiceTimeSpanMs(ElapsedTimeMS), - GetStats(RemoteStore.GetStats(), ElapsedTimeMS))); + GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, ElapsedTimeMS))); } } // namespace remotestore_impl @@ -1493,8 +2440,7 @@ std::vector<IoHash> GetBlockHashesFromOplog(CbObjectView ContainerObject) { using namespace std::literals; - std::vector<ChunkBlockDescription> Result; - CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); + CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); std::vector<IoHash> BlockHashes; BlockHashes.reserve(BlocksArray.Num()); @@ -1541,7 +2487,8 @@ GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> Include } CbObject -BuildContainer(CidStore& ChunkStore, +BuildContainer(LoggerRef InLog, + CidStore& ChunkStore, ProjectStore::Project& Project, ProjectStore::Oplog& Oplog, size_t MaxBlockSize, @@ -1557,1127 +2504,651 @@ BuildContainer(CidStore& ChunkStore, const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, bool EmbedLooseFiles, - 
JobContext* OptionalContext, - remotestore_impl::AsyncRemoteResult& RemoteResult) + JobContext* OptionalContext) { using namespace std::literals; - std::unique_ptr<OperationLogOutput> LogOutput(std::make_unique<remotestore_impl::JobContextLogOutput>(OptionalContext)); - - size_t OpCount = 0; - - CbObject OplogContainerObject; - { - struct FoundAttachment - { - std::filesystem::path RawPath; // If not stored in cid - uint64_t Size = 0; - Oid Key = Oid::Zero; - }; - - std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher> UploadAttachments; - - RwLock BlocksLock; - std::vector<ChunkBlockDescription> Blocks; - CompressedBuffer OpsBuffer; - - std::filesystem::path AttachmentTempPath = Oplog.TempPath(); - AttachmentTempPath.append(".pending"); - CreateDirectories(AttachmentTempPath); - - auto RewriteOp = [&](const Oid& Key, CbObjectView Op, const std::function<void(CbObjectView)>& CB) { - bool OpRewritten = false; - CbArrayView Files = Op["files"sv].AsArrayView(); - if (Files.Num() == 0) - { - CB(Op); - return; - } - - CbWriter Cbo; - Cbo.BeginArray("files"sv); + ZEN_SCOPED_LOG(InLog); + remotestore_impl::JobContextLogger JobContextOutput(OptionalContext); - for (CbFieldView& Field : Files) - { - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - CB(Op); - return; - } + Stopwatch Timer; - bool CopyField = true; + CbObject OplogContainerObject; + CompressedBuffer CompressedOpsSection; + std::unordered_map<IoHash, remotestore_impl::FoundAttachment, IoHash::Hasher> UploadAttachments; + std::filesystem::path AttachmentTempPath = Oplog.TempPath(); + AttachmentTempPath.append(".pending"); + + size_t TotalOpCount = Oplog.GetOplogEntryCount(); + + Stopwatch RewriteOplogTimer; + CbObject SectionOps = 
remotestore_impl::RewriteOplog(InLog, + Project, + Oplog, + IgnoreMissingAttachments, + EmbedLooseFiles, + AttachmentTempPath, + UploadAttachments, + OptionalContext); + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Rewrote {} ops to new oplog in {}", + TotalOpCount, + NiceTimeSpanMs(static_cast<uint64_t>(RewriteOplogTimer.GetElapsedTimeMs())))); - if (CbObjectView View = Field.AsObjectView()) - { - IoHash DataHash = View["data"sv].AsHash(); + { + Stopwatch CompressOpsTimer; + CompressedOpsSection = CompressedBuffer::Compress(SectionOps.GetBuffer(), OodleCompressor::Mermaid, OodleCompressionLevel::Fast); + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Compressed oplog section {} ({} -> {}) in {}", + CompressedOpsSection.DecodeRawHash(), + NiceBytes(CompressedOpsSection.DecodeRawSize()), + NiceBytes(CompressedOpsSection.GetCompressedSize()), + NiceTimeSpanMs(static_cast<uint64_t>(CompressOpsTimer.GetElapsedTimeMs())))); + } - if (DataHash == IoHash::Zero) - { - std::string_view ServerPath = View["serverpath"sv].AsString(); - std::filesystem::path FilePath = Project.RootDir / ServerPath; - if (!IsFile(FilePath)) - { - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Missing attachment '{}' for op '{}'", FilePath, View["id"sv].AsObjectId())); - if (IgnoreMissingAttachments) - { - continue; - } - else - { - ExtendableStringBuilder<1024> Sb; - Sb.Append("Failed to find attachment '"); - Sb.Append(FilePath.string()); - Sb.Append("' for op: \n"); - View.ToJson(Sb); - throw std::runtime_error(Sb.ToString()); - } - } + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - { - Stopwatch HashTimer; - SharedBuffer DataBuffer(IoBufferBuilder::MakeFromFile(FilePath)); - DataHash = IoHash::HashBuffer(CompositeBuffer(DataBuffer)); - ZEN_INFO("Hashed loose file '{}' {}: {} in {}", - FilePath, - NiceBytes(DataBuffer.GetSize()), - DataHash, - NiceTimeSpanMs(HashTimer.GetElapsedTimeMs())); - } + 
std::unordered_set<IoHash, IoHash::Hasher> FoundHashes; + FoundHashes.reserve(UploadAttachments.size()); + for (const auto& It : UploadAttachments) + { + FoundHashes.insert(It.first); + } - // Rewrite file array entry with new data reference - CbObjectWriter Writer; - RewriteCbObject(Writer, View, [&](CbObjectWriter&, CbFieldView Field) -> bool { - if (Field.GetName() == "data"sv) - { - // omit this field as we will write it explicitly ourselves - return true; - } - return false; - }); - Writer.AddBinaryAttachment("data"sv, DataHash); - UploadAttachments.insert_or_assign(DataHash, FoundAttachment{.RawPath = FilePath, .Key = Key}); + std::unordered_set<IoHash, IoHash::Hasher> MissingHashes; + std::vector<remotestore_impl::FoundChunkedFile> AttachmentsToChunk; - CbObject RewrittenOp = Writer.Save(); - Cbo.AddObject(std::move(RewrittenOp)); - CopyField = false; - } - } + remotestore_impl::FindChunkSizes(ChunkStore, + WorkerPool, + MaxChunkEmbedSize, + ChunkFileSizeLimit, + AllowChunking, + AttachmentTempPath, + UploadAttachments, + MissingHashes, + AttachmentsToChunk, + OptionalContext); - if (CopyField) - { - Cbo.AddField(Field); - } - else - { - OpRewritten = true; - } - } + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - if (!OpRewritten) - { - CB(Op); - return; - } + for (const IoHash& AttachmentHash : MissingHashes) + { + auto It = UploadAttachments.find(AttachmentHash); + ZEN_ASSERT(It != UploadAttachments.end()); + std::optional<CbObject> Op = Oplog.GetOpByKey(It->second.Key); + ZEN_ASSERT(Op.has_value()); - Cbo.EndArray(); - CbArray FilesArray = Cbo.Save().AsArray(); + if (IgnoreMissingAttachments) + { + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Missing attachment '{}' for op '{}'", AttachmentHash, It->second.Key)); + } + else + { + ExtendableStringBuilder<1024> Sb; + Sb.Append("Failed to find attachment '"); + Sb.Append(AttachmentHash.ToHexString()); + Sb.Append("' for op: \n"); + Op.value().ToJson(Sb); + throw 
std::runtime_error(Sb.ToString()); + } + UploadAttachments.erase(AttachmentHash); + } - CbObject RewrittenOp = RewriteCbObject(Op, [&](CbObjectWriter& NewWriter, CbFieldView Field) -> bool { - if (Field.GetName() == "files"sv) - { - NewWriter.AddArray("files"sv, FilesArray); + std::vector<remotestore_impl::ChunkedFile> ChunkedFiles = ChunkAttachments(WorkerPool, AttachmentsToChunk, OptionalContext); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - return true; - } + for (const remotestore_impl::ChunkedFile& Chunked : ChunkedFiles) + { + UploadAttachments.erase(Chunked.Chunked.Info.RawHash); + for (const IoHash& ChunkHash : Chunked.Chunked.Info.ChunkHashes) + { + UploadAttachments.erase(ChunkHash); + } + } - return false; - }); - CB(RewrittenOp); - }; + size_t ChunkedChunkCount = std::accumulate( + ChunkedFiles.begin(), + ChunkedFiles.end(), + size_t(0), + [](size_t Current, const remotestore_impl::ChunkedFile& Value) { return Current + Value.Chunked.Info.ChunkHashes.size(); }); - remotestore_impl::ReportMessage(OptionalContext, "Building exported oplog and collecting attachments"); + size_t ReusedAttachmentCount = 0; + std::vector<size_t> ReusedBlockIndexes; + { + std::unordered_set<IoHash, IoHash::Hasher> UniqueChunkHashes; + UniqueChunkHashes.reserve(FoundHashes.size() + ChunkedChunkCount); - Stopwatch Timer; + UniqueChunkHashes.insert(FoundHashes.begin(), FoundHashes.end()); - size_t TotalOpCount = Oplog.GetOplogEntryCount(); - CompressedBuffer CompressedOpsSection; + for (remotestore_impl::ChunkedFile& Chunked : ChunkedFiles) + { + UniqueChunkHashes.insert(Chunked.Chunked.Info.ChunkHashes.begin(), Chunked.Chunked.Info.ChunkHashes.end()); + } + std::vector<IoHash> ChunkHashes(UniqueChunkHashes.begin(), UniqueChunkHashes.end()); + + std::vector<uint32_t> ChunkIndexes; + ChunkIndexes.resize(ChunkHashes.size()); + std::iota(ChunkIndexes.begin(), ChunkIndexes.end(), 0); + + std::vector<uint32_t> UnusedChunkIndexes; + ReuseBlocksStatistics 
ReuseBlocksStats; + + ReusedBlockIndexes = FindReuseBlocks(JobContextOutput.Log(), + /*BlockReuseMinPercentLimit*/ 80, + /*IsVerbose*/ false, + ReuseBlocksStats, + KnownBlocks, + ChunkHashes, + ChunkIndexes, + UnusedChunkIndexes); + for (size_t KnownBlockIndex : ReusedBlockIndexes) { - Stopwatch RewriteOplogTimer; - CbObjectWriter SectionOpsWriter; - SectionOpsWriter.BeginArray("ops"sv); + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { - Oplog.IterateOplogWithKey([&](int, const Oid& Key, CbObjectView Op) { - if (RemoteResult.IsError()) - { - return; - } - Op.IterateAttachments([&](CbFieldView FieldView) { - UploadAttachments.insert_or_assign(FieldView.AsAttachment(), FoundAttachment{.Key = Key}); - }); - if (EmbedLooseFiles) - { - RewriteOp(Key, Op, [&SectionOpsWriter](CbObjectView Op) { SectionOpsWriter << Op; }); - } - else - { - SectionOpsWriter << Op; - } - OpCount++; - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return; - } - if (OpCount % 1000 == 0) - { - remotestore_impl::ReportProgress(OptionalContext, - "Building oplog"sv, - fmt::format("{} ops processed", OpCount), - TotalOpCount, - TotalOpCount - OpCount); - } - }); - if (RemoteResult.IsError()) - { - return {}; - } - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } - if (TotalOpCount > 0) + if (UploadAttachments.erase(KnownHash) == 1) { - remotestore_impl::ReportProgress(OptionalContext, - "Building 
oplog"sv, - fmt::format("{} ops processed", OpCount), - TotalOpCount, - 0); + ReusedAttachmentCount++; } } - SectionOpsWriter.EndArray(); // "ops" - - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Rewrote {} ops to new oplog in {}", - OpCount, - NiceTimeSpanMs(static_cast<uint64_t>(RewriteOplogTimer.GetElapsedTimeMs())))); - - { - Stopwatch CompressOpsTimer; - CompressedOpsSection = - CompressedBuffer::Compress(SectionOpsWriter.Save().GetBuffer(), OodleCompressor::Mermaid, OodleCompressionLevel::Fast); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Compressed oplog section {} ({} -> {}) in {}", - CompressedOpsSection.DecodeRawHash(), - NiceBytes(CompressedOpsSection.DecodeRawSize()), - NiceBytes(CompressedOpsSection.GetCompressedSize()), - NiceTimeSpanMs(static_cast<uint64_t>(CompressOpsTimer.GetElapsedTimeMs())))); - } } + } - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } + std::unordered_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher> LargeChunkAttachments; + std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher> LooseUploadAttachments; - std::unordered_set<IoHash, IoHash::Hasher> FoundHashes; - FoundHashes.reserve(UploadAttachments.size()); - for (const auto& It : UploadAttachments) + if (UploadAttachments.empty()) + { + if (ReusedAttachmentCount != 0) { - FoundHashes.insert(It.first); + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Found all {} attachments from {} ops in existing blocks", ReusedAttachmentCount, TotalOpCount)); } + } + else + { + const size_t TotalAttachmentCount = UploadAttachments.size() + ReusedAttachmentCount; + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Resolving {} attachments 
from {} ops ({} ({:.1f}%) found in existing blocks)", + UploadAttachments.size(), + TotalOpCount, + ReusedAttachmentCount, + (100.f * ReusedAttachmentCount) / TotalAttachmentCount)); + + ResolveAttachments(ChunkStore, + WorkerPool, + MaxChunkEmbedSize, + AttachmentTempPath, + UploadAttachments, + LargeChunkAttachments, + LooseUploadAttachments, + OptionalContext); - size_t ReusedAttachmentCount = 0; - std::vector<size_t> ReusedBlockIndexes; + if (remotestore_impl::IsCancelled(OptionalContext)) { - std::vector<IoHash> ChunkHashes; - ChunkHashes.reserve(FoundHashes.size()); - ChunkHashes.insert(ChunkHashes.begin(), FoundHashes.begin(), FoundHashes.end()); - std::vector<uint32_t> ChunkIndexes; - ChunkIndexes.resize(FoundHashes.size()); - std::iota(ChunkIndexes.begin(), ChunkIndexes.end(), 0); - - std::vector<uint32_t> UnusedChunkIndexes; - ReuseBlocksStatistics ReuseBlocksStats; - - ReusedBlockIndexes = FindReuseBlocks(*LogOutput, - /*BlockReuseMinPercentLimit*/ 80, - /*IsVerbose*/ false, - ReuseBlocksStats, - KnownBlocks, - ChunkHashes, - ChunkIndexes, - UnusedChunkIndexes); - for (size_t KnownBlockIndex : ReusedBlockIndexes) - { - const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) - { - if (UploadAttachments.erase(KnownHash) == 1) - { - ReusedAttachmentCount++; - } - } - } + return {}; } + } - struct ChunkedFile - { - IoBuffer Source; - - ChunkedInfoWithSource Chunked; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkLoookup; - }; - std::vector<ChunkedFile> ChunkedFiles; - - auto ChunkFile = [](const IoHash& RawHash, IoBuffer& RawData, const IoBufferFileReference& FileRef, JobContext*) -> ChunkedFile { - ChunkedFile Chunked; - Stopwatch Timer; - - uint64_t Offset = FileRef.FileChunkOffset; - uint64_t Size = FileRef.FileChunkSize; - - BasicFile SourceFile; - SourceFile.Attach(FileRef.FileHandle); - auto __ = MakeGuard([&SourceFile]() { SourceFile.Detach(); }); + 
std::unordered_set<IoHash, IoHash::Hasher> LargeChunkHashes; - Chunked.Chunked = ChunkData(SourceFile, Offset, Size, UShaderByteCodeParams); - ZEN_ASSERT(Chunked.Chunked.Info.RawHash == RawHash); - Chunked.Source = RawData; + for (auto& It : LargeChunkAttachments) + { + UploadAttachments.erase(It.first); + LargeChunkHashes.insert(It.first); + OnLargeAttachment(It.first, std::move(It.second)); + } - ZEN_INFO("Chunked large attachment '{}' {} into {} chunks in {}", - RawHash, - NiceBytes(Chunked.Chunked.Info.RawSize), - Chunked.Chunked.Info.ChunkHashes.size(), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + RwLock BlocksLock; + std::vector<ChunkBlockDescription> Blocks; - return Chunked; - }; + std::vector<std::pair<IoHash, Oid>> SortedUploadAttachments; + SortedUploadAttachments.reserve(UploadAttachments.size()); + for (const auto& It : UploadAttachments) + { + SortedUploadAttachments.push_back(std::make_pair(It.first, It.second.Key)); + } - RwLock ResolveLock; - std::unordered_set<IoHash, IoHash::Hasher> ChunkedHashes; - std::unordered_set<IoHash, IoHash::Hasher> LargeChunkHashes; - std::unordered_map<IoHash, size_t, IoHash::Hasher> ChunkedUploadAttachments; - std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher> LooseUploadAttachments; - std::unordered_set<IoHash, IoHash::Hasher> MissingHashes; + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Resolving {} attachments from {} ops", UploadAttachments.size(), TotalOpCount)); + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Sorting {} attachments from {} ops", SortedUploadAttachments.size(), TotalOpCount)); - Latch ResolveAttachmentsLatch(1); - for (auto& It : UploadAttachments) - { - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - 
fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } + // Sort attachments so we get predictable blocks for the same oplog upload + std::sort(SortedUploadAttachments.begin(), + SortedUploadAttachments.end(), + [](const std::pair<IoHash, Oid>& Lhs, const std::pair<IoHash, Oid>& Rhs) { + if (Lhs.second == Rhs.second) + { + // Same key, sort by raw hash + return Lhs.first < Rhs.first; + } + // Sort by key + return Lhs.second < Rhs.second; + }); - ResolveAttachmentsLatch.AddCount(1); + std::vector<size_t> ChunkedFilesOrder; + ChunkedFilesOrder.reserve(ChunkedFiles.size()); + for (size_t Index = 0; Index < ChunkedFiles.size(); Index++) + { + ChunkedFilesOrder.push_back(Index); + } + std::sort(ChunkedFilesOrder.begin(), ChunkedFilesOrder.end(), [&ChunkedFiles](size_t Lhs, size_t Rhs) { + return ChunkedFiles[Lhs].Chunked.Info.RawHash < ChunkedFiles[Rhs].Chunked.Info.RawHash; + }); - WorkerPool.ScheduleWork( - [&ChunkStore, - UploadAttachment = &It.second, - RawHash = It.first, - &ResolveAttachmentsLatch, - &ResolveLock, - &ChunkedHashes, - &LargeChunkHashes, - &ChunkedUploadAttachments, - &LooseUploadAttachments, - &MissingHashes, - &OnLargeAttachment, - &AttachmentTempPath, - &ChunkFile, - &ChunkedFiles, - MaxChunkEmbedSize, - ChunkFileSizeLimit, - AllowChunking, - &RemoteResult, - OptionalContext]() { - ZEN_TRACE_CPU("PrepareChunk"); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Assembling {} attachments and {} chunked parts from {} ops into blocks and loose attachments", + SortedUploadAttachments.size(), + ChunkedChunkCount, + TotalOpCount)); - auto _ = MakeGuard([&ResolveAttachmentsLatch] { ResolveAttachmentsLatch.CountDown(); }); - if (remotestore_impl::IsCancelled(OptionalContext)) - { - return; - } + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - try - { - if 
(!UploadAttachment->RawPath.empty()) - { - const std::filesystem::path& FilePath = UploadAttachment->RawPath; - IoBuffer RawData = IoBufferBuilder::MakeFromFile(FilePath); - if (RawData) - { - if (AllowChunking && RawData.GetSize() > ChunkFileSizeLimit) - { - IoBufferFileReference FileRef; - (void)RawData.GetFileReference(FileRef); - - ChunkedFile Chunked = ChunkFile(RawHash, RawData, FileRef, OptionalContext); - ResolveLock.WithExclusiveLock( - [RawHash, &ChunkedFiles, &ChunkedUploadAttachments, &ChunkedHashes, &Chunked]() { - ChunkedUploadAttachments.insert_or_assign(RawHash, ChunkedFiles.size()); - ChunkedHashes.reserve(ChunkedHashes.size() + Chunked.Chunked.Info.ChunkHashes.size()); - for (const IoHash& ChunkHash : Chunked.Chunked.Info.ChunkHashes) - { - ChunkedHashes.insert(ChunkHash); - } - ChunkedFiles.emplace_back(std::move(Chunked)); - }); - } - else if (RawData.GetSize() > (MaxChunkEmbedSize * 2)) - { - // Assume the compressed file is going to be larger than MaxChunkEmbedSize, even if it isn't - // it will be a loose attachment instead of going into a block - OnLargeAttachment(RawHash, [RawData = std::move(RawData), AttachmentTempPath](const IoHash& RawHash) { - size_t RawSize = RawData.GetSize(); - CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer(std::move(RawData)), - OodleCompressor::Mermaid, - OodleCompressionLevel::VeryFast); + size_t ChunkAssembleCount = SortedUploadAttachments.size() + ChunkedChunkCount; + size_t ChunksAssembled = 0; + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Assembling {} attachments from {} ops into blocks", ChunkAssembleCount, TotalOpCount)); - std::filesystem::path AttachmentPath = AttachmentTempPath; - AttachmentPath.append(RawHash.ToHexString()); - IoBuffer TempAttachmentBuffer = - WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath); - ZEN_INFO("Saved temp attachment to '{}', {} ({})", - AttachmentPath, - NiceBytes(RawSize), - 
NiceBytes(TempAttachmentBuffer.GetSize())); - return TempAttachmentBuffer; - }); - ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); }); - } - else - { - uint64_t RawSize = RawData.GetSize(); - CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer(RawData), - OodleCompressor::Mermaid, - OodleCompressionLevel::VeryFast); - - std::filesystem::path AttachmentPath = AttachmentTempPath; - AttachmentPath.append(RawHash.ToHexString()); - - uint64_t CompressedSize = Compressed.GetCompressedSize(); - IoBuffer TempAttachmentBuffer = WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath); - ZEN_INFO("Saved temp attachment to '{}', {} ({})", - AttachmentPath, - NiceBytes(RawSize), - NiceBytes(TempAttachmentBuffer.GetSize())); - - if (CompressedSize > MaxChunkEmbedSize) - { - OnLargeAttachment(RawHash, - [Data = std::move(TempAttachmentBuffer)](const IoHash&) { return Data; }); - ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); }); - } - else - { - UploadAttachment->Size = CompressedSize; - ResolveLock.WithExclusiveLock( - [RawHash, RawSize, &LooseUploadAttachments, Data = std::move(TempAttachmentBuffer)]() { - LooseUploadAttachments.insert_or_assign(RawHash, std::make_pair(RawSize, std::move(Data))); - }); - } - } - } - else - { - ResolveLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); }); - } - } - else - { - IoBuffer Data = ChunkStore.FindChunkByCid(RawHash); - if (Data) - { - auto GetForChunking = - [](size_t ChunkFileSizeLimit, const IoBuffer& Data, IoBufferFileReference& OutFileRef) -> bool { - if (Data.IsWholeFile()) - { - IoHash VerifyRawHash; - uint64_t VerifyRawSize; - CompressedBuffer Compressed = - CompressedBuffer::FromCompressed(SharedBuffer(Data), VerifyRawHash, VerifyRawSize); - if (Compressed) - { - if (VerifyRawSize > ChunkFileSizeLimit) - { - OodleCompressor Compressor; - OodleCompressionLevel 
CompressionLevel; - uint64_t BlockSize; - if (Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize)) - { - if (CompressionLevel == OodleCompressionLevel::None) - { - CompositeBuffer Decompressed = Compressed.DecompressToComposite(); - if (Decompressed) - { - std::span<const SharedBuffer> Segments = Decompressed.GetSegments(); - if (Segments.size() == 1) - { - IoBuffer DecompressedData = Segments[0].AsIoBuffer(); - if (DecompressedData.GetFileReference(OutFileRef)) - { - return true; - } - } - } - } - } - } - } - } - return false; - }; + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - IoBufferFileReference FileRef; - if (AllowChunking && GetForChunking(ChunkFileSizeLimit, Data, FileRef)) - { - ChunkedFile Chunked = ChunkFile(RawHash, Data, FileRef, OptionalContext); - ResolveLock.WithExclusiveLock( - [RawHash, &ChunkedFiles, &ChunkedUploadAttachments, &ChunkedHashes, &Chunked]() { - ChunkedUploadAttachments.insert_or_assign(RawHash, ChunkedFiles.size()); - ChunkedHashes.reserve(ChunkedHashes.size() + Chunked.Chunked.Info.ChunkHashes.size()); - for (const IoHash& ChunkHash : Chunked.Chunked.Info.ChunkHashes) - { - ChunkedHashes.insert(ChunkHash); - } - ChunkedFiles.emplace_back(std::move(Chunked)); - }); - } - else if (Data.GetSize() > MaxChunkEmbedSize) - { - OnLargeAttachment(RawHash, - [&ChunkStore](const IoHash& RawHash) { return ChunkStore.FindChunkByCid(RawHash); }); - ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); }); - } - else - { - UploadAttachment->Size = Data.GetSize(); - } - } - else - { - ResolveLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); }); - } - } - } - catch (const std::exception& Ex) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::NotFound), - fmt::format("Failed to resolve attachment {}", RawHash), - Ex.what()); 
- } - }, - WorkerThreadPool::EMode::EnableBacklog); - } - ResolveAttachmentsLatch.CountDown(); + uint32_t ComposedBlocks = 0; - while (!ResolveAttachmentsLatch.Wait(1000)) - { - ptrdiff_t Remaining = ResolveAttachmentsLatch.Remaining(); - if (remotestore_impl::IsCancelled(OptionalContext)) + uint64_t CreateBlocksStartMS = Timer.GetElapsedTimeMs(); + { + Stopwatch BlockCreateProgressTimer; + remotestore_impl::BlockComposer Composer(remotestore_impl::BlockComposer::Configuration{ + .MaxBlockSize = MaxBlockSize, + .MaxChunksPerBlock = MaxChunksPerBlock, + .MaxChunkEmbedSize = MaxChunkEmbedSize, + .IsCancelledFunc = [OptionalContext]() { return remotestore_impl::IsCancelled(OptionalContext); }}); + + auto OnNewBlock = [&Log, + &Work, + &WorkerPool, + BuildBlocks, + &BlockCreateProgressTimer, + &BlocksLock, + &Blocks, + &AsyncOnBlock, + &OnBlockChunks, + ChunkAssembleCount, + &ChunksAssembled, + &ComposedBlocks, + OptionalContext](std::vector<IoHash>&& ChunkRawHashes, + const std::function<FetchChunkFunc(const IoHash& AttachmentHash)>& FetchAttachmentResolver) { + size_t ChunkCount = ChunkRawHashes.size(); + std::vector<std::pair<IoHash, FetchChunkFunc>> ChunksInBlock; + ChunksInBlock.reserve(ChunkCount); + + for (const IoHash& AttachmentHash : ChunkRawHashes) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - while (!ResolveAttachmentsLatch.Wait(1000)) - { - Remaining = ResolveAttachmentsLatch.Remaining(); - remotestore_impl::ReportProgress(OptionalContext, - "Resolving attachments"sv, - fmt::format("Aborting, {} attachments remaining...", Remaining), - UploadAttachments.size(), - Remaining); - } - remotestore_impl::ReportProgress(OptionalContext, "Resolving attachments"sv, "Aborted"sv, UploadAttachments.size(), 0); - return {}; + 
ChunksInBlock.emplace_back(std::make_pair(AttachmentHash, FetchAttachmentResolver(AttachmentHash))); } - remotestore_impl::ReportProgress(OptionalContext, - "Resolving attachments"sv, - fmt::format("{} remaining...", Remaining), - UploadAttachments.size(), - Remaining); - } - if (UploadAttachments.size() > 0) - { - remotestore_impl::ReportProgress(OptionalContext, "Resolving attachments"sv, ""sv, UploadAttachments.size(), 0); - } - - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } - for (const IoHash& AttachmentHash : MissingHashes) - { - auto It = UploadAttachments.find(AttachmentHash); - ZEN_ASSERT(It != UploadAttachments.end()); - std::optional<CbObject> Op = Oplog.GetOpByKey(It->second.Key); - ZEN_ASSERT(Op.has_value()); - - if (IgnoreMissingAttachments) + size_t BlockIndex = remotestore_impl::AddBlock(BlocksLock, Blocks); + if (BuildBlocks) { - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Missing attachment '{}' for op '{}'", AttachmentHash, It->second.Key)); + remotestore_impl::AsyncCreateBlock(Log(), + Work, + WorkerPool, + std::move(ChunksInBlock), + BlocksLock, + Blocks, + BlockIndex, + AsyncOnBlock, + OptionalContext); } else { - ExtendableStringBuilder<1024> Sb; - Sb.Append("Failed to find attachment '"); - Sb.Append(AttachmentHash.ToHexString()); - Sb.Append("' for op: \n"); - Op.value().ToJson(Sb); - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::NotFound), Sb.ToString(), {}); - return {}; - } - UploadAttachments.erase(AttachmentHash); - } + ZEN_INFO("Bulk group {} attachments", ChunkCount); - for (const auto& It : ChunkedUploadAttachments) - { - UploadAttachments.erase(It.first); - } - for (const auto& It : LargeChunkHashes) - { - UploadAttachments.erase(It); - } - - 
{ - std::vector<IoHash> ChunkHashes; - ChunkHashes.reserve(ChunkedHashes.size()); - ChunkHashes.insert(ChunkHashes.begin(), ChunkedHashes.begin(), ChunkedHashes.end()); - std::vector<uint32_t> ChunkIndexes; - ChunkIndexes.resize(ChunkedHashes.size()); - std::iota(ChunkIndexes.begin(), ChunkIndexes.end(), 0); - - std::vector<uint32_t> UnusedChunkIndexes; - ReuseBlocksStatistics ReuseBlocksStats; - - std::vector<size_t> ReusedBlockFromChunking = FindReuseBlocks(*LogOutput, - /*BlockReuseMinPercentLimit*/ 80, - /*IsVerbose*/ false, - ReuseBlocksStats, - KnownBlocks, - ChunkHashes, - ChunkIndexes, - UnusedChunkIndexes); - for (size_t KnownBlockIndex : ReusedBlockIndexes) - { - const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) - { - if (ChunkedHashes.erase(KnownHash) == 1) - { - ReusedAttachmentCount++; - } - } + // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index + RwLock::SharedLockScope _(BlocksLock); + Blocks[BlockIndex].ChunkRawHashes = std::move(ChunkRawHashes); + OnBlockChunks(std::move(ChunksInBlock)); } - ReusedBlockIndexes.insert(ReusedBlockIndexes.end(), ReusedBlockFromChunking.begin(), ReusedBlockFromChunking.end()); - } - std::sort(ReusedBlockIndexes.begin(), ReusedBlockIndexes.end()); - auto UniqueKnownBlocksEnd = std::unique(ReusedBlockIndexes.begin(), ReusedBlockIndexes.end()); - size_t ReuseBlockCount = std::distance(ReusedBlockIndexes.begin(), UniqueKnownBlocksEnd); - if (ReuseBlockCount > 0) - { - Blocks.reserve(ReuseBlockCount); - for (auto It = ReusedBlockIndexes.begin(); It != UniqueKnownBlocksEnd; It++) + ChunksAssembled += ChunkCount; + ComposedBlocks++; + + if (ChunksAssembled % 1000 == 0) { - Blocks.push_back({KnownBlocks[*It]}); + remotestore_impl::ReportProgress( + OptionalContext, + "Assembling blocks"sv, + fmt::format("{} attachments processed, {} blocks assembled", ChunksAssembled, ComposedBlocks), + 
ChunkAssembleCount, + ChunkAssembleCount - ChunksAssembled, + BlockCreateProgressTimer.GetElapsedTimeMs()); } - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Reused {} attachments from {} blocks", ReusedAttachmentCount, ReuseBlockCount)); - } - - std::vector<std::pair<IoHash, Oid>> SortedUploadAttachments; - SortedUploadAttachments.reserve(UploadAttachments.size()); - for (const auto& It : UploadAttachments) - { - SortedUploadAttachments.push_back(std::make_pair(It.first, It.second.Key)); - } - - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } - - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Sorting {} attachments from {} ops", SortedUploadAttachments.size(), TotalOpCount)); - - // Sort attachments so we get predictable blocks for the same oplog upload - std::sort(SortedUploadAttachments.begin(), - SortedUploadAttachments.end(), - [](const std::pair<IoHash, Oid>& Lhs, const std::pair<IoHash, Oid>& Rhs) { - if (Lhs.second == Rhs.second) - { - // Same key, sort by raw hash - return Lhs.first < Rhs.first; - } - // Sort by key - return Lhs.second < Rhs.second; - }); - - std::vector<size_t> ChunkedFilesOrder; - ChunkedFilesOrder.reserve(ChunkedFiles.size()); - for (size_t Index = 0; Index < ChunkedFiles.size(); Index++) - { - ChunkedFilesOrder.push_back(Index); - } - std::sort(ChunkedFilesOrder.begin(), ChunkedFilesOrder.end(), [&ChunkedFiles](size_t Lhs, size_t Rhs) { - return ChunkedFiles[Lhs].Chunked.Info.RawHash < ChunkedFiles[Rhs].Chunked.Info.RawHash; - }); - - // SortedUploadAttachments now contains all whole chunks with size to be composed into blocks and uploaded - // ChunkedHashes contains all chunked up chunks to be composed into blocks - - if 
(remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Assembling {} attachments and {} chunked parts from {} ops into blocks and loose attachments", - SortedUploadAttachments.size(), - ChunkedHashes.size(), - TotalOpCount)); - - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } - - // SortedUploadAttachments now contains all whole chunks with size to be composed into blocks and uploaded - // ChunkedHashes contains all chunked up chunks to be composed into blocks - - size_t ChunkAssembleCount = SortedUploadAttachments.size() + ChunkedHashes.size(); - size_t ChunksAssembled = 0; - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Assembling {} attachments from {} ops into blocks", ChunkAssembleCount, TotalOpCount)); - - Latch BlockCreateLatch(1); - size_t GeneratedBlockCount = 0; - size_t BlockSize = 0; - std::vector<std::pair<IoHash, FetchChunkFunc>> ChunksInBlock; - - Oid LastOpKey = Oid::Zero; - uint32_t ComposedBlocks = 0; + }; - uint64_t CreateBlocksStartMS = Timer.GetElapsedTimeMs(); - try { - uint64_t FetchAttachmentsStartMS = Timer.GetElapsedTimeMs(); - std::unordered_set<IoHash, IoHash::Hasher> AddedAttachmentHashes; - auto NewBlock = [&]() { - size_t BlockIndex = remotestore_impl::AddBlock(BlocksLock, Blocks); - size_t ChunkCount = ChunksInBlock.size(); - std::vector<IoHash> ChunkRawHashes; - ChunkRawHashes.reserve(ChunkCount); - for (const std::pair<IoHash, FetchChunkFunc>& 
Chunk : ChunksInBlock) - { - ChunkRawHashes.push_back(Chunk.first); - } - if (BuildBlocks) - { - remotestore_impl::CreateBlock(WorkerPool, - BlockCreateLatch, - std::move(ChunksInBlock), - BlocksLock, - Blocks, - BlockIndex, - AsyncOnBlock, - RemoteResult); - ComposedBlocks++; - } - else - { - ZEN_INFO("Bulk group {} attachments", ChunkCount); - OnBlockChunks(std::move(ChunksInBlock)); - } - { - // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index - RwLock::SharedLockScope _(BlocksLock); - Blocks[BlockIndex].ChunkRawHashes = std::move(ChunkRawHashes); - } - uint64_t NowMS = Timer.GetElapsedTimeMs(); - ZEN_INFO("Assembled block {} with {} chunks in {} ({})", - BlockIndex, - ChunkCount, - NiceTimeSpanMs(NowMS - FetchAttachmentsStartMS), - NiceBytes(BlockSize)); - FetchAttachmentsStartMS = NowMS; - ChunksInBlock.clear(); - BlockSize = 0; - GeneratedBlockCount++; - }; - - for (auto HashIt = SortedUploadAttachments.begin(); HashIt != SortedUploadAttachments.end(); HashIt++) + std::vector<IoHash> AttachmentHashes; + AttachmentHashes.reserve(SortedUploadAttachments.size()); + std::vector<uint64_t> AttachmentSizes; + AttachmentSizes.reserve(SortedUploadAttachments.size()); + std::vector<Oid> AttachmentKeys; + AttachmentKeys.reserve(SortedUploadAttachments.size()); + + for (const std::pair<IoHash, Oid>& Attachment : SortedUploadAttachments) { - if (remotestore_impl::IsCancelled(OptionalContext)) + AttachmentHashes.push_back(Attachment.first); + if (auto It = UploadAttachments.find(Attachment.first); It != UploadAttachments.end()) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - break; - } - if (ChunksAssembled % 1000 == 0) - { - remotestore_impl::ReportProgress( - OptionalContext, - "Assembling blocks"sv, - fmt::format("{} attachments 
processed, {} blocks assembled", ChunksAssembled, ComposedBlocks), - ChunkAssembleCount, - ChunkAssembleCount - ChunksAssembled); + AttachmentSizes.push_back(It->second.Size); } - const IoHash& RawHash(HashIt->first); - const Oid CurrentOpKey = HashIt->second; - const IoHash& AttachmentHash(HashIt->first); - auto InfoIt = UploadAttachments.find(RawHash); - ZEN_ASSERT(InfoIt != UploadAttachments.end()); - uint64_t PayloadSize = InfoIt->second.Size; - - if (AddedAttachmentHashes.insert(AttachmentHash).second) + else { - if (BuildBlocks && ChunksInBlock.size() > 0) - { - if (((BlockSize + PayloadSize) > MaxBlockSize || (ChunksInBlock.size() + 1) > MaxChunksPerBlock) && - (CurrentOpKey != LastOpKey)) - { - NewBlock(); - } - } - - if (auto It = LooseUploadAttachments.find(RawHash); It != LooseUploadAttachments.end()) - { - ChunksInBlock.emplace_back(std::make_pair( - RawHash, - [RawSize = It->second.first, - IoBuffer = SharedBuffer(It->second.second)](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { - return std::make_pair(RawSize, CompressedBuffer::FromCompressedNoValidate(IoBuffer.AsIoBuffer())); - })); - LooseUploadAttachments.erase(It); - } - else - { - ChunksInBlock.emplace_back( - std::make_pair(RawHash, [&ChunkStore](const IoHash& RawHash) -> std::pair<uint64_t, CompressedBuffer> { - IoBuffer Chunk = ChunkStore.FindChunkByCid(RawHash); - if (!Chunk) - { - throw std::runtime_error(fmt::format("Failed to find chunk {} in cid store", RawHash)); - } - IoHash ValidateRawHash; - uint64_t RawSize = 0; - CompressedBuffer Compressed = - CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), ValidateRawHash, RawSize); - if (!Compressed) - { - throw std::runtime_error( - fmt::format("Chunk {} in cid store is malformed (not a compressed buffer)", RawHash)); - } - if (RawHash != ValidateRawHash) - { - throw std::runtime_error( - fmt::format("Chunk {} in cid store is malformed (mismatching raw hash)", RawHash)); - } - return {RawSize, Compressed}; - })); - } 
- BlockSize += PayloadSize; - - LastOpKey = CurrentOpKey; - ChunksAssembled++; + throw std::runtime_error( + fmt::format("Attachment to upload state inconsistent, could not find attachment {}", Attachment.first)); } + AttachmentKeys.push_back(Attachment.second); } - if (!RemoteResult.IsError()) - { - // Keep the chunked files as separate blocks to make the blocks generated - // more consistent - if (BlockSize > 0) + + auto FetchWholeAttachmentResolver = [&LooseUploadAttachments, &ChunkStore](const IoHash& AttachmentHash) -> FetchChunkFunc { + if (auto It = LooseUploadAttachments.find(AttachmentHash); It != LooseUploadAttachments.end()) { - NewBlock(); + uint64_t RawSize = It->second.first; + IoBuffer Payload = std::move(It->second.second); + return + [RawSize, Payload = std::move(Payload)](const IoHash& ChunkHash) mutable -> std::pair<uint64_t, CompositeBuffer> { + ZEN_UNUSED(ChunkHash); + return {RawSize, CompositeBuffer(SharedBuffer(std::move(Payload)))}; + }; } - - for (size_t ChunkedFileIndex : ChunkedFilesOrder) + else { - const ChunkedFile& ChunkedFile = ChunkedFiles[ChunkedFileIndex]; - const ChunkedInfoWithSource& Chunked = ChunkedFile.Chunked; - size_t ChunkCount = Chunked.Info.ChunkHashes.size(); - for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) - { - if (remotestore_impl::IsCancelled(OptionalContext)) + return [&ChunkStore](const IoHash& RawHash) -> std::pair<uint64_t, CompositeBuffer> { + IoBuffer Chunk = ChunkStore.FindChunkByCid(RawHash); + if (!Chunk) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - break; + throw std::runtime_error(fmt::format("Failed to find chunk {} in cid store", RawHash)); } - if (ChunksAssembled % 1000 == 0) + + // These are small chunks - make memory resident + Chunk = IoBufferBuilder::ReadFromFileMaybe(Chunk); + + 
IoHash ValidateRawHash; + uint64_t RawSize = 0; + CompressedBuffer Compressed = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), ValidateRawHash, RawSize); + if (!Compressed) { - remotestore_impl::ReportProgress( - OptionalContext, - "Assembling blocks"sv, - fmt::format("{} attachments processed, {} blocks assembled", ChunksAssembled, ComposedBlocks), - ChunkAssembleCount, - ChunkAssembleCount - ChunksAssembled); + throw std::runtime_error(fmt::format("Chunk {} in cid store is malformed (not a compressed buffer)", RawHash)); } - const IoHash& ChunkHash = ChunkedFile.Chunked.Info.ChunkHashes[ChunkIndex]; - if (auto FindIt = ChunkedHashes.find(ChunkHash); FindIt != ChunkedHashes.end()) + if (RawHash != ValidateRawHash) { - if (AddedAttachmentHashes.insert(ChunkHash).second) - { - const ChunkSource& Source = Chunked.ChunkSources[ChunkIndex]; - uint32_t ChunkSize = gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + Source.Size); - if (BuildBlocks && ChunksInBlock.size() > 0) - { - if ((BlockSize + ChunkSize) > MaxBlockSize || (ChunksInBlock.size() + 1) > MaxChunksPerBlock) - { - NewBlock(); - } - } - ChunksInBlock.emplace_back( - std::make_pair(ChunkHash, - [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size]( - const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { - return {Size, - CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)), - OodleCompressor::Mermaid, - OodleCompressionLevel::None)}; - })); - BlockSize += CompressedBuffer::GetHeaderSizeForNoneEncoder() + Source.Size; - ChunksAssembled++; - } - ChunkedHashes.erase(FindIt); + throw std::runtime_error(fmt::format("Chunk {} in cid store is malformed (mismatching raw hash)", RawHash)); } - } + return {RawSize, Compressed.GetCompressed()}; + }; } - } + }; + + Composer.Compose(AttachmentHashes, + AttachmentSizes, + AttachmentKeys, + [&OnNewBlock, &FetchWholeAttachmentResolver](std::vector<IoHash>&& ChunkRawHashes) { + 
OnNewBlock(std::move(ChunkRawHashes), FetchWholeAttachmentResolver); + }); + } + + { + std::vector<IoHash> AttachmentHashes; + AttachmentHashes.reserve(ChunkedChunkCount); + std::vector<uint64_t> AttachmentSizes; + AttachmentSizes.reserve(ChunkedChunkCount); + std::vector<Oid> AttachmentKeys; + AttachmentKeys.reserve(ChunkedChunkCount); - if (BlockSize > 0 && !RemoteResult.IsError()) + tsl::robin_map<IoHash, std::pair<size_t, size_t>, IoHash::Hasher> ChunkHashToChunkFileIndexAndChunkIndex; + + for (size_t ChunkedFileIndex : ChunkedFilesOrder) { - if (!remotestore_impl::IsCancelled(OptionalContext)) + const remotestore_impl::ChunkedFile& ChunkedFile = ChunkedFiles[ChunkedFileIndex]; + const ChunkedInfoWithSource& Chunked = ChunkedFile.Chunked; + size_t ChunkCount = Chunked.Info.ChunkHashes.size(); + Oid ChunkedFileOid = Oid::NewOid(); + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) { - NewBlock(); + const IoHash& ChunkHash = Chunked.Info.ChunkHashes[ChunkIndex]; + uint64_t ChunkSize = Chunked.ChunkSources[ChunkIndex].Size; + { + if (ChunkHashToChunkFileIndexAndChunkIndex + .insert(std::make_pair(ChunkHash, std::make_pair(ChunkedFileIndex, ChunkIndex))) + .second) + { + if (ChunkSize > MaxChunkEmbedSize) + { + OnLargeAttachment(ChunkHash, + [SourceBuffer = ChunkedFile.Source, + ChunkSource = Chunked.ChunkSources[ChunkIndex], + ChunkHash](const IoHash& RawHash) -> CompositeBuffer { + ZEN_ASSERT(RawHash == ChunkHash); + CompressedBuffer Compressed = CompressedBuffer::Compress( + SharedBuffer(IoBuffer(SourceBuffer, ChunkSource.Offset, ChunkSource.Size)), + OodleCompressor::Mermaid, + OodleCompressionLevel::None); + return Compressed.GetCompressed(); + }); + + LargeChunkHashes.insert(ChunkHash); + } + else + { + AttachmentHashes.push_back(ChunkHash); + AttachmentSizes.push_back(ChunkSize); + AttachmentKeys.push_back(ChunkedFileOid); + } + } + } } } - if (ChunkAssembleCount > 0) - { - remotestore_impl::ReportProgress( - OptionalContext, - "Assembling 
blocks"sv, - fmt::format("{} attachments processed, {} blocks assembled", ChunksAssembled, ComposedBlocks), - ChunkAssembleCount, - 0); - } - - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Built oplog and collected {} attachments from {} ops into {} blocks and in {}", - ChunkAssembleCount, - TotalOpCount, - GeneratedBlockCount, - NiceTimeSpanMs(static_cast<uint64_t>(Timer.GetElapsedTimeMs())))); - - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - BlockCreateLatch.CountDown(); - while (!BlockCreateLatch.Wait(1000)) + auto ChunkedFileAttachmentResolver = [&ChunkHashToChunkFileIndexAndChunkIndex, + &ChunkedFiles](const IoHash& AttachmentHash) -> FetchChunkFunc { + if (auto It = ChunkHashToChunkFileIndexAndChunkIndex.find(AttachmentHash); + It != ChunkHashToChunkFileIndexAndChunkIndex.end()) { - ptrdiff_t Remaining = BlockCreateLatch.Remaining(); - remotestore_impl::ReportProgress(OptionalContext, - "Assembling blocks"sv, - fmt::format("Aborting, {} blocks remaining...", Remaining), - GeneratedBlockCount, - Remaining); + const std::pair<size_t, size_t>& ChunkFileIndexAndChunkIndex = It->second; + size_t ChunkedFileIndex = ChunkFileIndexAndChunkIndex.first; + size_t ChunkIndex = ChunkFileIndexAndChunkIndex.second; + const remotestore_impl::ChunkedFile& ChunkedFile = ChunkedFiles[ChunkedFileIndex]; + + const ChunkSource& Source = ChunkedFile.Chunked.ChunkSources[ChunkIndex]; + ZEN_ASSERT(Source.Offset + Source.Size <= ChunkedFile.Source.GetSize()); + + return [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size]( + const IoHash&) -> std::pair<uint64_t, CompositeBuffer> { + return {Size, + CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)), + OodleCompressor::Mermaid, + 
OodleCompressionLevel::None) + .GetCompressed()}; + }; } - if (GeneratedBlockCount > 0) + else { - remotestore_impl::ReportProgress(OptionalContext, - "Assembling blocks"sv, - fmt::format("Aborting, {} blocks remaining...", 0), - GeneratedBlockCount, - 0); + ZEN_ASSERT(false); } - return {}; - } + }; + + Composer.Compose(AttachmentHashes, + AttachmentSizes, + AttachmentKeys, + [&OnNewBlock, &ChunkedFileAttachmentResolver](std::vector<IoHash>&& ChunkRawHashes) { + OnNewBlock(std::move(ChunkRawHashes), ChunkedFileAttachmentResolver); + }); } - catch (const std::exception& Ex) + + if (remotestore_impl::IsCancelled(OptionalContext)) { - BlockCreateLatch.CountDown(); - while (!BlockCreateLatch.Wait(1000)) - { - } - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), "Block creation failed", Ex.what()); - throw; + Work.Abort(); } - BlockCreateLatch.CountDown(); - while (!BlockCreateLatch.Wait(1000)) - { - ptrdiff_t Remaining = BlockCreateLatch.Remaining(); + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, IsPaused); if (remotestore_impl::IsCancelled(OptionalContext)) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - while (!BlockCreateLatch.Wait(1000)) - { - Remaining = BlockCreateLatch.Remaining(); - remotestore_impl::ReportProgress(OptionalContext, - "Creating blocks"sv, - fmt::format("Aborting, {} blocks remaining...", Remaining), - GeneratedBlockCount, - Remaining); - } - remotestore_impl::ReportProgress(OptionalContext, "Creating blocks"sv, "Aborted"sv, GeneratedBlockCount, 0); - return {}; + AbortFlag.store(true); } remotestore_impl::ReportProgress(OptionalContext, "Creating blocks"sv, - fmt::format("{} remaining...", Remaining), - GeneratedBlockCount, - Remaining); - } + fmt::format("{}{} 
remaining...", AbortFlag.load() ? "Aborting, " : "", PendingWork), + ComposedBlocks, + PendingWork, + BlockCreateProgressTimer.GetElapsedTimeMs()); + }); - if (GeneratedBlockCount > 0) + if (!AbortFlag.load() && ComposedBlocks > 0) { + remotestore_impl::ReportProgress(OptionalContext, + "Creating blocks"sv, + ""sv, + ComposedBlocks, + 0, + BlockCreateProgressTimer.GetElapsedTimeMs()); + uint64_t NowMS = Timer.GetElapsedTimeMs(); - remotestore_impl::ReportProgress(OptionalContext, "Creating blocks"sv, ""sv, GeneratedBlockCount, 0); remotestore_impl::ReportMessage( OptionalContext, - fmt::format("Created {} blocks in {}", GeneratedBlockCount, NiceTimeSpanMs(NowMS - CreateBlocksStartMS))); + fmt::format("Created {} blocks in {}", ComposedBlocks, NiceTimeSpanMs(NowMS - CreateBlocksStartMS))); + } + } + + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } + + // Reused blocks were not composed (their chunks were erased from UploadAttachments) but must + // still appear in the container so that a fresh receiver knows to download them. 
+ if (BuildBlocks) + { + for (size_t KnownBlockIndex : ReusedBlockIndexes) + { + const ChunkBlockDescription& Reused = KnownBlocks[KnownBlockIndex]; + Blocks.push_back(Reused); } + } - if (!RemoteResult.IsError()) + CbObjectWriter OplogContainerWriter; + RwLock::SharedLockScope _(BlocksLock); + OplogContainerWriter.AddBinary("ops"sv, CompressedOpsSection.GetCompressed().Flatten().AsIoBuffer()); + OplogContainerWriter.BeginArray("blocks"sv); + { + for (const ChunkBlockDescription& B : Blocks) { - CbObjectWriter OplogContinerWriter; - RwLock::SharedLockScope _(BlocksLock); - OplogContinerWriter.AddBinary("ops"sv, CompressedOpsSection.GetCompressed().Flatten().AsIoBuffer()); - OplogContinerWriter.BeginArray("blocks"sv); + ZEN_ASSERT(!B.ChunkRawHashes.empty()); + if (BuildBlocks) { - for (const ChunkBlockDescription& B : Blocks) + ZEN_ASSERT(B.BlockHash != IoHash::Zero); + + OplogContainerWriter.BeginObject(); { - ZEN_ASSERT(!B.ChunkRawHashes.empty()); - if (BuildBlocks) + OplogContainerWriter.AddBinaryAttachment("rawhash"sv, B.BlockHash); + OplogContainerWriter.BeginArray("chunks"sv); { - ZEN_ASSERT(B.BlockHash != IoHash::Zero); - - OplogContinerWriter.BeginObject(); + for (const IoHash& RawHash : B.ChunkRawHashes) { - OplogContinerWriter.AddBinaryAttachment("rawhash"sv, B.BlockHash); - OplogContinerWriter.BeginArray("chunks"sv); - { - for (const IoHash& RawHash : B.ChunkRawHashes) - { - OplogContinerWriter.AddHash(RawHash); - } - } - OplogContinerWriter.EndArray(); // "chunks" + OplogContainerWriter.AddHash(RawHash); } - OplogContinerWriter.EndObject(); - continue; } + OplogContainerWriter.EndArray(); // "chunks" + } + OplogContainerWriter.EndObject(); + continue; + } - ZEN_ASSERT(B.BlockHash == IoHash::Zero); - OplogContinerWriter.BeginObject(); + ZEN_ASSERT(B.BlockHash == IoHash::Zero); + OplogContainerWriter.BeginObject(); + { + OplogContainerWriter.BeginArray("chunks"sv); + { + for (const IoHash& RawHash : B.ChunkRawHashes) { - 
OplogContinerWriter.BeginArray("chunks"sv); - { - for (const IoHash& RawHash : B.ChunkRawHashes) - { - OplogContinerWriter.AddBinaryAttachment(RawHash); - } - } - OplogContinerWriter.EndArray(); + OplogContainerWriter.AddBinaryAttachment(RawHash); } - OplogContinerWriter.EndObject(); } + OplogContainerWriter.EndArray(); } - OplogContinerWriter.EndArray(); // "blocks"sv - OplogContinerWriter.BeginArray("chunkedfiles"sv); + OplogContainerWriter.EndObject(); + } + } + OplogContainerWriter.EndArray(); // "blocks"sv + OplogContainerWriter.BeginArray("chunkedfiles"sv); + { + for (const remotestore_impl::ChunkedFile& F : ChunkedFiles) + { + OplogContainerWriter.BeginObject(); { - for (const ChunkedFile& F : ChunkedFiles) + OplogContainerWriter.AddHash("rawhash"sv, F.Chunked.Info.RawHash); + OplogContainerWriter.AddInteger("rawsize"sv, F.Chunked.Info.RawSize); + OplogContainerWriter.BeginArray("chunks"sv); { - OplogContinerWriter.BeginObject(); + for (const IoHash& RawHash : F.Chunked.Info.ChunkHashes) { - OplogContinerWriter.AddHash("rawhash"sv, F.Chunked.Info.RawHash); - OplogContinerWriter.AddInteger("rawsize"sv, F.Chunked.Info.RawSize); - OplogContinerWriter.BeginArray("chunks"sv); - { - for (const IoHash& RawHash : F.Chunked.Info.ChunkHashes) - { - OplogContinerWriter.AddHash(RawHash); - } - } - OplogContinerWriter.EndArray(); // "chunks" - OplogContinerWriter.BeginArray("sequence"sv); - { - for (uint32_t ChunkIndex : F.Chunked.Info.ChunkSequence) - { - OplogContinerWriter.AddInteger(ChunkIndex); - } - } - OplogContinerWriter.EndArray(); // "sequence" + OplogContainerWriter.AddHash(RawHash); } - OplogContinerWriter.EndObject(); } - } - OplogContinerWriter.EndArray(); // "chunkedfiles"sv - - OplogContinerWriter.BeginArray("chunks"sv); - { - for (const IoHash& AttachmentHash : LargeChunkHashes) + OplogContainerWriter.EndArray(); // "chunks" + OplogContainerWriter.BeginArray("sequence"sv); { - OplogContinerWriter.AddBinaryAttachment(AttachmentHash); + for (uint32_t 
ChunkIndex : F.Chunked.Info.ChunkSequence) + { + OplogContainerWriter.AddInteger(ChunkIndex); + } } + OplogContainerWriter.EndArray(); // "sequence" } - OplogContinerWriter.EndArray(); // "chunks" + OplogContainerWriter.EndObject(); + } + } + OplogContainerWriter.EndArray(); // "chunkedfiles"sv - OplogContainerObject = OplogContinerWriter.Save(); + OplogContainerWriter.BeginArray("chunks"sv); + { + for (const IoHash& AttachmentHash : LargeChunkHashes) + { + OplogContainerWriter.AddBinaryAttachment(AttachmentHash); } } + OplogContainerWriter.EndArray(); // "chunks" + + OplogContainerObject = OplogContainerWriter.Save(); + return OplogContainerObject; } -RemoteProjectStore::LoadContainerResult -BuildContainer(CidStore& ChunkStore, +CbObject +BuildContainer(LoggerRef InLog, + CidStore& ChunkStore, ProjectStore::Project& Project, ProjectStore::Oplog& Oplog, WorkerThreadPool& WorkerPool, @@ -2693,32 +3164,29 @@ BuildContainer(CidStore& ChunkStore, const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, bool EmbedLooseFiles) { - // WorkerThreadPool& WorkerPool = GetLargeWorkerPool(EWorkloadType::Background); - - remotestore_impl::AsyncRemoteResult RemoteResult; - CbObject ContainerObject = BuildContainer(ChunkStore, - Project, - Oplog, - MaxBlockSize, - MaxChunksPerBlock, - MaxChunkEmbedSize, - ChunkFileSizeLimit, - BuildBlocks, - IgnoreMissingAttachments, - AllowChunking, - {}, - WorkerPool, - AsyncOnBlock, - OnLargeAttachment, - OnBlockChunks, - EmbedLooseFiles, - nullptr, - RemoteResult); - return RemoteProjectStore::LoadContainerResult{RemoteResult.ConvertResult(), ContainerObject}; + return BuildContainer(InLog, + ChunkStore, + Project, + Oplog, + MaxBlockSize, + MaxChunksPerBlock, + MaxChunkEmbedSize, + ChunkFileSizeLimit, + BuildBlocks, + IgnoreMissingAttachments, + AllowChunking, + {}, + WorkerPool, + AsyncOnBlock, + OnLargeAttachment, + OnBlockChunks, + EmbedLooseFiles, + /*OptionalContext*/ nullptr); } 
-RemoteProjectStore::Result -SaveOplog(CidStore& ChunkStore, +void +SaveOplog(LoggerRef InLog, + CidStore& ChunkStore, RemoteProjectStore& RemoteStore, ProjectStore::Project& Project, ProjectStore::Oplog& Oplog, @@ -2735,6 +3203,7 @@ SaveOplog(CidStore& ChunkStore, { using namespace std::literals; + ZEN_SCOPED_LOG(InLog); Stopwatch Timer; remotestore_impl::UploadInfo Info; @@ -2749,58 +3218,51 @@ SaveOplog(CidStore& ChunkStore, CreateDirectories(AttachmentTempPath); } - remotestore_impl::AsyncRemoteResult RemoteResult; RwLock AttachmentsLock; std::unordered_set<IoHash, IoHash::Hasher> LargeAttachments; std::unordered_map<IoHash, remotestore_impl::CreatedBlock, IoHash::Hasher> CreatedBlocks; tsl::robin_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher> LooseLargeFiles; - auto MakeTempBlock = [AttachmentTempPath, &RemoteResult, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock, - ChunkBlockDescription&& Block) { + auto MakeTempBlock = [&Log, AttachmentTempPath, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock, + ChunkBlockDescription&& Block) { std::filesystem::path BlockPath = AttachmentTempPath; BlockPath.append(Block.BlockHash.ToHexString()); - try - { - IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath); - RwLock::ExclusiveLockScope __(AttachmentsLock); - CreatedBlocks.insert({Block.BlockHash, {.Payload = std::move(BlockBuffer), .Block = std::move(Block)}}); - ZEN_DEBUG("Saved temp block to '{}', {}", AttachmentTempPath, NiceBytes(BlockBuffer.GetSize())); - } - catch (const std::exception& Ex) - { - RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - Ex.what(), - "Unable to create temp block file"); - return; - } + IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath); + const uint64_t BlockSize = BlockBuffer.GetSize(); + RwLock::ExclusiveLockScope __(AttachmentsLock); + CreatedBlocks.insert( + 
{Block.BlockHash, {.Payload = CompositeBuffer(SharedBuffer(std::move(BlockBuffer))), .Block = std::move(Block)}}); + ZEN_DEBUG("Saved temp block to '{}', {}", AttachmentTempPath, NiceBytes(BlockSize)); }; - auto UploadBlock = [&RemoteStore, &RemoteResult, &Info, OptionalContext](CompressedBuffer&& CompressedBlock, - ChunkBlockDescription&& Block) { - IoHash BlockHash = Block.BlockHash; + auto UploadBlock = [&Log, &RemoteStore, &RemoteStoreInfo, &Info, OptionalContext](CompressedBuffer&& CompressedBlock, + ChunkBlockDescription&& Block) { + IoHash BlockHash = Block.BlockHash; + uint64_t CompressedSize = CompressedBlock.GetCompressedSize(); RemoteProjectStore::SaveAttachmentResult Result = RemoteStore.SaveAttachment(CompressedBlock.GetCompressed(), BlockHash, std::move(Block)); if (Result.ErrorCode) { - RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Failed to save attachment ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return; + throw RemoteStoreError(fmt::format("Failed to save block attachment {} for oplog '{}': {}", + BlockHash, + RemoteStoreInfo.ContainerName, + Result.Reason), + Result.ErrorCode, + Result.Text); } Info.AttachmentBlocksUploaded.fetch_add(1); - Info.AttachmentBlockBytesUploaded.fetch_add(CompressedBlock.GetCompressedSize()); - ZEN_DEBUG("Saved block {}, {}", BlockHash, NiceBytes(CompressedBlock.GetCompressedSize())); + Info.AttachmentBlockBytesUploaded.fetch_add(CompressedSize); + ZEN_DEBUG("Saved block {}, {}", BlockHash, NiceBytes(CompressedSize)); }; std::vector<std::vector<std::pair<IoHash, FetchChunkFunc>>> BlockChunks; - auto OnBlockChunks = [&BlockChunks](std::vector<std::pair<IoHash, FetchChunkFunc>>&& Chunks) { - BlockChunks.push_back({Chunks.begin(), Chunks.end()}); + auto OnBlockChunks = [&Log, &BlockChunks](std::vector<std::pair<IoHash, FetchChunkFunc>>&& Chunks) { + 
BlockChunks.push_back({std::make_move_iterator(Chunks.begin()), std::make_move_iterator(Chunks.end())}); ZEN_DEBUG("Found {} block chunks", Chunks.size()); }; - auto OnLargeAttachment = [&AttachmentsLock, &LargeAttachments, &LooseLargeFiles](const IoHash& AttachmentHash, - TGetAttachmentBufferFunc&& GetBufferFunc) { + auto OnLargeAttachment = [&Log, &AttachmentsLock, &LargeAttachments, &LooseLargeFiles](const IoHash& AttachmentHash, + TGetAttachmentBufferFunc&& GetBufferFunc) { { RwLock::ExclusiveLockScope _(AttachmentsLock); LargeAttachments.insert(AttachmentHash); @@ -2826,15 +3288,10 @@ SaveOplog(CidStore& ChunkStore, RemoteProjectStore::CreateContainerResult ContainerResult = RemoteStore.CreateContainer(); if (ContainerResult.ErrorCode) { - RemoteProjectStore::Result Result = {.ErrorCode = ContainerResult.ErrorCode, - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Text = fmt::format("Failed to create container for oplog '{}' ({}): {}", - RemoteStoreInfo.ContainerName, - ContainerResult.ErrorCode, - ContainerResult.Reason)}; - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return Result; + throw RemoteStoreError( + fmt::format("Failed to create container for oplog '{}': {}", RemoteStoreInfo.ContainerName, ContainerResult.Reason), + ContainerResult.ErrorCode, + ContainerResult.Text); } if (RemoteStoreInfo.CreateBlocks) @@ -2850,7 +3307,7 @@ SaveOplog(CidStore& ChunkStore, { ZEN_ASSERT(BlockDescription.ChunkCompressedLengths.empty()); - size_t ChunkCount = BlockDescription.ChunkRawLengths.size(); + size_t ChunkCount = BlockDescription.ChunkRawHashes.size(); if (ChunkCount > 0) { // Fake sizes, will give usage number of number of chunks used rather than bytes used - better than nothing @@ -2884,7 +3341,8 @@ SaveOplog(CidStore& ChunkStore, } } - CbObject OplogContainerObject = BuildContainer(ChunkStore, + CbObject OplogContainerObject = BuildContainer(InLog, + 
ChunkStore, Project, Oplog, MaxBlockSize, @@ -2900,97 +3358,80 @@ SaveOplog(CidStore& ChunkStore, OnLargeAttachment, OnBlockChunks, EmbedLooseFiles, - OptionalContext, - /* out */ RemoteResult); - if (!RemoteResult.IsError()) + OptionalContext); + if (remotestore_impl::IsCancelled(OptionalContext)) { - Info.OplogSizeBytes = OplogContainerObject.GetSize(); + return; + } - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteProjectStore::Result Result = {.ErrorCode = 0, - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Text = "Operation cancelled"}; - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return Result; - } + Info.OplogSizeBytes = OplogContainerObject.GetSize(); + + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return; + } - uint64_t ChunkCount = OplogContainerObject["chunks"sv].AsArrayView().Num(); - uint64_t BlockCount = OplogContainerObject["blocks"sv].AsArrayView().Num(); + uint64_t ChunkCount = OplogContainerObject["chunks"sv].AsArrayView().Num(); + uint64_t BlockCount = OplogContainerObject["blocks"sv].AsArrayView().Num(); + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Saving oplog container '{}' with {} attachments and {} blocks...", + RemoteStoreInfo.ContainerName, + ChunkCount, + BlockCount)); + Stopwatch SaveContainerTimer; + IoBuffer ContainerPayload = OplogContainerObject.GetBuffer().AsIoBuffer(); + ContainerPayload.SetContentType(ZenContentType::kCbObject); + RemoteProjectStore::SaveResult ContainerSaveResult = RemoteStore.SaveContainer(std::move(ContainerPayload)); + TransferWallTimeMS += SaveContainerTimer.GetElapsedTimeMs(); + if (ContainerSaveResult.ErrorCode) + { + throw RemoteStoreError( + fmt::format("Failed to save oplog container for oplog '{}': {}", RemoteStoreInfo.ContainerName, ContainerSaveResult.Reason), + ContainerSaveResult.ErrorCode, + ContainerSaveResult.Text); + } + else + { 
remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Saving oplog container '{}' with {} attachments and {} blocks...", + fmt::format("Saved container '{}' in {}", RemoteStoreInfo.ContainerName, - ChunkCount, - BlockCount)); - Stopwatch SaveContainerTimer; - IoBuffer ContainerPayload = OplogContainerObject.GetBuffer().AsIoBuffer(); - ContainerPayload.SetContentType(ZenContentType::kCbObject); - RemoteProjectStore::SaveResult ContainerSaveResult = RemoteStore.SaveContainer(std::move(ContainerPayload)); - TransferWallTimeMS += SaveContainerTimer.GetElapsedTimeMs(); - if (ContainerSaveResult.ErrorCode) - { - RemoteResult.SetError(ContainerSaveResult.ErrorCode, ContainerSaveResult.Reason, "Failed to save oplog container"); - RemoteProjectStore::Result Result = { - .ErrorCode = RemoteResult.GetError(), - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Text = fmt::format("Failed to save oplog container ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())}; - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Failed to save oplog container ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return Result; - } - else - { - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Saved container '{}' in {}", - RemoteStoreInfo.ContainerName, - NiceTimeSpanMs(static_cast<uint64_t>(ContainerSaveResult.ElapsedSeconds * 1000.0)))); - } - - { - Stopwatch UploadAttachmentsTimer; - UploadAttachments(NetworkWorkerPool, - ChunkStore, - RemoteStore, - LargeAttachments, - BlockChunks, - CreatedBlocks, - LooseLargeFiles, - ContainerSaveResult.Needs, - ForceUpload, - Info, - RemoteResult, - OptionalContext); - TransferWallTimeMS += UploadAttachmentsTimer.GetElapsedTimeMs(); - } + NiceTimeSpanMs(static_cast<uint64_t>(ContainerSaveResult.ElapsedSeconds * 1000.0)))); + } - uint32_t Try = 0; - while (!RemoteResult.IsError()) + { + Stopwatch UploadAttachmentsTimer; + UploadAttachments(NetworkWorkerPool, + 
ChunkStore, + RemoteStore, + LargeAttachments, + BlockChunks, + CreatedBlocks, + LooseLargeFiles, + ContainerSaveResult.Needs, + ForceUpload, + Info, + OptionalContext); + TransferWallTimeMS += UploadAttachmentsTimer.GetElapsedTimeMs(); + + const uint32_t MaxTries = 8; + uint32_t Try = 0; + while (Try < MaxTries) { if (remotestore_impl::IsCancelled(OptionalContext)) { - RemoteProjectStore::Result Result = {.ErrorCode = 0, - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Text = "Operation cancelled"}; - remotestore_impl::ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", Result.ErrorCode, Result.Text)); - return Result; + return; } remotestore_impl::ReportMessage(OptionalContext, "Finalizing oplog container..."); RemoteProjectStore::FinalizeResult ContainerFinalizeResult = RemoteStore.FinalizeContainer(ContainerSaveResult.RawHash); if (ContainerFinalizeResult.ErrorCode) { - RemoteResult.SetError(ContainerFinalizeResult.ErrorCode, ContainerFinalizeResult.Reason, ContainerFinalizeResult.Text); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Failed to finalize oplog container {} ({}): {}", - ContainerSaveResult.RawHash, - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - RemoteProjectStore::Result Result = RemoteResult.ConvertResult(); - return Result; + throw RemoteStoreError( + fmt::format("Failed to finalize oplog container {}: {}", ContainerSaveResult.RawHash, ContainerFinalizeResult.Reason), + ContainerFinalizeResult.ErrorCode, + ContainerFinalizeResult.Text); } + remotestore_impl::ReportMessage( OptionalContext, fmt::format("Finalized container '{}' in {}", @@ -3004,78 +3445,60 @@ SaveOplog(CidStore& ChunkStore, if (remotestore_impl::IsCancelled(OptionalContext)) { - RemoteProjectStore::Result Result = {.ErrorCode = 0, - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Text = "Operation cancelled"}; - return Result; + return; } - const uint32_t MaxTries = 8; - if (Try < MaxTries) - { - Try++; + Try++; - 
remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Finalize of container '{}' reported {} missing attachments. Uploading missing attachements. Try {}", - RemoteStoreInfo.ContainerName, - ContainerFinalizeResult.Needs.size(), - Try)); - - Stopwatch UploadAttachmentsTimer; - UploadAttachments(NetworkWorkerPool, - ChunkStore, - RemoteStore, - LargeAttachments, - BlockChunks, - CreatedBlocks, - LooseLargeFiles, - ContainerFinalizeResult.Needs, - false, - Info, - RemoteResult, - OptionalContext); - TransferWallTimeMS += UploadAttachmentsTimer.GetElapsedTimeMs(); - } - else + if (Try == MaxTries) { - RemoteResult.SetError( - gsl::narrow<int>(HttpResponseCode::InternalServerError), - "Failed to save oplog container", + throw std::runtime_error( fmt::format("Giving up finalize oplog container {} after {} retries, still getting reports of missing attachments", ContainerSaveResult.RawHash, - ContainerFinalizeResult.Needs.size())); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Failed to finalize oplog container container {} ({}): {}", - ContainerSaveResult.RawHash, - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - break; + Try)); } - } - LooseLargeFiles.clear(); - CreatedBlocks.clear(); + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Finalize of container '{}' reported {} missing attachments. Uploading missing attachments. 
Try {}", + RemoteStoreInfo.ContainerName, + ContainerFinalizeResult.Needs.size(), + Try)); + + Stopwatch RetryUploadAttachmentsTimer; + UploadAttachments(NetworkWorkerPool, + ChunkStore, + RemoteStore, + LargeAttachments, + BlockChunks, + CreatedBlocks, + LooseLargeFiles, + ContainerFinalizeResult.Needs, + false, + Info, + OptionalContext); + TransferWallTimeMS += RetryUploadAttachmentsTimer.GetElapsedTimeMs(); + } } - RemoteProjectStore::Result Result = RemoteResult.ConvertResult(); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; - remotestore_impl::LogRemoteStoreStatsDetails(RemoteStore.GetStats()); + LooseLargeFiles.clear(); + CreatedBlocks.clear(); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Saved oplog '{}' {} in {} ({}), Blocks: {} ({}), Attachments: {} ({}) {}", - RemoteStoreInfo.ContainerName, - RemoteResult.GetError() == 0 ? "SUCCESS" : "FAILURE", - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)), - NiceBytes(Info.OplogSizeBytes), - Info.AttachmentBlocksUploaded.load(), - NiceBytes(Info.AttachmentBlockBytesUploaded.load()), - Info.AttachmentsUploaded.load(), - NiceBytes(Info.AttachmentBytesUploaded.load()), - remotestore_impl::GetStats(RemoteStore.GetStats(), TransferWallTimeMS))); + remotestore_impl::LogRemoteStoreStatsDetails(RemoteStore.GetStats()); - return Result; -}; + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Saved oplog '{}' {} in {} ({}), Blocks: {} ({}), Attachments: {} ({}) {}", + RemoteStoreInfo.ContainerName, + "SUCCESS", + NiceTimeSpanMs(Timer.GetElapsedTimeMs()), + NiceBytes(Info.OplogSizeBytes), + Info.AttachmentBlocksUploaded.load(), + NiceBytes(Info.AttachmentBlockBytesUploaded.load()), + Info.AttachmentsUploaded.load(), + NiceBytes(Info.AttachmentBytesUploaded.load()), + remotestore_impl::GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, TransferWallTimeMS))); +} RemoteProjectStore::Result ParseOplogContainer( @@ -3098,7 +3521,7 @@ 
ParseOplogContainer( CbValidateError ValidateResult = CbValidateError::None; if (CbObject SectionObject = ValidateAndReadCompactBinaryObject(std::move(SectionPayload), ValidateResult); - ValidateResult == CbValidateError::None && ContainerObject) + ValidateResult == CbValidateError::None && SectionObject) { OutOplogSection = SectionObject; } @@ -3106,15 +3529,23 @@ ParseOplogContainer( { remotestore_impl::ReportMessage( OptionalContext, - fmt::format("Failed to save oplog container: '{}' ('{}')", "Section has unexpected data type", ToString(ValidateResult))); + fmt::format("Failed to read oplog container: '{}' ('{}')", "Section has unexpected data type", ToString(ValidateResult))); return RemoteProjectStore::Result{gsl::narrow<int>(HttpResponseCode::BadRequest), Timer.GetElapsedTimeMs() / 1000.0, "Section has unexpected data type", - "Failed to save oplog container"}; + "Failed to read oplog container"}; } std::unordered_set<IoHash, IoHash::Hasher> NeededAttachments; { CbArrayView OpsArray = OutOplogSection["ops"sv].AsArrayView(); + + size_t OpCount = OpsArray.Num(); + size_t OpsCompleteCount = 0; + + remotestore_impl::ReportMessage(OptionalContext, fmt::format("Scanning {} ops for attachments", OpCount)); + + Stopwatch ScanOplogProgressTimer; + uint64_t LastReportTimeMs = ScanOplogProgressTimer.GetElapsedTimeMs(); for (CbFieldView OpEntry : OpsArray) { OpEntry.IterateAttachments([&](CbFieldView FieldView) { NeededAttachments.insert(FieldView.AsAttachment()); }); @@ -3124,7 +3555,25 @@ ParseOplogContainer( .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, .Reason = "Operation cancelled"}; } + OpsCompleteCount++; + if (ScanOplogProgressTimer.GetElapsedTimeMs() - LastReportTimeMs > 200) + { + remotestore_impl::ReportProgress( + OptionalContext, + "Scanning oplog"sv, + fmt::format("{} attachments found, {} ops remaining...", NeededAttachments.size(), OpCount - OpsCompleteCount), + OpCount, + OpCount - OpsCompleteCount, + ScanOplogProgressTimer.GetElapsedTimeMs()); 
+ LastReportTimeMs = ScanOplogProgressTimer.GetElapsedTimeMs(); + } } + remotestore_impl::ReportProgress(OptionalContext, + "Scanning oplog"sv, + fmt::format("{} attachments found", NeededAttachments.size()), + OpCount, + OpCount - OpsCompleteCount, + ScanOplogProgressTimer.GetElapsedTimeMs()); } { std::vector<IoHash> ReferencedAttachments(NeededAttachments.begin(), NeededAttachments.end()); @@ -3151,13 +3600,27 @@ ParseOplogContainer( { ChunkedInfo Chunked = ReadChunkedInfo(ChunkedFileView); + size_t NeededChunkAttachmentCount = 0; + OnReferencedAttachments(Chunked.ChunkHashes); - NeededAttachments.insert(Chunked.ChunkHashes.begin(), Chunked.ChunkHashes.end()); + for (const IoHash& ChunkHash : Chunked.ChunkHashes) + { + if (!HasAttachment(ChunkHash)) + { + if (NeededAttachments.insert(ChunkHash).second) + { + NeededChunkAttachmentCount++; + } + } + } OnChunkedAttachment(Chunked); - ZEN_INFO("Requesting chunked attachment '{}' ({}) built from {} chunks", - Chunked.RawHash, - NiceBytes(Chunked.RawSize), - Chunked.ChunkHashes.size()); + + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Requesting chunked attachment '{}' ({}) built from {} chunks, need {} chunks", + Chunked.RawHash, + NiceBytes(Chunked.RawSize), + Chunked.ChunkHashes.size(), + NeededChunkAttachmentCount)); } } if (remotestore_impl::IsCancelled(OptionalContext)) @@ -3243,7 +3706,7 @@ ParseOplogContainer( .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, .Reason = "Operation cancelled"}; } - }; + } remotestore_impl::ReportMessage(OptionalContext, fmt::format("Requesting {} of {} large attachments", NeedAttachmentCount, LargeChunksArray.Num())); @@ -3282,23 +3745,13 @@ SaveOplogContainer( return Result; } -RemoteProjectStore::Result -LoadOplog(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - ProjectStore::Oplog& Oplog, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - bool ForceDownload, - bool IgnoreMissingAttachments, - bool CleanOplog, - 
EPartialBlockRequestMode PartialBlockRequestMode, - double HostLatencySec, - double CacheLatencySec, - JobContext* OptionalContext) +void +LoadOplog(LoadOplogContext&& Context) { using namespace std::literals; - std::unique_ptr<OperationLogOutput> LogOutput(std::make_unique<remotestore_impl::JobContextLogOutput>(OptionalContext)); + ZEN_SCOPED_LOG(Context.Log); + remotestore_impl::JobContextLogger JobContextOutput(Context.OptionalJobContext); remotestore_impl::DownloadInfo Info; @@ -3307,44 +3760,44 @@ LoadOplog(CidStore& ChunkStore, std::unordered_set<IoHash, IoHash::Hasher> Attachments; uint64_t BlockCountToDownload = 0; - RemoteProjectStore::RemoteStoreInfo RemoteStoreInfo = RemoteStore.GetInfo(); - remotestore_impl::ReportMessage(OptionalContext, fmt::format("Loading oplog container '{}'", RemoteStoreInfo.ContainerName)); + RemoteProjectStore::RemoteStoreInfo RemoteStoreInfo = Context.RemoteStore.GetInfo(); + remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Loading oplog container '{}'", RemoteStoreInfo.ContainerName)); uint64_t TransferWallTimeMS = 0; Stopwatch LoadContainerTimer; - RemoteProjectStore::LoadContainerResult LoadContainerResult = RemoteStore.LoadContainer(); + RemoteProjectStore::LoadContainerResult LoadContainerResult = Context.RemoteStore.LoadContainer(); TransferWallTimeMS += LoadContainerTimer.GetElapsedTimeMs(); if (LoadContainerResult.ErrorCode) { remotestore_impl::ReportMessage( - OptionalContext, + Context.OptionalJobContext, fmt::format("Failed to load oplog container: '{}', error code: {}", LoadContainerResult.Reason, LoadContainerResult.ErrorCode)); - return RemoteProjectStore::Result{.ErrorCode = LoadContainerResult.ErrorCode, - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Reason = LoadContainerResult.Reason, - .Text = LoadContainerResult.Text}; + throw RemoteStoreError( + fmt::format("Failed to load oplog container: '{}', error code: {}", LoadContainerResult.Reason, LoadContainerResult.ErrorCode), + 
LoadContainerResult.ErrorCode, + LoadContainerResult.Text); } - remotestore_impl::ReportMessage(OptionalContext, + remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Loaded container in {} ({})", NiceTimeSpanMs(static_cast<uint64_t>(LoadContainerResult.ElapsedSeconds * 1000)), NiceBytes(LoadContainerResult.ContainerObject.GetSize()))); Info.OplogSizeBytes = LoadContainerResult.ContainerObject.GetSize(); - remotestore_impl::AsyncRemoteResult RemoteResult; - Latch AttachmentsDownloadLatch(1); - Latch AttachmentsWriteLatch(1); - std::atomic_size_t AttachmentCount = 0; + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork AttachmentWork(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + std::atomic_size_t AttachmentCount = 0; Stopwatch LoadAttachmentsTimer; std::atomic_uint64_t DownloadStartMS = (std::uint64_t)-1; - auto HasAttachment = [&Oplog, &ChunkStore, ForceDownload](const IoHash& RawHash) { - if (ForceDownload) + auto HasAttachment = [&Context](const IoHash& RawHash) { + if (Context.ForceDownload) { return false; } - if (ChunkStore.ContainsChunk(RawHash)) + if (Context.ChunkStore.ContainsChunk(RawHash)) { return true; } @@ -3359,22 +3812,17 @@ LoadOplog(CidStore& ChunkStore, std::vector<NeededBlockDownload> NeededBlockDownloads; - auto OnNeedBlock = [&RemoteStore, - &ChunkStore, - &NetworkWorkerPool, - &WorkerPool, - &AttachmentsDownloadLatch, - &AttachmentsWriteLatch, + auto OnNeedBlock = [&Context, + &AttachmentWork, + &AbortFlag, &AttachmentCount, - &RemoteResult, &BlockCountToDownload, &Info, &LoadAttachmentsTimer, &DownloadStartMS, - &NeededBlockDownloads, - IgnoreMissingAttachments, - OptionalContext](ThinChunkBlockDescription&& ThinBlockDescription, std::vector<uint32_t>&& NeededChunkIndexes) { - if (RemoteResult.IsError()) + &NeededBlockDownloads](ThinChunkBlockDescription&& ThinBlockDescription, + std::vector<uint32_t>&& NeededChunkIndexes) { + if (AbortFlag.load()) { return; } @@ 
-3383,15 +3831,8 @@ LoadOplog(CidStore& ChunkStore, AttachmentCount.fetch_add(1); if (ThinBlockDescription.BlockHash == IoHash::Zero) { - DownloadAndSaveBlockChunks(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, + DownloadAndSaveBlockChunks(Context, + AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, @@ -3405,53 +3846,29 @@ LoadOplog(CidStore& ChunkStore, } }; - auto OnNeedAttachment = [&RemoteStore, - &Oplog, - &ChunkStore, - &NetworkWorkerPool, - &WorkerPool, - &AttachmentsDownloadLatch, - &AttachmentsWriteLatch, - &RemoteResult, - &Attachments, - &AttachmentCount, - &LoadAttachmentsTimer, - &DownloadStartMS, - &Info, - IgnoreMissingAttachments, - OptionalContext](const IoHash& RawHash) { + std::vector<IoHash> AttachmentsToDownload; + + auto OnNeedAttachment = [&AttachmentsToDownload, &AbortFlag, &Attachments, &AttachmentCount](const IoHash& RawHash) { if (!Attachments.insert(RawHash).second) { return; } - if (RemoteResult.IsError()) + if (AbortFlag.load()) { return; } AttachmentCount.fetch_add(1); - DownloadAndSaveAttachment(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, - Info, - LoadAttachmentsTimer, - DownloadStartMS, - RawHash); + AttachmentsToDownload.push_back(RawHash); }; std::vector<ChunkedInfo> FilesToDechunk; auto OnChunkedAttachment = [&FilesToDechunk](const ChunkedInfo& Chunked) { FilesToDechunk.push_back(Chunked); }; - auto OnReferencedAttachments = [&Oplog](std::span<IoHash> RawHashes) { Oplog.CaptureAddedAttachments(RawHashes); }; + auto OnReferencedAttachments = [&Context](std::span<IoHash> RawHashes) { Context.Oplog.CaptureAddedAttachments(RawHashes); }; // Make sure we retain any attachments we download before writing the oplog - Oplog.EnableUpdateCapture(); - auto _ = 
MakeGuard([&Oplog]() { Oplog.DisableUpdateCapture(); }); + Context.Oplog.EnableUpdateCapture(); + auto _ = MakeGuard([&Context]() { Context.Oplog.DisableUpdateCapture(); }); CbObject OplogSection; RemoteProjectStore::Result Result = ParseOplogContainer(LoadContainerResult.ContainerObject, @@ -3461,12 +3878,14 @@ LoadOplog(CidStore& ChunkStore, OnNeedAttachment, OnChunkedAttachment, OplogSection, - OptionalContext); + Context.OptionalJobContext); if (Result.ErrorCode != 0) { - RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); + AbortFlag = true; + AttachmentWork.Wait(); + throw RemoteStoreError(Result.Reason, Result.ErrorCode, Result.Text); } - remotestore_impl::ReportMessage(OptionalContext, + remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Parsed oplog in {}, found {} attachments, {} blocks and {} chunked files to download", NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)), Attachments.size(), @@ -3490,8 +3909,17 @@ LoadOplog(CidStore& ChunkStore, std::vector<bool> DownloadedViaLegacyChunkFlag(AllNeededChunkHashes.size(), false); ChunkBlockAnalyser::BlockResult PartialBlocksResult; - RemoteProjectStore::GetBlockDescriptionsResult BlockDescriptions = RemoteStore.GetBlockDescriptions(BlockHashes); - std::vector<IoHash> BlocksWithDescription; + remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Fetching descriptions for {} blocks", BlockHashes.size())); + + RemoteProjectStore::GetBlockDescriptionsResult BlockDescriptions = + Context.RemoteStore.GetBlockDescriptions(BlockHashes, Context.OptionalCache, Context.CacheBuildId); + + remotestore_impl::ReportMessage(Context.OptionalJobContext, + fmt::format("GetBlockDescriptions took {}. 
Found {} blocks", + NiceTimeSpanMs(uint64_t(BlockDescriptions.ElapsedSeconds * 1000)), + BlockDescriptions.Blocks.size())); + + std::vector<IoHash> BlocksWithDescription; BlocksWithDescription.reserve(BlockDescriptions.Blocks.size()); for (const ChunkBlockDescription& BlockDescription : BlockDescriptions.Blocks) { @@ -3505,15 +3933,8 @@ LoadOplog(CidStore& ChunkStore, if (FindIt == BlockDescriptions.Blocks.end()) { // Fall back to full download as we can't get enough information about the block - DownloadAndSaveBlock(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, + DownloadAndSaveBlock(Context, + AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, @@ -3539,142 +3960,185 @@ LoadOplog(CidStore& ChunkStore, } else { - // Not a requested block? - ZEN_ASSERT(false); + // Not a requested block? Ignore it + FindIt++; } } } + + std::vector<bool> BlockExistsInCache(BlocksWithDescription.size(), false); + if (!AllNeededChunkHashes.empty()) { std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> PartialBlockDownloadModes; - if (PartialBlockRequestMode == EPartialBlockRequestMode::Off) + if (Context.PartialBlockRequestMode == EPartialBlockRequestMode::Off) { PartialBlockDownloadModes.resize(BlocksWithDescription.size(), ChunkBlockAnalyser::EPartialBlockDownloadMode::Off); } else { - RemoteProjectStore::AttachmentExistsInCacheResult CacheExistsResult = - RemoteStore.AttachmentExistsInCache(BlocksWithDescription); - if (CacheExistsResult.ErrorCode != 0 || CacheExistsResult.HasBody.size() != BlocksWithDescription.size()) + if (Context.OptionalCache) { - CacheExistsResult.HasBody.resize(BlocksWithDescription.size(), false); - } - - PartialBlockDownloadModes.reserve(BlocksWithDescription.size()); - - for (bool ExistsInCache : CacheExistsResult.HasBody) - { - if (PartialBlockRequestMode == EPartialBlockRequestMode::All) - { - 
PartialBlockDownloadModes.push_back(ExistsInCache ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed - : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange); - } - else if (PartialBlockRequestMode == EPartialBlockRequestMode::ZenCacheOnly) + std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = + Context.OptionalCache->BlobsExists(Context.CacheBuildId, BlocksWithDescription); + if (CacheExistsResult.size() == BlocksWithDescription.size()) { - PartialBlockDownloadModes.push_back(ExistsInCache ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed - : ChunkBlockAnalyser::EPartialBlockDownloadMode::Off); + for (size_t BlobIndex = 0; BlobIndex < CacheExistsResult.size(); BlobIndex++) + { + BlockExistsInCache[BlobIndex] = CacheExistsResult[BlobIndex].HasBody; + } } - else if (PartialBlockRequestMode == EPartialBlockRequestMode::Mixed) + uint64_t FoundBlocks = + std::accumulate(BlockExistsInCache.begin(), + BlockExistsInCache.end(), + uint64_t(0u), + [](uint64_t Current, bool Exists) -> uint64_t { return Current + (Exists ? 1 : 0); }); + if (FoundBlocks > 0) { - PartialBlockDownloadModes.push_back(ExistsInCache ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed - : ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange); + remotestore_impl::ReportMessage( + Context.OptionalJobContext, + fmt::format("Found {} out of {} blocks in cache", FoundBlocks, BlockExistsInCache.size())); } } + + ChunkBlockAnalyser::EPartialBlockDownloadMode CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off; + ChunkBlockAnalyser::EPartialBlockDownloadMode CachePartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off; + + switch (Context.PartialBlockRequestMode) + { + case EPartialBlockRequestMode::Off: + break; + case EPartialBlockRequestMode::ZenCacheOnly: + CachePartialDownloadMode = Context.CacheMaxRangeCountPerRequest > 1 + ? 
ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed + : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange; + CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off; + break; + case EPartialBlockRequestMode::Mixed: + CachePartialDownloadMode = Context.CacheMaxRangeCountPerRequest > 1 + ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed + : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange; + CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange; + break; + case EPartialBlockRequestMode::All: + CachePartialDownloadMode = Context.CacheMaxRangeCountPerRequest > 1 + ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed + : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange; + CloudPartialDownloadMode = Context.StoreMaxRangeCountPerRequest > 1 + ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange + : ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange; + break; + } + + PartialBlockDownloadModes.reserve(BlocksWithDescription.size()); + for (uint32_t BlockIndex = 0; BlockIndex < BlocksWithDescription.size(); BlockIndex++) + { + const bool BlockExistInCache = BlockExistsInCache[BlockIndex]; + PartialBlockDownloadModes.push_back(BlockExistInCache ? 
CachePartialDownloadMode : CloudPartialDownloadMode); + } } ZEN_ASSERT(PartialBlockDownloadModes.size() == BlocksWithDescription.size()); - ChunkBlockAnalyser PartialAnalyser(*LogOutput, - BlockDescriptions.Blocks, - ChunkBlockAnalyser::Options{.IsQuiet = false, - .IsVerbose = false, - .HostLatencySec = HostLatencySec, - .HostHighSpeedLatencySec = CacheLatencySec}); + ChunkBlockAnalyser PartialAnalyser( + JobContextOutput.Log(), + BlockDescriptions.Blocks, + ChunkBlockAnalyser::Options{.IsQuiet = false, + .IsVerbose = false, + .HostLatencySec = Context.StoreLatencySec, + .HostHighSpeedLatencySec = Context.CacheLatencySec, + .HostMaxRangeCountPerRequest = Context.StoreMaxRangeCountPerRequest, + .HostHighSpeedMaxRangeCountPerRequest = Context.CacheMaxRangeCountPerRequest}); std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = PartialAnalyser.GetNeeded(AllNeededPartialChunkHashesLookup, [&](uint32_t ChunkIndex) { return !DownloadedViaLegacyChunkFlag[ChunkIndex]; }); PartialBlocksResult = PartialAnalyser.CalculatePartialBlockDownloads(NeededBlocks, PartialBlockDownloadModes); - for (uint32_t FullBlockIndex : PartialBlocksResult.FullBlockIndexes) - { - DownloadAndSaveBlock(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, - Info, - LoadAttachmentsTimer, - DownloadStartMS, - BlockDescriptions.Blocks[FullBlockIndex].BlockHash, - AllNeededPartialChunkHashesLookup, - ChunkDownloadedFlags, - 3); - } + } - for (size_t BlockRangeIndex = 0; BlockRangeIndex < PartialBlocksResult.BlockRanges.size();) - { - size_t RangeCount = 1; - size_t RangesLeft = PartialBlocksResult.BlockRanges.size() - BlockRangeIndex; - const ChunkBlockAnalyser::BlockRangeDescriptor& CurrentBlockRange = PartialBlocksResult.BlockRanges[BlockRangeIndex]; - while (RangeCount < RangesLeft && - CurrentBlockRange.BlockIndex == PartialBlocksResult.BlockRanges[BlockRangeIndex + 
RangeCount].BlockIndex) - { - RangeCount++; - } + Stopwatch AttachmentsDownloadProgressTimer; + for (uint32_t FullBlockIndex : PartialBlocksResult.FullBlockIndexes) + { + DownloadAndSaveBlock(Context, + AttachmentWork, + Info, + LoadAttachmentsTimer, + DownloadStartMS, + BlockDescriptions.Blocks[FullBlockIndex].BlockHash, + AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + 3); + } - DownloadAndSavePartialBlock(ChunkStore, - RemoteStore, - IgnoreMissingAttachments, - OptionalContext, - NetworkWorkerPool, - WorkerPool, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, - Info, - LoadAttachmentsTimer, - DownloadStartMS, - BlockDescriptions.Blocks[CurrentBlockRange.BlockIndex], - PartialBlocksResult.BlockRanges, - BlockRangeIndex, - RangeCount, - AllNeededPartialChunkHashesLookup, - ChunkDownloadedFlags, - 3); - - BlockRangeIndex += RangeCount; + for (size_t BlockRangeIndex = 0; BlockRangeIndex < PartialBlocksResult.BlockRanges.size();) + { + size_t RangeCount = 1; + size_t RangesLeft = PartialBlocksResult.BlockRanges.size() - BlockRangeIndex; + const ChunkBlockAnalyser::BlockRangeDescriptor& CurrentBlockRange = PartialBlocksResult.BlockRanges[BlockRangeIndex]; + while (RangeCount < RangesLeft && + CurrentBlockRange.BlockIndex == PartialBlocksResult.BlockRanges[BlockRangeIndex + RangeCount].BlockIndex) + { + RangeCount++; } + + DownloadAndSavePartialBlock(Context, + AttachmentWork, + Info, + LoadAttachmentsTimer, + DownloadStartMS, + BlockDescriptions.Blocks[CurrentBlockRange.BlockIndex], + BlockExistsInCache[CurrentBlockRange.BlockIndex], + PartialBlocksResult.BlockRanges, + BlockRangeIndex, + RangeCount, + AllNeededPartialChunkHashesLookup, + ChunkDownloadedFlags, + /* RetriesLeft*/ 3); + + BlockRangeIndex += RangeCount; } - AttachmentsDownloadLatch.CountDown(); - while (!AttachmentsDownloadLatch.Wait(1000)) + for (const IoHash& AttachmentToDownload : AttachmentsToDownload) { - ptrdiff_t Remaining = AttachmentsDownloadLatch.Remaining(); - 
if (remotestore_impl::IsCancelled(OptionalContext)) + DownloadAndSaveAttachment(Context, AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, AttachmentToDownload); + } + + uint64_t TotalChunksToDownload = AllNeededChunkHashes.size() + AttachmentsToDownload.size(); + AttachmentWork.Wait(1000, [&](bool /*IsAborted*/, bool /*IsPaused*/, std::ptrdiff_t /*Pending*/) { + if (remotestore_impl::IsCancelled(Context.OptionalJobContext) && !AbortFlag) { - if (!RemoteResult.IsError()) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - } + AbortFlag = true; } uint64_t PartialTransferWallTimeMS = TransferWallTimeMS; if (DownloadStartMS != (uint64_t)-1) { PartialTransferWallTimeMS += LoadAttachmentsTimer.GetElapsedTimeMs() - DownloadStartMS.load(); } + + uint64_t CompletedChunkCount = Info.ChunksCompleteCount.load(); + + uint64_t AttachmentsDownloaded = + Info.AttachmentBlocksDownloaded.load() + Info.AttachmentBlocksRangesDownloaded.load() + Info.AttachmentsDownloaded.load(); + uint64_t AttachmentBytesDownloaded = Info.AttachmentBlockBytesDownloaded.load() + Info.AttachmentBlockRangeBytesDownloaded.load() + + Info.AttachmentBytesDownloaded.load(); + remotestore_impl::ReportProgress( - OptionalContext, + Context.OptionalJobContext, "Loading attachments"sv, - fmt::format("{} remaining. {}", Remaining, remotestore_impl::GetStats(RemoteStore.GetStats(), PartialTransferWallTimeMS)), - AttachmentCount.load(), - Remaining); - } + fmt::format("{}/{} ({}) chunks. {} ({}) blobs downloaded. 
{}", + CompletedChunkCount, + TotalChunksToDownload, + NiceBytes(Info.AttachmentBytesStored.load()), + AttachmentsDownloaded, + NiceBytes(AttachmentBytesDownloaded), + remotestore_impl::GetStats(Context.RemoteStore.GetStats(), Context.OptionalCacheStats, PartialTransferWallTimeMS)), + TotalChunksToDownload, + TotalChunksToDownload - CompletedChunkCount, + AttachmentsDownloadProgressTimer.GetElapsedTimeMs()); + }); + if (DownloadStartMS != (uint64_t)-1) { TransferWallTimeMS += LoadAttachmentsTimer.GetElapsedTimeMs() - DownloadStartMS.load(); @@ -3682,266 +4146,247 @@ LoadOplog(CidStore& ChunkStore, if (AttachmentCount.load() > 0) { - remotestore_impl::ReportProgress(OptionalContext, - "Loading attachments"sv, - fmt::format("{}", remotestore_impl::GetStats(RemoteStore.GetStats(), TransferWallTimeMS)), - AttachmentCount.load(), - 0); - } - - AttachmentsWriteLatch.CountDown(); - while (!AttachmentsWriteLatch.Wait(1000)) - { - ptrdiff_t Remaining = AttachmentsWriteLatch.Remaining(); - if (remotestore_impl::IsCancelled(OptionalContext)) - { - if (!RemoteResult.IsError()) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - } - } - remotestore_impl::ReportProgress(OptionalContext, - "Writing attachments"sv, - fmt::format("{} remaining.", Remaining), + remotestore_impl::ReportProgress(Context.OptionalJobContext, + "Loading attachments", + ""sv, AttachmentCount.load(), - Remaining); + 0, + AttachmentsDownloadProgressTimer.GetElapsedTimeMs()); } - - if (AttachmentCount.load() > 0) + if (!FilesToDechunk.empty()) { - remotestore_impl::ReportProgress(OptionalContext, "Writing attachments", ""sv, AttachmentCount.load(), 0); - } + remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Dechunking {} attachments", FilesToDechunk.size())); - if (Result.ErrorCode == 0) - { - if (!FilesToDechunk.empty()) + ParallelWork DechunkWork(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + std::filesystem::path 
TempFilePath = Context.Oplog.TempPath(); + for (size_t ChunkedIndex = 0; ChunkedIndex < FilesToDechunk.size(); ChunkedIndex++) { - remotestore_impl::ReportMessage(OptionalContext, fmt::format("Dechunking {} attachments", FilesToDechunk.size())); - - Latch DechunkLatch(1); - std::filesystem::path TempFilePath = Oplog.TempPath(); - for (const ChunkedInfo& Chunked : FilesToDechunk) - { - std::filesystem::path TempFileName = TempFilePath / Chunked.RawHash.ToHexString(); - DechunkLatch.AddCount(1); - WorkerPool.ScheduleWork( - [&ChunkStore, - &DechunkLatch, - TempFileName, - &Chunked, - &RemoteResult, - IgnoreMissingAttachments, - &Info, - OptionalContext]() { - ZEN_TRACE_CPU("DechunkAttachment"); - - auto _ = MakeGuard([&DechunkLatch, &TempFileName] { - std::error_code Ec; - if (IsFile(TempFileName, Ec)) + const ChunkedInfo& Chunked = FilesToDechunk[ChunkedIndex]; + std::filesystem::path TempFileName = TempFilePath / Chunked.RawHash.ToHexString(); + DechunkWork.ScheduleWork( + Context.WorkerPool, + [&Log, &Context, TempFileName, &FilesToDechunk, ChunkedIndex, &Info](std::atomic<bool>& AbortFlag) { + ZEN_TRACE_CPU("DechunkAttachment"); + + auto _ = MakeGuard([&Log, &TempFileName] { + std::error_code Ec; + if (IsFile(TempFileName, Ec)) + { + RemoveFile(TempFileName, Ec); + if (Ec) { - RemoveFile(TempFileName, Ec); - if (Ec) - { - ZEN_INFO("Failed to remove temporary file '{}'. Reason: {}", TempFileName, Ec.message()); - } + ZEN_INFO("Failed to remove temporary file '{}'. 
Reason: {}", TempFileName, Ec.message()); } - DechunkLatch.CountDown(); - }); - try + } + }); + const ChunkedInfo& Chunked = FilesToDechunk[ChunkedIndex]; + + try + { + if (AbortFlag.load()) + { + return; + } + Stopwatch Timer; + + IoBuffer TmpBuffer; { - if (RemoteResult.IsError()) + BasicFile TmpFile; + std::error_code Ec; + TmpFile.Open(TempFileName, BasicFile::Mode::kTruncate, Ec); + if (Ec) { - return; + throw RemoteStoreError( + "Write error", + gsl::narrow<int>(HttpResponseCode::InternalServerError), + fmt::format("Failed to open temp file {} for chunked attachment {}", TempFileName, Chunked.RawHash)); } - Stopwatch Timer; - IoBuffer TmpBuffer; + else { - BasicFile TmpFile; - TmpFile.Open(TempFileName, BasicFile::Mode::kTruncate); + BasicFileWriter TmpWriter(TmpFile, 64u * 1024u); + + uint64_t ChunkOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder(); + BLAKE3Stream HashingStream; + for (std::uint32_t SequenceIndex : Chunked.ChunkSequence) { - BasicFileWriter TmpWriter(TmpFile, 64u * 1024u); + if (AbortFlag.load()) + { + return; + } - uint64_t ChunkOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder(); - BLAKE3Stream HashingStream; - for (std::uint32_t SequenceIndex : Chunked.ChunkSequence) + const IoHash& ChunkHash = Chunked.ChunkHashes[SequenceIndex]; + IoBuffer Chunk = Context.ChunkStore.FindChunkByCid(ChunkHash); + if (!Chunk) { - const IoHash& ChunkHash = Chunked.ChunkHashes[SequenceIndex]; - IoBuffer Chunk = ChunkStore.FindChunkByCid(ChunkHash); - if (!Chunk) + remotestore_impl::ReportMessage( + Context.OptionalJobContext, + fmt::format("Missing chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); + + // We only add 1 as the resulting missing count will be 1 for the dechunked file + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) { - remotestore_impl::ReportMessage( - OptionalContext, + throw RemoteStoreError( + "Missing chunk", + gsl::narrow<int>(HttpResponseCode::NotFound), fmt::format("Missing 
chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); - - // We only add 1 as the resulting missing count will be 1 for the dechunked file - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) - { - RemoteResult.SetError( - gsl::narrow<int>(HttpResponseCode::NotFound), - "Missing chunk", - fmt::format("Missing chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); - } - return; } + return; + } - IoHash RawHash; - uint64_t RawSize; + IoHash RawHash; + uint64_t RawSize; - CompressedBuffer Compressed = - CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), RawHash, RawSize); - if (RawHash != ChunkHash) + CompressedBuffer Compressed = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), RawHash, RawSize); + if (RawHash != ChunkHash || !Compressed) + { + std::string Message = + Compressed ? fmt::format("Mismatching raw hash {} for chunk {} for chunked attachment {}", + RawHash, + ChunkHash, + Chunked.RawHash) + : fmt::format("Malformed data for chunk {} for chunked attachment {}", + ChunkHash, + Chunked.RawHash); + remotestore_impl::ReportMessage(Context.OptionalJobContext, Message); + + // We only add 1 as the resulting missing count will be 1 for the dechunked file + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) + { + throw RemoteStoreError("Missing chunk", gsl::narrow<int>(HttpResponseCode::NotFound), Message); + } + return; + } + + { + ZEN_TRACE_CPU("DecompressChunk"); + + if (!Compressed.DecompressToStream( + 0, + RawSize, + [&](uint64_t SourceOffset, + uint64_t SourceSize, + uint64_t Offset, + const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset, SourceSize, Offset); + + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + MemoryView SegmentData = Segment.GetView(); + HashingStream.Append(SegmentData); + TmpWriter.Write(SegmentData.GetData(), SegmentData.GetSize(), ChunkOffset + Offset); + } + return true; + })) { 
remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Mismatching raw hash {} for chunk {} for chunked attachment {}", - RawHash, + Context.OptionalJobContext, + fmt::format("Failed to decompress chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); // We only add 1 as the resulting missing count will be 1 for the dechunked file Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) + if (!Context.IgnoreMissingAttachments) { - RemoteResult.SetError( - gsl::narrow<int>(HttpResponseCode::NotFound), + throw RemoteStoreError( "Missing chunk", - fmt::format("Mismatching raw hash {} for chunk {} for chunked attachment {}", - RawHash, - ChunkHash, - Chunked.RawHash)); - } - return; - } - - { - ZEN_TRACE_CPU("DecompressChunk"); - - if (!Compressed.DecompressToStream(0, - RawSize, - [&](uint64_t SourceOffset, - uint64_t SourceSize, - uint64_t Offset, - const CompositeBuffer& RangeBuffer) { - ZEN_UNUSED(SourceOffset, SourceSize, Offset); - - for (const SharedBuffer& Segment : - RangeBuffer.GetSegments()) - { - MemoryView SegmentData = Segment.GetView(); - HashingStream.Append(SegmentData); - TmpWriter.Write(SegmentData.GetData(), - SegmentData.GetSize(), - ChunkOffset + Offset); - } - return true; - })) - { - remotestore_impl::ReportMessage( - OptionalContext, + gsl::narrow<int>(HttpResponseCode::NotFound), fmt::format("Failed to decompress chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); - - // We only add 1 as the resulting missing count will be 1 for the dechunked file - Info.MissingAttachmentCount.fetch_add(1); - if (!IgnoreMissingAttachments) - { - RemoteResult.SetError( - gsl::narrow<int>(HttpResponseCode::NotFound), - "Missing chunk", - fmt::format("Failed to decompress chunk {} for chunked attachment {}", - ChunkHash, - Chunked.RawHash)); - } - return; } + return; } - ChunkOffset += RawSize; } - BLAKE3 RawHash = HashingStream.GetHash(); - ZEN_ASSERT(Chunked.RawHash == IoHash::FromBLAKE3(RawHash)); - 
UniqueBuffer Header = CompressedBuffer::CreateHeaderForNoneEncoder(Chunked.RawSize, RawHash); - TmpWriter.Write(Header.GetData(), Header.GetSize(), 0); + ChunkOffset += RawSize; } - TmpFile.Close(); - TmpBuffer = IoBufferBuilder::MakeFromTemporaryFile(TempFileName); - } - CidStore::InsertResult InsertResult = - ChunkStore.AddChunk(TmpBuffer, Chunked.RawHash, CidStore::InsertMode::kMayBeMovedInPlace); - if (InsertResult.New) - { - Info.AttachmentBytesStored.fetch_add(TmpBuffer.GetSize()); - Info.AttachmentsStored.fetch_add(1); + BLAKE3 RawHash = HashingStream.GetHash(); + ZEN_ASSERT(Chunked.RawHash == IoHash::FromBLAKE3(RawHash)); + UniqueBuffer Header = CompressedBuffer::CreateHeaderForNoneEncoder(Chunked.RawSize, RawHash); + TmpWriter.Write(Header.GetData(), Header.GetSize(), 0); } - - ZEN_INFO("Dechunked attachment {} ({}) in {}", - Chunked.RawHash, - NiceBytes(Chunked.RawSize), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + TmpFile.Close(); + TmpBuffer = IoBufferBuilder::MakeFromTemporaryFile(TempFileName); } - catch (const std::exception& Ex) + uint64_t TmpBufferSize = TmpBuffer.GetSize(); + CidStore::InsertResult InsertResult = + Context.ChunkStore.AddChunk(TmpBuffer, Chunked.RawHash, CidStore::InsertMode::kMayBeMovedInPlace); + if (InsertResult.New) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to dechunck file {}", Chunked.RawHash), - Ex.what()); + Info.AttachmentBytesStored.fetch_add(TmpBufferSize); + Info.AttachmentsStored.fetch_add(1); } - }, - WorkerThreadPool::EMode::EnableBacklog); - } - DechunkLatch.CountDown(); - while (!DechunkLatch.Wait(1000)) - { - ptrdiff_t Remaining = DechunkLatch.Remaining(); - if (remotestore_impl::IsCancelled(OptionalContext)) - { - if (!RemoteResult.IsError()) + ZEN_INFO("Dechunked attachment {} ({}) in {}", + Chunked.RawHash, + NiceBytes(Chunked.RawSize), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + } + catch (const std::exception& Ex) { - 
RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); + throw RemoteStoreError(fmt::format("Failed to dechunk file {}", Chunked.RawHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } - } - remotestore_impl::ReportProgress(OptionalContext, - "Dechunking attachments"sv, - fmt::format("{} remaining...", Remaining), - FilesToDechunk.size(), - Remaining); - } - remotestore_impl::ReportProgress(OptionalContext, "Dechunking attachments"sv, ""sv, FilesToDechunk.size(), 0); + }, + WorkerThreadPool::EMode::EnableBacklog); } - Result = RemoteResult.ConvertResult(); - } - if (Result.ErrorCode == 0) - { - if (CleanOplog) - { - RemoteStore.Flush(); - if (!Oplog.Reset()) + Stopwatch DechunkProgressTimer; + DechunkWork.Wait(1000, [&](bool /*IsAborted*/, bool /*IsPaused*/, std::ptrdiff_t Remaining) { + if (remotestore_impl::IsCancelled(Context.OptionalJobContext) && !AbortFlag) { - Result = RemoteProjectStore::Result{.ErrorCode = gsl::narrow<int>(HttpResponseCode::InternalServerError), - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Reason = fmt::format("Failed to clean existing oplog '{}'", Oplog.OplogId())}; - remotestore_impl::ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", Result.ErrorCode, Result.Reason)); + AbortFlag = true; } + remotestore_impl::ReportProgress(Context.OptionalJobContext, + "Dechunking attachments"sv, + fmt::format("{} remaining...", Remaining), + FilesToDechunk.size(), + Remaining, + DechunkProgressTimer.GetElapsedTimeMs()); + }); + remotestore_impl::ReportProgress(Context.OptionalJobContext, + "Dechunking attachments"sv, + ""sv, + FilesToDechunk.size(), + 0, + DechunkProgressTimer.GetElapsedTimeMs()); + } + if (Context.CleanOplog) + { + if (Context.OptionalCache) + { + Context.OptionalCache->Flush(100, [](intptr_t) { 
return /*DontWaitForPendingOperation*/ false; }); } - if (Result.ErrorCode == 0) + if (!Context.Oplog.Reset()) { - remotestore_impl::WriteOplogSection(Oplog, OplogSection, OptionalContext); + std::string Reason = fmt::format("Failed to clean existing oplog '{}'", Context.Oplog.OplogId()); + remotestore_impl::ReportMessage( + Context.OptionalJobContext, + fmt::format("Aborting ({}): {}", gsl::narrow<int>(HttpResponseCode::InternalServerError), Reason)); + throw RemoteStoreError(Reason, gsl::narrow<int>(HttpResponseCode::InternalServerError), ""); + } + } + { + RemoteProjectStore::Result WriteResult = + remotestore_impl::WriteOplogSection(Context.Oplog, OplogSection, Context.OptionalJobContext); + if (WriteResult.ErrorCode) + { + remotestore_impl::ReportMessage(Context.OptionalJobContext, + fmt::format("Aborting ({}): {}", WriteResult.ErrorCode, WriteResult.Reason)); + throw RemoteStoreError(WriteResult.Reason, WriteResult.ErrorCode, WriteResult.Text); } } - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; - - remotestore_impl::LogRemoteStoreStatsDetails(RemoteStore.GetStats()); + remotestore_impl::LogRemoteStoreStatsDetails(Context.RemoteStore.GetStats()); { std::string DownloadDetails; RemoteProjectStore::ExtendedStats ExtendedStats; - if (RemoteStore.GetExtendedStats(ExtendedStats)) + if (Context.RemoteStore.GetExtendedStats(ExtendedStats)) { if (!ExtendedStats.m_ReceivedBytesPerSource.empty()) { @@ -3960,7 +4405,8 @@ LoadOplog(CidStore& ChunkStore, Total += It.second; } - remotestore_impl::ReportMessage(OptionalContext, fmt::format("Downloaded {} ({})", NiceBytes(Total), SB.ToView())); + remotestore_impl::ReportMessage(Context.OptionalJobContext, + fmt::format("Downloaded {} ({})", NiceBytes(Total), SB.ToView())); } } } @@ -3970,27 +4416,26 @@ LoadOplog(CidStore& ChunkStore, uint64_t TotalBytesDownloaded = Info.OplogSizeBytes + Info.AttachmentBlockBytesDownloaded.load() + Info.AttachmentBlockRangeBytesDownloaded.load() + 
Info.AttachmentBytesDownloaded.load(); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Loaded oplog '{}' {} in {} ({}), Blocks: {} ({}), BlockRanges: {} ({}), Attachments: {} " - "({}), Total: {} ({}), Stored: {} ({}), Missing: {} {}", - RemoteStoreInfo.ContainerName, - Result.ErrorCode == 0 ? "SUCCESS" : "FAILURE", - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)), - NiceBytes(Info.OplogSizeBytes), - Info.AttachmentBlocksDownloaded.load(), - NiceBytes(Info.AttachmentBlockBytesDownloaded.load()), - Info.AttachmentBlocksRangesDownloaded.load(), - NiceBytes(Info.AttachmentBlockRangeBytesDownloaded.load()), - Info.AttachmentsDownloaded.load(), - NiceBytes(Info.AttachmentBytesDownloaded.load()), - TotalDownloads, - NiceBytes(TotalBytesDownloaded), - Info.AttachmentsStored.load(), - NiceBytes(Info.AttachmentBytesStored.load()), - Info.MissingAttachmentCount.load(), - remotestore_impl::GetStats(RemoteStore.GetStats(), TransferWallTimeMS))); - - return Result; + remotestore_impl::ReportMessage( + Context.OptionalJobContext, + fmt::format("Loaded oplog '{}' {} in {} ({}), Blocks: {} ({}), BlockRanges: {} ({}), Attachments: {} " + "({}), Total: {} ({}), Stored: {} ({}), Missing: {} {}", + RemoteStoreInfo.ContainerName, + "SUCCESS", + NiceTimeSpanMs(static_cast<uint64_t>(Timer.GetElapsedTimeMs())), + NiceBytes(Info.OplogSizeBytes), + Info.AttachmentBlocksDownloaded.load(), + NiceBytes(Info.AttachmentBlockBytesDownloaded.load()), + Info.AttachmentBlocksRangesDownloaded.load(), + NiceBytes(Info.AttachmentBlockRangeBytesDownloaded.load()), + Info.AttachmentsDownloaded.load(), + NiceBytes(Info.AttachmentBytesDownloaded.load()), + TotalDownloads, + NiceBytes(TotalBytesDownloaded), + Info.AttachmentsStored.load(), + NiceBytes(Info.AttachmentBytesStored.load()), + Info.MissingAttachmentCount.load(), + remotestore_impl::GetStats(Context.RemoteStore.GetStats(), Context.OptionalCacheStats, TransferWallTimeMS))); } ChunkedInfo @@ -4033,7 +4478,7 
@@ RemoteProjectStore::~RemoteProjectStore() #if ZEN_WITH_TESTS -namespace testutils { +namespace projectstore_testutils { using namespace std::literals; static std::string OidAsString(const Oid& Id) @@ -4069,6 +4514,64 @@ namespace testutils { return Package; }; + static CbPackage CreateFilesOplogPackage(const Oid& Id, + const std::filesystem::path ProjectRootDir, + const std::span<const std::pair<Oid, std::filesystem::path>>& Attachments) + { + CbPackage Package; + CbObjectWriter Object; + Object << "key"sv << OidAsString(Id); + if (!Attachments.empty()) + { + Object.BeginArray("files"); + for (const auto& Attachment : Attachments) + { + std::filesystem::path ServerPath = std::filesystem::relative(Attachment.second, ProjectRootDir).generic_string(); + std::filesystem::path ClientPath = ServerPath; // dummy + Object.BeginObject(); + Object << "id"sv << Attachment.first; + Object << "serverpath"sv << ServerPath.string(); + Object << "clientpath"sv << ClientPath.string(); + Object.EndObject(); + } + Object.EndArray(); + } + Package.SetObject(Object.Save()); + return Package; + }; + + // Variant of CreateFilesOplogPackage where each entry includes a "data" field of + // CbFieldType::Hash set to IoHash::Zero. CbFieldView::AsHash() returns Zero for a + // plain Hash field whose stored value is zero, so RewriteOp still enters the rewrite + // path (DataHash == Zero) and calls RewriteCbObject, which then finds the pre-existing + // "data" field, triggering the return-true branch at line 1858. 
+ static CbPackage CreateFilesOplogPackageWithZeroDataHash(const Oid& Id, + const std::filesystem::path ProjectRootDir, + const std::span<const std::pair<Oid, std::filesystem::path>>& Attachments) + { + CbPackage Package; + CbObjectWriter Object; + Object << "key"sv << OidAsString(Id); + if (!Attachments.empty()) + { + Object.BeginArray("files"); + for (const auto& Attachment : Attachments) + { + std::filesystem::path ServerPath = std::filesystem::relative(Attachment.second, ProjectRootDir).generic_string(); + std::filesystem::path ClientPath = ServerPath; // dummy + Object.BeginObject(); + Object << "id"sv << Attachment.first; + Object << "serverpath"sv << ServerPath.string(); + Object << "clientpath"sv << ClientPath.string(); + Object.AddHash("data"sv, IoHash::Zero); + Object.EndObject(); + } + Object.EndArray(); + } + Package.SetObject(Object.Save()); + return Package; + }; + static std::vector<std::pair<Oid, CompressedBuffer>> CreateAttachments( const std::span<const size_t>& Sizes, OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, @@ -4085,7 +4588,105 @@ namespace testutils { return Result; } -} // namespace testutils + static std::vector<std::pair<Oid, std::filesystem::path>> CreateFileAttachments(const std::filesystem::path& RootDir, + const std::span<const size_t>& Sizes) + { + std::vector<std::pair<Oid, std::filesystem::path>> Result; + Result.reserve(Sizes.size()); + for (size_t Size : Sizes) + { + IoBuffer FileBlob = CreateRandomBlob(Size); + IoHash FileHash = IoHash::HashBuffer(FileBlob); + std::filesystem::path UncompressedFilePath = RootDir / "content" / "uncompressed_file" / FileHash.ToHexString(); + CreateDirectories(UncompressedFilePath.parent_path()); + WriteFile(UncompressedFilePath, FileBlob); + Result.push_back({Oid::NewOid(), UncompressedFilePath}); + } + return Result; + } + + struct CapturingJobContext : public JobContext + { + bool IsCancelled() const override { return m_Cancel; } + void 
ReportMessage(std::string_view Message) override + { + RwLock::ExclusiveLockScope _(m_Lock); + Messages.emplace_back(Message); + } + void ReportProgress(std::string_view Op, std::string_view Details, ptrdiff_t, ptrdiff_t, uint64_t) override + { + RwLock::ExclusiveLockScope _(m_Lock); + ProgressMessages.emplace_back(fmt::format("{}: {}", Op, Details)); + } + + bool HasMessage(std::string_view Substr) const + { + RwLock::SharedLockScope _(m_Lock); + return std::any_of(Messages.begin(), Messages.end(), [Substr](const std::string& M) { + return M.find(Substr) != std::string::npos; + }); + } + + bool m_Cancel = false; + std::vector<std::string> Messages; + std::vector<std::string> ProgressMessages; + + private: + mutable RwLock m_Lock; + }; + + // Worker pool pair with separate NetworkPool and WorkerPool. + struct TestWorkerPools + { + private: + uint32_t m_NetworkCount; + uint32_t m_WorkerCount; + + public: + WorkerThreadPool NetworkPool; + WorkerThreadPool WorkerPool; + + TestWorkerPools() + : m_NetworkCount(Max(GetHardwareConcurrency() / 4u, 2u)) + , m_WorkerCount(m_NetworkCount < GetHardwareConcurrency() ? Max(GetHardwareConcurrency() - m_NetworkCount, 4u) : 4u) + , NetworkPool(m_NetworkCount) + , WorkerPool(m_WorkerCount) + { + } + }; + + inline uint32_t GetWorkerCount() { return Max(GetHardwareConcurrency() / 4u, 2u); } + + inline IoHash MakeTestHash(uint8_t Index) + { + uint8_t Data[20] = {}; + Data[0] = Index; + return IoHash::MakeFrom(Data); + } + + inline Oid MakeTestOid(uint32_t Index) + { + uint32_t Data[3] = {Index, 0, 0}; + return Oid::FromMemory(Data); + } + + // MaxChunks must be <= 127 (so MeasureVarUInt(MaxChunks) == 1) and MaxChunkEmbedSize is + // fixed at 100 to keep header sizes deterministic in BlockComposer tests. 
+ inline remotestore_impl::BlockComposer::Configuration MakeTestConfig(uint64_t UsableSize, uint64_t MaxChunks) + { + constexpr uint64_t MaxChunkEmbedSize = 100; + uint64_t MaxHeaderSize = + CompressedBuffer::GetHeaderSizeForNoneEncoder() + MeasureVarUInt(MaxChunks) + MeasureVarUInt(MaxChunkEmbedSize) * MaxChunks; + return remotestore_impl::BlockComposer::Configuration{ + .MaxBlockSize = UsableSize + MaxHeaderSize, + .MaxChunksPerBlock = MaxChunks, + .MaxChunkEmbedSize = MaxChunkEmbedSize, + }; + } + +} // namespace projectstore_testutils + +TEST_SUITE_BEGIN("remotestore.projectstore"); struct ExportForceDisableBlocksTrue_ForceTempBlocksFalse { @@ -4112,7 +4713,7 @@ TEST_CASE_TEMPLATE("project.store.export", ExportForceDisableBlocksFalse_ForceTempBlocksTrue) { using namespace std::literals; - using namespace testutils; + using namespace projectstore_testutils; ScopedTemporaryDirectory TempDir; ScopedTemporaryDirectory ExportDir; @@ -4147,6 +4748,11 @@ TEST_CASE_TEMPLATE("project.store.export", Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{256u * 1024u, 92u * 1024u}, OodleCompressionLevel::None))); + Oplog->AppendNewOplogEntry( + CreateFilesOplogPackage(Oid::NewOid(), + RootDir, + CreateFileAttachments(RootDir, std::initializer_list<size_t>{423 * 1024, 2 * 1024, 3213, 762 * 1024}))); + FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 64u * 1024, .MaxChunksPerBlock = 1000, .MaxChunkEmbedSize = 32 * 1024u, @@ -4159,89 +4765,3080 @@ TEST_CASE_TEMPLATE("project.store.export", std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); RemoteProjectStore::RemoteStoreInfo StoreInfo = RemoteStore->GetInfo(); - uint32_t NetworkWorkerCount = Max(GetHardwareConcurrency() / 4u, 2u); - uint32_t WorkerCount = (NetworkWorkerCount < GetHardwareConcurrency()) ? 
Max(GetHardwareConcurrency() - NetworkWorkerCount, 4u) : 4u; - - WorkerThreadPool WorkerPool(WorkerCount); - WorkerThreadPool NetworkPool(NetworkWorkerCount); - - RemoteProjectStore::Result ExportResult = SaveOplog(CidStore, - *RemoteStore, - *Project.Get(), - *Oplog, - NetworkPool, - WorkerPool, - Options.MaxBlockSize, - Options.MaxChunksPerBlock, - Options.MaxChunkEmbedSize, - Options.ChunkFileSizeLimit, - true, - false, - false, - nullptr); - - CHECK(ExportResult.ErrorCode == 0); + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + SaveOplog(Log(), + CidStore, + *RemoteStore, + *Project.Get(), + *Oplog, + NetworkPool, + WorkerPool, + Options.MaxBlockSize, + Options.MaxChunksPerBlock, + Options.MaxChunkEmbedSize, + Options.ChunkFileSizeLimit, + true, + false, + false, + nullptr); Ref<ProjectStore::Oplog> OplogImport = Project->NewOplog("oplog2", {}); - CHECK(OplogImport); - - RemoteProjectStore::Result ImportResult = LoadOplog(CidStore, - *RemoteStore, - *OplogImport, - NetworkPool, - WorkerPool, - /*Force*/ false, - /*IgnoreMissingAttachments*/ false, - /*CleanOplog*/ false, - EPartialBlockRequestMode::Mixed, - /*HostLatencySec*/ -1.0, - /*CacheLatencySec*/ -1.0, - nullptr); - CHECK(ImportResult.ErrorCode == 0); - - RemoteProjectStore::Result ImportForceResult = LoadOplog(CidStore, - *RemoteStore, - *OplogImport, - NetworkPool, - WorkerPool, - /*Force*/ true, - /*IgnoreMissingAttachments*/ false, - /*CleanOplog*/ false, - EPartialBlockRequestMode::Mixed, - /*HostLatencySec*/ -1.0, - /*CacheLatencySec*/ -1.0, - nullptr); - CHECK(ImportForceResult.ErrorCode == 0); - - RemoteProjectStore::Result ImportCleanResult = LoadOplog(CidStore, - *RemoteStore, - *OplogImport, - NetworkPool, - WorkerPool, - /*Force*/ false, - /*IgnoreMissingAttachments*/ false, - /*CleanOplog*/ true, - EPartialBlockRequestMode::Mixed, - /*HostLatencySec*/ -1.0, - /*CacheLatencySec*/ -1.0, - nullptr); - 
CHECK(ImportCleanResult.ErrorCode == 0); - - RemoteProjectStore::Result ImportForceCleanResult = LoadOplog(CidStore, - *RemoteStore, - *OplogImport, - NetworkPool, - WorkerPool, - /*Force*/ true, - /*IgnoreMissingAttachments*/ false, - /*CleanOplog*/ true, - EPartialBlockRequestMode::Mixed, - /*HostLatencySec*/ -1.0, - /*CacheLatencySec*/ -1.0, - nullptr); - CHECK(ImportForceCleanResult.ErrorCode == 0); + REQUIRE(OplogImport); + + CapturingJobContext Ctx; + auto DoLoad = [&](bool Force, bool Clean) { + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = Oid::Zero, + .Oplog = *OplogImport, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = Force, + .IgnoreMissingAttachments = false, + .CleanOplog = Clean, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &Ctx}); + }; + + DoLoad(false, false); + DoLoad(true, false); + DoLoad(false, true); + DoLoad(true, true); } +// Populates ExportDir with a SaveOplog call using the same data as project.store.export. 
+static std::shared_ptr<RemoteProjectStore> +SetupExportStore(CidStore& CidStore, + ProjectStore::Project& Project, + WorkerThreadPool& NetworkPool, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& ExportDir) +{ + using namespace projectstore_testutils; + using namespace std::literals; + + Ref<ProjectStore::Oplog> Oplog = Project.NewOplog("oplog_export", {}); + if (!Oplog) + { + throw std::runtime_error("Failed to create oplog"); + } + + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), {})); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{77}))); + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{7123, 583, 690, 99}))); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{55, 122}))); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oid::NewOid(), + CreateAttachments(std::initializer_list<size_t>{256u * 1024u, 92u * 1024u}, OodleCompressionLevel::None))); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage( + Oid::NewOid(), + Project.RootDir, + CreateFileAttachments(Project.RootDir, std::initializer_list<size_t>{423 * 1024, 2 * 1024, 3213, 762 * 1024}))); + + FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 64u * 1024, + .MaxChunksPerBlock = 1000, + .MaxChunkEmbedSize = 32 * 1024u, + .ChunkFileSizeLimit = 64u * 1024u}, + /*.FolderPath =*/ExportDir, + /*.Name =*/std::string("oplog_export"), + /*.OptionalBaseName =*/std::string(), + /*.ForceDisableBlocks =*/false, + /*.ForceEnableTempBlocks =*/false}; + + std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); + SaveOplog(Log(), + CidStore, + *RemoteStore, + Project, + *Oplog, + NetworkPool, + WorkerPool, + Options.MaxBlockSize, + Options.MaxChunksPerBlock, + Options.MaxChunkEmbedSize, + Options.ChunkFileSizeLimit, + 
/*EmbedLooseFiles*/ true, + /*ForceUpload*/ false, + /*IgnoreMissingAttachments*/ false, + /*OptionalContext*/ nullptr); + return RemoteStore; +} + +// Creates an export store with six 512 KB chunks packed into one ~3 MB block (MaxBlockSize=8 MB). +// The ~1.5 MB slack exceeds the ChunkBlockAnalyser threshold, enabling partial-block downloads. +// Uses its own GcManager/CidStore/ProjectStore so each call is independent. +static std::shared_ptr<RemoteProjectStore> +SetupPartialBlockExportStore(WorkerThreadPool& NetworkPool, WorkerThreadPool& WorkerPool, const std::filesystem::path& ExportDir) +{ + using namespace projectstore_testutils; + using namespace std::literals; + + GcManager LocalGc; + CidStore LocalCidStore(LocalGc); + CidStoreConfiguration LocalCidConfig = {.RootDirectory = ExportDir / "cas", .TinyValueThreshold = 1024, .HugeValueThreshold = 4096}; + LocalCidStore.Initialize(LocalCidConfig); + + std::filesystem::path LocalProjectBasePath = ExportDir / "proj"; + ProjectStore LocalProjectStore(LocalCidStore, LocalProjectBasePath, LocalGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> LocalProject(LocalProjectStore.NewProject(LocalProjectBasePath / "p"sv, + "p"sv, + (ExportDir / "root").string(), + (ExportDir / "engine").string(), + (ExportDir / "game").string(), + (ExportDir / "game" / "game.uproject").string())); + + Ref<ProjectStore::Oplog> Oplog = LocalProject->NewOplog("oplog_partial_block", {}); + if (!Oplog) + { + throw std::runtime_error("Failed to create oplog"); + } + + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oid::NewOid(), + CreateAttachments(std::initializer_list<size_t>{512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u}, + OodleCompressionLevel::None))); + + // MaxChunkEmbedSize must exceed 512 KB (compressed size with None encoding) or all chunks + // become loose attachments and no block is created. 
+ FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 8u * 1024u * 1024u, + .MaxChunksPerBlock = 1000, + .MaxChunkEmbedSize = RemoteStoreOptions::DefaultMaxChunkEmbedSize, + .ChunkFileSizeLimit = 64u * 1024u * 1024u}, + /*.FolderPath =*/ExportDir, + /*.Name =*/std::string("oplog_partial_block"), + /*.OptionalBaseName =*/std::string(), + /*.ForceDisableBlocks =*/false, + /*.ForceEnableTempBlocks =*/false}; + std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); + SaveOplog(Log(), + LocalCidStore, + *RemoteStore, + *LocalProject, + *Oplog, + NetworkPool, + WorkerPool, + Options.MaxBlockSize, + Options.MaxChunksPerBlock, + Options.MaxChunkEmbedSize, + Options.ChunkFileSizeLimit, + /*EmbedLooseFiles*/ true, + /*ForceUpload*/ false, + /*IgnoreMissingAttachments*/ false, + /*OptionalContext*/ nullptr); + return RemoteStore; +} + +static IoHash +FindBlockWithMultipleChunks(RemoteProjectStore& Store, size_t MinChunkCount) +{ + RemoteProjectStore::LoadContainerResult ContainerResult = Store.LoadContainer(); + if (ContainerResult.ErrorCode != 0) + { + return {}; + } + std::vector<IoHash> BlockHashes = GetBlockHashesFromOplog(ContainerResult.ContainerObject); + if (BlockHashes.empty()) + { + return {}; + } + RemoteProjectStore::GetBlockDescriptionsResult Descriptions = Store.GetBlockDescriptions(BlockHashes, nullptr, Oid{}); + if (Descriptions.ErrorCode != 0) + { + return {}; + } + for (const ChunkBlockDescription& Desc : Descriptions.Blocks) + { + if (Desc.ChunkRawHashes.size() >= MinChunkCount) + { + return Desc.BlockHash; + } + } + return {}; +} + +// Seeds TargetCidStore with even-indexed chunks (0, 2, 4 ...) from BlockHash, leaving +// odd chunks absent to create non-adjacent missing ranges for partial-block download tests. 
+static void +SeedCidStoreWithAlternateChunks(CidStore& TargetCidStore, RemoteProjectStore& Source, const IoHash& BlockHash) +{ + RemoteProjectStore::LoadAttachmentResult BlockResult = Source.LoadAttachment(BlockHash); + if (BlockResult.ErrorCode != 0 || !BlockResult.Bytes) + { + return; + } + + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(BlockResult.Bytes), RawHash, RawSize); + if (!Compressed) + { + return; + } + CompositeBuffer BlockPayload = Compressed.DecompressToComposite(); + if (!BlockPayload) + { + return; + } + + uint32_t ChunkIndex = 0; + uint64_t HeaderSize = 0; + IterateChunkBlock( + BlockPayload.Flatten(), + [&TargetCidStore, &ChunkIndex](CompressedBuffer&& Chunk, const IoHash& AttachmentHash) { + if (ChunkIndex % 2 == 0) + { + IoBuffer ChunkData = Chunk.GetCompressed().Flatten().AsIoBuffer(); + TargetCidStore.AddChunk(ChunkData, AttachmentHash); + } + ++ChunkIndex; + }, + HeaderSize); +} + +TEST_CASE("project.store.import.context_settings") +{ + using namespace std::literals; + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + std::filesystem::path RootDir = TempDir.Path() / "root"; + std::filesystem::path EngineRootDir = TempDir.Path() / "engine"; + std::filesystem::path ProjectRootDir = TempDir.Path() / "game"; + std::filesystem::path ProjectFilePath = TempDir.Path() / "game" / "game.uproject"; + + // Export-side CAS and project store; kept disjoint from the import side. 
+ GcManager ExportGc; + CidStore ExportCidStore(ExportGc); + CidStoreConfiguration ExportCidConfig = {.RootDirectory = TempDir.Path() / "export_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + ExportCidStore.Initialize(ExportCidConfig); + + std::filesystem::path ExportBasePath = TempDir.Path() / "export_projectstore"; + ProjectStore ExportProjectStore(ExportCidStore, ExportBasePath, ExportGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> ExportProject(ExportProjectStore.NewProject(ExportBasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore = + SetupExportStore(ExportCidStore, *ExportProject, NetworkPool, WorkerPool, ExportDir.Path()); + + // Import-side CAS starts empty so the first import downloads from the remote store without ForceDownload. 
+ GcManager ImportGc; + CidStore ImportCidStore(ImportGc); + CidStoreConfiguration ImportCidConfig = {.RootDirectory = TempDir.Path() / "import_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + ImportCidStore.Initialize(ImportCidConfig); + + std::filesystem::path ImportBasePath = TempDir.Path() / "import_projectstore"; + ProjectStore ImportProjectStore(ImportCidStore, ImportBasePath, ImportGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> ImportProject(ImportProjectStore.NewProject(ImportBasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); + + const Oid CacheBuildId = Oid::NewOid(); + BuildStorageCache::Statistics CacheStats; + std::unique_ptr<BuildStorageCache> Cache = CreateInMemoryBuildStorageCache(256u, CacheStats); + auto ResetCacheStats = [&]() { + CacheStats.TotalBytesRead = 0; + CacheStats.TotalBytesWritten = 0; + CacheStats.TotalRequestCount = 0; + CacheStats.TotalRequestTimeUs = 0; + CacheStats.TotalExecutionTimeUs = 0; + CacheStats.PeakSentBytes = 0; + CacheStats.PeakReceivedBytes = 0; + CacheStats.PeakBytesPerSec = 0; + CacheStats.PutBlobCount = 0; + CacheStats.PutBlobByteCount = 0; + }; + + int OpJobIndex = 0; + + CapturingJobContext OpJobContext; + + // Each call creates a fresh oplog to prevent short-circuiting on already-present data. 
+ auto DoImport = [&](BuildStorageCache* OptCache, + EPartialBlockRequestMode Mode, + double StoreLatency, + uint64_t StoreRanges, + double CacheLatency, + uint64_t CacheRanges, + bool PopulateCache, + bool ForceDownload) -> void { + Ref<ProjectStore::Oplog> ImportOplog = ImportProject->NewOplog(fmt::format("import_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = ImportCidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = OptCache, + .CacheBuildId = CacheBuildId, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = ForceDownload, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = Mode, + .PopulateCache = PopulateCache, + .StoreLatencySec = StoreLatency, + .StoreMaxRangeCountPerRequest = StoreRanges, + .CacheLatencySec = CacheLatency, + .CacheMaxRangeCountPerRequest = CacheRanges, + .OptionalJobContext = &OpJobContext}); + }; + + // Shorthand: Mode=All, low latency, 128 ranges for both store and cache. + auto ImportAll = [&](BuildStorageCache* OptCache, bool Populate, bool Force) -> void { + DoImport(OptCache, EPartialBlockRequestMode::All, 0.001, 128u, 0.001, 128u, Populate, Force); + }; + + SUBCASE("mode_off_no_cache") { DoImport(nullptr, EPartialBlockRequestMode::Off, -1.0, (uint64_t)-1, -1.0, (uint64_t)-1, false, false); } + + SUBCASE("mode_all_multirange_cloud_no_cache") + { + // StoreMaxRangeCountPerRequest > 1 -> MultiRange cloud path. + DoImport(nullptr, EPartialBlockRequestMode::All, 0.001, 128u, -1.0, 0u, false, false); + } + + SUBCASE("mode_all_singlerange_cloud_no_cache") + { + // StoreMaxRangeCountPerRequest == 1 -> SingleRange cloud path. + DoImport(nullptr, EPartialBlockRequestMode::All, 0.001, 1u, -1.0, 0u, false, false); + } + + SUBCASE("mode_mixed_high_latency_no_cache") + { + // High store latency encourages range merging; Mixed uses SingleRange for cloud, Off for cache. 
+ DoImport(nullptr, EPartialBlockRequestMode::Mixed, 0.1, 128u, -1.0, 0u, false, false); + } + + SUBCASE("cache_populate_and_hit") + { + // First import: CidStore empty -> blocks downloaded and written to cache. + ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); + CHECK(CacheStats.PutBlobCount > 0); + + // Re-import with Force=true: HasAttachment overridden, blocks served from cache. + ResetCacheStats(); + ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/true); + CHECK(CacheStats.PutBlobCount == 0); + CHECK(CacheStats.TotalRequestCount > 0); + } + + SUBCASE("cache_no_populate_flag") + { + ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/false); + CHECK(CacheStats.PutBlobCount == 0); + } + + SUBCASE("mode_zencacheonly_cache_multirange") + { + // Pre-populate; re-import via ZenCacheOnly. All chunks needed -> FullBlockIndexes path (GetBuildBlob). + ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); + ResetCacheStats(); + + DoImport(Cache.get(), EPartialBlockRequestMode::ZenCacheOnly, 0.1, 128u, 0.001, 128u, false, true); + CHECK(CacheStats.TotalRequestCount > 0); + } + + SUBCASE("mode_zencacheonly_cache_singlerange") + { + // Pre-populate; re-import via ZenCacheOnly with CacheMaxRangeCountPerRequest=1. All chunks needed -> GetBuildBlob (full-blob). + ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); + ResetCacheStats(); + + DoImport(Cache.get(), EPartialBlockRequestMode::ZenCacheOnly, 0.1, 128u, 0.001, 1u, false, true); + CHECK(CacheStats.TotalRequestCount > 0); + } + + SUBCASE("mode_all_cache_and_cloud_multirange") + { + // Pre-populate cache; All mode uses multi-range for both the cache and cloud paths. 
+ ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); + ResetCacheStats(); + + ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/true); + CHECK(CacheStats.TotalRequestCount > 0); + } + + SUBCASE("partial_block_cloud_multirange") + { + ScopedTemporaryDirectory PartialExportDir; + std::shared_ptr<RemoteProjectStore> PartialRemoteStore = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path()); + + // Seeding even-indexed chunks (0, 2, 4) leaves odd ones (1, 3, 5) absent in + // ImportCidStore. Three non-adjacent needed positions -> three BlockRangeDescriptors. + IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); + CHECK(BlockHash != IoHash::Zero); + SeedCidStoreWithAlternateChunks(ImportCidStore, *PartialRemoteStore, BlockHash); + + // StoreMaxRangeCountPerRequest=128 -> all three ranges sent in one LoadAttachmentRanges call. + Ref<ProjectStore::Oplog> PartialOplog = ImportProject->NewOplog(fmt::format("partial_cloud_multi_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = CacheBuildId, + .Oplog = *PartialOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = -1.0, + .CacheMaxRangeCountPerRequest = 0u, + .OptionalJobContext = &OpJobContext}); + } + + SUBCASE("partial_block_cloud_singlerange") + { + // Same block layout as partial_block_cloud_multirange but StoreMaxRangeCountPerRequest=1. + // DownloadPartialBlock issues one LoadAttachmentRanges call per range. 
+ ScopedTemporaryDirectory PartialExportDir; + std::shared_ptr<RemoteProjectStore> PartialRemoteStore = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path()); + + IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); + CHECK(BlockHash != IoHash::Zero); + SeedCidStoreWithAlternateChunks(ImportCidStore, *PartialRemoteStore, BlockHash); + + Ref<ProjectStore::Oplog> PartialOplog = ImportProject->NewOplog(fmt::format("partial_cloud_single_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = CacheBuildId, + .Oplog = *PartialOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 1u, + .CacheLatencySec = -1.0, + .CacheMaxRangeCountPerRequest = 0u, + .OptionalJobContext = &OpJobContext}); + } + + SUBCASE("partial_block_cache_multirange") + { + ScopedTemporaryDirectory PartialExportDir; + std::shared_ptr<RemoteProjectStore> PartialRemoteStore = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path()); + + IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); + CHECK(BlockHash != IoHash::Zero); + + // Phase 1: full block download from remote populates the cache. 
+ { + Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog(fmt::format("partial_cache_multi_p1_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase1Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = true, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 128u, + .OptionalJobContext = &OpJobContext}); + + CHECK(CacheStats.PutBlobCount > 0); + } + ResetCacheStats(); + + // Phase 2: fresh CidStore with even chunks seeded; CacheMaxRangeCountPerRequest=128 -> GetBuildBlobRanges. + GcManager Phase2Gc; + CidStore Phase2CidStore(Phase2Gc); + CidStoreConfiguration Phase2CidConfig = {.RootDirectory = TempDir.Path() / "partial_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + Phase2CidStore.Initialize(Phase2CidConfig); + SeedCidStoreWithAlternateChunks(Phase2CidStore, *PartialRemoteStore, BlockHash); + + Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog(fmt::format("partial_cache_multi_p2_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = Phase2CidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase2Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::ZenCacheOnly, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 128u, + .OptionalJobContext = 
&OpJobContext}); + + CHECK(CacheStats.TotalRequestCount > 0); + } + + SUBCASE("partial_block_cache_singlerange") + { + ScopedTemporaryDirectory PartialExportDir; + std::shared_ptr<RemoteProjectStore> PartialRemoteStore = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path()); + + IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); + CHECK(BlockHash != IoHash::Zero); + + // Phase 1: full block download from remote into cache. + { + Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog(fmt::format("partial_cache_single_p1_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase1Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = true, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 128u, + .OptionalJobContext = &OpJobContext}); + + CHECK(CacheStats.PutBlobCount > 0); + } + ResetCacheStats(); + + // Phase 2: CacheMaxRangeCountPerRequest=1 -> GetBuildBlob with range offset, called per needed range. 
+ GcManager Phase2Gc; + CidStore Phase2CidStore(Phase2Gc); + CidStoreConfiguration Phase2CidConfig = {.RootDirectory = TempDir.Path() / "partial_cas_single", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + Phase2CidStore.Initialize(Phase2CidConfig); + SeedCidStoreWithAlternateChunks(Phase2CidStore, *PartialRemoteStore, BlockHash); + + Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog(fmt::format("partial_cache_single_p2_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = Phase2CidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase2Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::ZenCacheOnly, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 1u, + .OptionalJobContext = &OpJobContext}); + + CHECK(CacheStats.TotalRequestCount > 0); + } +} + +static Ref<ProjectStore::Project> +MakeTestProject(CidStore& CidStore, + GcManager& Gc, + const std::filesystem::path& TempDir, + std::unique_ptr<class ProjectStore>& OutProjectStore) +{ + using namespace std::literals; + + CidStoreConfiguration CidConfig = {.RootDirectory = TempDir / "cas", .TinyValueThreshold = 1024, .HugeValueThreshold = 4096}; + CidStore.Initialize(CidConfig); + + std::filesystem::path BasePath = TempDir / "projectstore"; + OutProjectStore = std::make_unique<class ProjectStore>(CidStore, BasePath, Gc, ProjectStore::Configuration{}); + + std::filesystem::path RootDir = TempDir / "root"; + std::filesystem::path EngineRootDir = TempDir / "engine"; + std::filesystem::path ProjectRootDir = TempDir / "game"; + std::filesystem::path ProjectFilePath = TempDir / "game" / "game.uproject"; + + return 
Ref<ProjectStore::Project>(OutProjectStore->NewProject(BasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); +} + +static void +RunSaveOplog(CidStore& CidStore, + ProjectStore::Project& Project, + ProjectStore::Oplog& Oplog, + WorkerThreadPool& NetworkPool, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& ExportDir, + const std::string& Name, + size_t MaxBlockSize, + size_t MaxChunksPerBlock, + size_t MaxChunkEmbedSize, + bool EmbedLooseFiles, + bool ForceUpload, + bool IgnoreMissingAttachments, + JobContext* OptionalContext, + bool ForceDisableBlocks, + std::shared_ptr<RemoteProjectStore>* OutRemoteStore = nullptr) +{ + FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = MaxBlockSize, + .MaxChunksPerBlock = MaxChunksPerBlock, + .MaxChunkEmbedSize = MaxChunkEmbedSize, + .ChunkFileSizeLimit = 64u * 1024u * 1024u}, + /*.FolderPath =*/ExportDir, + /*.Name =*/Name, + /*.OptionalBaseName =*/std::string(), + /*.ForceDisableBlocks =*/ForceDisableBlocks, + /*.ForceEnableTempBlocks =*/false}; + + std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); + if (OutRemoteStore) + { + *OutRemoteStore = RemoteStore; + } + SaveOplog(Log(), + CidStore, + *RemoteStore, + Project, + Oplog, + NetworkPool, + WorkerPool, + Options.MaxBlockSize, + Options.MaxChunksPerBlock, + Options.MaxChunkEmbedSize, + Options.ChunkFileSizeLimit, + EmbedLooseFiles, + ForceUpload, + IgnoreMissingAttachments, + OptionalContext); +} + +TEST_CASE("project.store.export.no_attachments_needed") +{ + // With no binary attachments, UploadAttachments reports "No attachments needed". 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_no_att", {}); + REQUIRE(Oplog); + + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), {})); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), {})); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + CapturingJobContext Ctx; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_no_att", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/true, + /*IgnoreMissingAttachments=*/false, + &Ctx, + /*ForceDisableBlocks=*/false); + + CHECK(Ctx.HasMessage("No attachments needed")); +} + +TEST_CASE("project.store.embed_loose_files_true") +{ + // EmbedLooseFiles=true: file-op entries are rewritten with a BinaryAttachment field. Round-trip must succeed. 
+ using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_embed_true", {}); + REQUIRE(Oplog); + + Oplog->AppendNewOplogEntry( + CreateFilesOplogPackage(Oid::NewOid(), RootDir, CreateFileAttachments(RootDir, std::initializer_list<size_t>{1024, 2048}))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_embed_true", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_embed_true_import", {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.embed_loose_files_false" * doctest::skip()) // superseded by buildcontainer.embed_loose_files_false_no_rewrite +{ + // EmbedLooseFiles=false: file-op entries pass through unrewritten. Round-trip must succeed. 
+ using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_embed_false", {}); + REQUIRE(Oplog); + + Oplog->AppendNewOplogEntry( + CreateFilesOplogPackage(Oid::NewOid(), RootDir, CreateFileAttachments(RootDir, std::initializer_list<size_t>{1024, 2048}))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_embed_false", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/false, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_embed_false_import", {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.missing_attachment_ignored" * + doctest::skip()) // superseded by buildcontainer.ignore_missing_file_attachment_warn +{ + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = 
TEST_CASE("project.store.export.missing_attachment_ignored" *
		  doctest::skip()) // superseded by buildcontainer.ignore_missing_file_attachment_warn
{
	// Deleting the backing files before export with IgnoreMissingAttachments=true
	// must surface a warning message rather than failing the export.
	using namespace projectstore_testutils;

	ScopedTemporaryDirectory TempDir;
	ScopedTemporaryDirectory ExportDir;

	GcManager Gc;
	CidStore CidStore(Gc);
	std::unique_ptr<ProjectStore> StoreOwner;
	Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), StoreOwner);

	std::filesystem::path RootDir = TempDir.Path() / "root";

	auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024});

	Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_att", {});
	REQUIRE(Oplog);
	Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts));

	// Remove every backing file so all attachments are missing at export time.
	for (const auto& [Id, Path] : FileAtts)
	{
		std::filesystem::remove(Path);
	}

	TestWorkerPools Pools;
	WorkerThreadPool& NetworkPool = Pools.NetworkPool;
	WorkerThreadPool& WorkerPool = Pools.WorkerPool;

	CapturingJobContext JobCtx;
	RunSaveOplog(CidStore,
				 *Project,
				 *Oplog,
				 NetworkPool,
				 WorkerPool,
				 ExportDir.Path(),
				 "oplog_missing_att",
				 64u * 1024u,
				 1000,
				 32u * 1024u,
				 /*EmbedLooseFiles=*/true,
				 /*ForceUpload=*/false,
				 /*IgnoreMissingAttachments=*/true,
				 &JobCtx,
				 /*ForceDisableBlocks=*/false);

	CHECK(JobCtx.HasMessage("Missing attachment"));
}

TEST_CASE("project.store.export.missing_chunk_in_cidstore" *
		  doctest::skip()) // superseded by buildcontainer.ignore_missing_binary_attachment_warn/throw
{
	// A bulkdata op referencing a hash that was never inserted into the CidStore
	// must make the export throw when IgnoreMissingAttachments=false.
	using namespace projectstore_testutils;
	using namespace std::literals;

	ScopedTemporaryDirectory TempDir;
	ScopedTemporaryDirectory ExportDir;

	GcManager Gc;
	CidStore CidStore(Gc);
	std::unique_ptr<ProjectStore> StoreOwner;
	Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), StoreOwner);

	// Hash of a blob that is deliberately never stored.
	IoBuffer FakeData = CreateRandomBlob(256);
	IoHash FakeHash = IoHash::HashBuffer(FakeData);

	CbObjectWriter Object;
	Object << "key"sv << OidAsString(Oid::NewOid());
	Object.BeginArray("bulkdata"sv);
	{
		Object.BeginObject();
		Object << "id"sv << Oid::NewOid();
		Object << "type"sv
			   << "Standard"sv;
		Object.AddBinaryAttachment("data"sv, FakeHash);
		Object.EndObject();
	}
	Object.EndArray();
	CbPackage Package;
	Package.SetObject(Object.Save());

	Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_cid", {});
	REQUIRE(Oplog);
	Oplog->AppendNewOplogEntry(Package);

	TestWorkerPools Pools;
	WorkerThreadPool& NetworkPool = Pools.NetworkPool;
	WorkerThreadPool& WorkerPool = Pools.WorkerPool;

	CHECK_THROWS(RunSaveOplog(CidStore,
							  *Project,
							  *Oplog,
							  NetworkPool,
							  WorkerPool,
							  ExportDir.Path(),
							  "oplog_missing_cid",
							  64u * 1024u,
							  1000,
							  32u * 1024u,
							  /*EmbedLooseFiles=*/true,
							  /*ForceUpload=*/false,
							  /*IgnoreMissingAttachments=*/false,
							  /*OptionalContext=*/nullptr,
							  /*ForceDisableBlocks=*/false));
}
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{96u * 1024u}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_large_direct", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts)); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunkEmbedSize = 32u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_large_direct", + 64u * 1024u, + 1000, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_large_direct_import", {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.large_file_attachment_via_temp") +{ + // File with MaxChunkEmbedSize < size <= 2xMaxChunkEmbedSize: compressed to a temp buffer; + // if still large (incompressible), goes to OnLargeAttachment. Round-trip must succeed. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + + // 48 KB: 32 KB < 48 KB <= 64 KB -> temp-compression path; incompressible data stays > 32 KB. 
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{48u * 1024u}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_large_via_temp", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts)); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunkEmbedSize = 32u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_large_via_temp", + 64u * 1024u, + 1000, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_large_via_temp_import", {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.large_chunk_from_cidstore") +{ + // Bulkdata attachment in CidStore with compressed size > MaxChunkEmbedSize -> OnLargeAttachment. Round-trip must succeed. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + // 64 KB with None encoding -> compressed ~ 64 KB > MaxChunkEmbedSize = 32 KB. 
+ auto Attachments = CreateAttachments(std::initializer_list<size_t>{64u * 1024u}, OodleCompressionLevel::None); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_large_cid", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), Attachments)); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunkEmbedSize = 32u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_large_cid", + 64u * 1024u, + 1000, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_large_cid_import", {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.block_reuse") +{ + // Second export to the same store: FindReuseBlocks matches existing blocks; no new blocks are written. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + // 20 KB with None encoding: compressed ~ 20 KB < MaxChunkEmbedSize = 32 KB -> goes into a block. 
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_reuse", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oid::NewOid(), + CreateAttachments(std::initializer_list<size_t>{20u * 1024u, 20u * 1024u}, OodleCompressionLevel::None))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunkEmbedSize = 32u * 1024u; + constexpr size_t MaxBlockSize = 64u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_reuse", + MaxBlockSize, + 1000, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + RemoteProjectStore::GetKnownBlocksResult KnownAfterFirst = RemoteStore->GetKnownBlocks(); + REQUIRE(!KnownAfterFirst.Blocks.empty()); + + std::vector<IoHash> BlockHashesAfterFirst; + for (const ChunkBlockDescription& B : KnownAfterFirst.Blocks) + { + BlockHashesAfterFirst.push_back(B.BlockHash); + } + + SaveOplog(Log(), + CidStore, + *RemoteStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + MaxBlockSize, + 1000, + MaxChunkEmbedSize, + 64u * 1024u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr); + + RemoteProjectStore::GetKnownBlocksResult KnownAfterSecond = RemoteStore->GetKnownBlocks(); + std::vector<IoHash> BlockHashesAfterSecond; + for (const ChunkBlockDescription& B : KnownAfterSecond.Blocks) + { + BlockHashesAfterSecond.push_back(B.BlockHash); + } + + std::sort(BlockHashesAfterFirst.begin(), BlockHashesAfterFirst.end()); + std::sort(BlockHashesAfterSecond.begin(), BlockHashesAfterSecond.end()); + CHECK(BlockHashesAfterFirst == BlockHashesAfterSecond); +} + 
+TEST_CASE("project.store.export.max_chunks_per_block") +{ + // MaxChunksPerBlock=2 with 3 attachments from one op -> at least 2 blocks produced. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + // 2 KB with None encoding: compressed ~ 2 KB < MaxChunkEmbedSize = 4 KB -> enters block assembly. + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_max_chunks", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oid::NewOid(), + CreateAttachments(std::initializer_list<size_t>{2u * 1024u, 2u * 1024u, 2u * 1024u}, OodleCompressionLevel::None))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunksPerBlock = 2; + constexpr size_t MaxBlockSize = 1u * 1024u * 1024u; + constexpr size_t MaxChunkEmbedSize = 4u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_max_chunks", + MaxBlockSize, + MaxChunksPerBlock, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + RemoteProjectStore::GetKnownBlocksResult KnownBlocks = RemoteStore->GetKnownBlocks(); + CHECK(KnownBlocks.Blocks.size() >= 2); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_max_chunks_import", {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + 
.PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.max_data_per_block") +{ + // Verifies ComposeBlocks respects UsableBlockSize = MaxBlockSize - MaxHeaderSize. + // With MaxBlockSize=7168, MaxChunksPerBlock=32: MaxHeaderSize=129, UsableBlockSize=7039. + // Oids[1] contributes 7041 compressed bytes (> 7039) to force a block boundary at that exact limit. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_max_data_per_block", {}); + REQUIRE(Oplog); + + std::vector<Oid> Oids; + Oids.push_back(Oid::NewOid()); + Oids.push_back(Oid::NewOid()); + Oids.push_back(Oid::NewOid()); + Oids.push_back(Oid::NewOid()); + Oids.push_back(Oid::NewOid()); + std::sort(Oids.begin(), Oids.end()); + + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oids[0], CreateAttachments(std::initializer_list<size_t>{2u * 1024u}, OodleCompressionLevel::None))); + + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oids[1], + CreateAttachments(std::initializer_list<size_t>{3u * 1024u, 2u * 1024u, 2u * 1024u, 875u, 875u, 875u}, + OodleCompressionLevel::None))); + + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oids[2], CreateAttachments(std::initializer_list<size_t>{875u, 875u}, OodleCompressionLevel::None))); + + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oids[3], + CreateAttachments(std::initializer_list<size_t>{875u, 875u, 875u, 875u, 875u, 875u}, OodleCompressionLevel::None))); + + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oids[4], CreateAttachments(std::initializer_list<size_t>{1676, 1678}, OodleCompressionLevel::None))); + + TestWorkerPools Pools; + WorkerThreadPool& 
NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunksPerBlock = 32; + constexpr size_t MaxBlockSize = 7u * 1024u; + constexpr size_t MaxChunkEmbedSize = 3u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_max_data_per_block", + MaxBlockSize, + MaxChunksPerBlock, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + RemoteProjectStore::GetKnownBlocksResult KnownBlocks = RemoteStore->GetKnownBlocks(); + CHECK(KnownBlocks.Blocks.size() >= 2); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_max_data_per_block_import", {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.file_deleted_between_phases") +{ + // File exists during RewriteOp but is deleted before AllowChunking workers run. + // With IgnoreMissingAttachments=true the export continues. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + + auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_file_deleted", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts)); + + std::vector<std::filesystem::path> FilePaths; + for (const auto& [Id, Path] : FileAtts) + { + FilePaths.push_back(Path); + } + + // Deletes files when "Rewrote" arrives, before AllowChunking workers run. + struct DeleteOnRewriteContext : public CapturingJobContext + { + std::vector<std::filesystem::path>* Paths = nullptr; + void ReportMessage(std::string_view Message) override + { + CapturingJobContext::ReportMessage(Message); + if (Message.find("Rewrote") != std::string_view::npos && Paths) + { + for (const auto& P : *Paths) + { + std::filesystem::remove(P); + } + } + } + }; + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + DeleteOnRewriteContext Ctx; + Ctx.Paths = &FilePaths; + + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_file_deleted", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/true, + &Ctx, + /*ForceDisableBlocks=*/false); + + CHECK(Ctx.HasMessage("Missing attachment")); + for (const auto& P : FilePaths) + { + CHECK(!std::filesystem::exists(P)); + } +} + +TEST_CASE("project.store.embed_loose_files_zero_data_hash") +{ + // File-op entries with "data": IoHash::Zero (unresolved marker) 
trigger RewriteOp to + // read from disk and replace with a resolved BinaryAttachment. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_zero_data_hash", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackageWithZeroDataHash(Oid::NewOid(), RootDir, FileAtts)); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_zero_data_hash", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_zero_data_hash_import", {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.embed_loose_files_already_resolved") +{ + // After an export->import round-trip, oplog entries carry resolved "data": BinaryAttachment(H). + // A re-export must preserve those fields without re-reading from disk. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir1; + ScopedTemporaryDirectory ExportDir2; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_already_resolved", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts)); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore1; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir1.Path(), + "oplog_already_resolved", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore1); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_already_resolved_import", {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore1, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); + + RunSaveOplog(CidStore, + *Project, + *ImportOplog, + NetworkPool, + WorkerPool, + ExportDir2.Path(), + "oplog_already_resolved_reexport", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false); +} + 
+TEST_CASE("project.store.import.missing_attachment") +{ + // Export a small oplog with ForceDisableBlocks=true (only loose .blob files), delete one + // attachment, then test both sides of IgnoreMissingAttachments. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_att", {}); + REQUIRE(Oplog); + + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{512, 1024}))); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{2048, 3000}))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_missing_att", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/false, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/true, + &RemoteStore); + + // Find and delete one .blob attachment file from the remote store directory. 
+ std::filesystem::path DeletedBlob; + for (const auto& Entry : std::filesystem::recursive_directory_iterator(ExportDir.Path())) + { + if (Entry.path().extension() == ".blob") + { + DeletedBlob = Entry.path(); + break; + } + } + REQUIRE(!DeletedBlob.empty()); + std::error_code Ec; + std::filesystem::remove(DeletedBlob, Ec); + REQUIRE(!Ec); + + SUBCASE("throws_when_not_ignored") + { + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_missing_att_throw", {}); + REQUIRE(ImportOplog); + CapturingJobContext Ctx; + CHECK_THROWS_AS(LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &Ctx}), + RemoteStoreError); + } + + SUBCASE("succeeds_when_ignored") + { + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_missing_att_ignore", {}); + REQUIRE(ImportOplog); + CapturingJobContext Ctx; + CHECK_NOTHROW(LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + .IgnoreMissingAttachments = true, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &Ctx})); + CHECK(Ctx.HasMessage("Failed to load attachments")); + } +} + +TEST_CASE("project.store.import.error.load_container_failure") +{ + // LoadContainer() on a nonexistent path returns non-zero ErrorCode -> LoadOplog throws RemoteStoreError. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path NonExistentPath = TempDir.Path() / "does_not_exist"; + FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 64u * 1024u, + .MaxChunksPerBlock = 1000, + .MaxChunkEmbedSize = 32u * 1024u, + .ChunkFileSizeLimit = 64u * 1024u * 1024u}, + /*.FolderPath =*/NonExistentPath, + /*.Name =*/"load_container_failure", + /*.OptionalBaseName =*/std::string(), + /*.ForceDisableBlocks =*/false, + /*.ForceEnableTempBlocks =*/false}; + std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("load_container_failure_import", {}); + REQUIRE(ImportOplog); + + CapturingJobContext Ctx; + CHECK_THROWS_AS(LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &Ctx}), + RemoteStoreError); +} + +TEST_CASE("project.store.blockcomposer.path_a_standalone_block") +{ + // Path A: one op with exactly MaxChunksPerBlock chunks -> emitted as a standalone block without merging into pending. 
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ // NOTE(review): comments in this test group cite "MaxChunkEmbedSize (100)";
+ // that value is presumably derived inside MakeTestConfig — confirm in the test utils.
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4)};
+ std::vector<uint64_t> Sizes = {100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op1, Op1};
+
+ // Compose reports each finished block through the callback; collect them in emission order.
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 4);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][3] == MakeTestHash(4));
+}
+
+TEST_CASE("project.store.blockcomposer.path_b_fits_pending")
+{
+ // Path B: a single op whose chunks fit in the empty pending block.
+ // No flush occurs during processing; the final flush emits the one pending block.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2)};
+ std::vector<uint64_t> Sizes = {60, 80}; // each <= MaxChunkEmbedSize (100); sum=140 << UsableSize (1000)
+ std::vector<Oid> Keys = {Op1, Op1};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 2);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+}
+
+TEST_CASE("project.store.blockcomposer.path_b_exact_count_fill")
+{
+ // Path B: pending reaches MaxChunksPerBlock exactly -> immediate flush, no separate final flush.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Two ops of two chunks each: together they hit MaxChunks=4 exactly.
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4)};
+ std::vector<uint64_t> Sizes = {100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 4);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][3] == MakeTestHash(4));
+}
+
+TEST_CASE("project.store.blockcomposer.path_c_75pct_flush")
+{
+ // Path C: pending is >75% full when the next op doesn't fit -> pending flushed first, new op placed via Path B.
+ // UsableSize=100, threshold=75 bytes; Op1=80 bytes > 75%.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 100; // 75% threshold = 75 bytes
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 80 bytes -> Path B, pending = {80 bytes, 1 chunk} (80 > 75)
+ // Op2: 30 bytes -> does not fit (80+30=110 > 100) and 80 > 75 -> Path C flush,
+ // then Path B, pending = {30 bytes} -> final flush
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2)};
+ std::vector<uint64_t> Sizes = {80, 30};
+ std::vector<Oid> Keys = {Op1, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 1);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[1].size() == 1);
+ CHECK(Blocks[1][0] == MakeTestHash(2));
+}
+
+TEST_CASE("project.store.blockcomposer.path_d_partial_fill")
+{
+ // Path D: pending <=75% full but chunk count is the binding constraint. Greedy fill adds chunks until count capacity, then flushes.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000; // 75% threshold = 750 bytes
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 3 x 100 bytes -> Path B, pending = {3 chunks, 300 bytes} (300 <= 750)
+ // Op2: 2 x 100 bytes -> 3+2=5 > MaxChunks=4; 300+200=500 <= 1000; 300 <= 750 -> Path D
+ // D adds op2[0] to pending (4 chunks, count capacity reached), flushes -> block 1
+ // Remaining op2[1] -> Path B (pending empty) -> final flush -> block 2
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5)};
+ std::vector<uint64_t> Sizes = {100, 100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op1, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 4);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+ CHECK(Blocks[0][2] == MakeTestHash(3));
+ CHECK(Blocks[0][3] == MakeTestHash(4));
+ CHECK(Blocks[1].size() == 1);
+ CHECK(Blocks[1][0] == MakeTestHash(5));
+}
+
+TEST_CASE("project.store.blockcomposer.cancellation")
+{
+ // IsCancelledFunc returns true on the second outer-loop iteration.
+ // Op1 (4 chunks, Path A) is fully emitted before cancellation; Op2 is never started.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+
+ int CallCount = 0;
+ remotestore_impl::BlockComposer::Configuration Config = MakeTestConfig(UsableSize, MaxChunks);
+ // First poll returns false (CallCount becomes 1); every later poll returns true.
+ Config.IsCancelledFunc = [&]() { return ++CallCount > 1; };
+ remotestore_impl::BlockComposer Composer(Config);
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5), MakeTestHash(6)};
+ std::vector<uint64_t> Sizes = {100, 100, 100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op1, Op1, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 4);
+}
+
+TEST_CASE("project.store.blockcomposer.final_flush")
+{
+ // Three ops with all chunks fitting in pending (no mid-stream flush) -> single block from final flush.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ Oid Op3 = MakeTestOid(3);
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3)};
+ std::vector<uint64_t> Sizes = {60, 80, 70}; // each <= MaxChunkEmbedSize (100); sum=210 << UsableSize (1000)
+ std::vector<Oid> Keys = {Op1, Op2, Op3};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 3);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+ CHECK(Blocks[0][2] == MakeTestHash(3));
+}
+
+TEST_CASE("project.store.blockcomposer.path_b_b_c")
+{
+ // Path B -> Path B -> Path C: two ops accumulate past 75% threshold; third op triggers Path C flush.
+ // UsableSize=200, threshold=150; two ops of 90 bytes each accumulate 180 bytes, exceeding threshold.
+ using namespace projectstore_testutils;
+ constexpr uint64_t UsableSize = 200; // 75% threshold = 150 bytes
+ constexpr uint64_t MaxChunks = 8;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ Oid Op3 = MakeTestOid(3);
+ // Op1: 90 bytes -> Path B, pending = {90 bytes, 1 chunk} (90 <= 150)
+ // Op2: 90 bytes -> Path B, pending = {180 bytes, 2 chunks} (180 > 150)
+ // Op3: 60 bytes -> does not fit (180+60=240 > 200) and 180 > 150 -> Path C flush -> block 1
+ // then Path B, pending = {60 bytes} -> final flush -> block 2
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3)};
+ std::vector<uint64_t> Sizes = {90, 90, 60};
+ std::vector<Oid> Keys = {Op1, Op2, Op3};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 2);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+ CHECK(Blocks[1].size() == 1);
+ CHECK(Blocks[1][0] == MakeTestHash(3));
+}
+
+TEST_CASE("project.store.blockcomposer.path_a_b_final_flush")
+{
+ // Path A -> Path B -> final flush: first op count-saturates -> standalone block, second op placed via Path B.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 4 x 100 bytes -> MaxChunksPerBlock reached -> CurrentOpFillFullBlock=true -> Path A
+ // Op2: 2 x 100 bytes -> Path B (pending empty) -> final flush
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5), MakeTestHash(6)};
+ std::vector<uint64_t> Sizes = {100, 100, 100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op1, Op1, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // Block 1 is Op1's standalone Path A block; block 2 comes from the final flush.
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 4);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][3] == MakeTestHash(4));
+ CHECK(Blocks[1].size() == 2);
+ CHECK(Blocks[1][0] == MakeTestHash(5));
+ CHECK(Blocks[1][1] == MakeTestHash(6));
+}
+
+TEST_CASE("project.store.blockcomposer.empty_input")
+{
+ // Zero attachments -> no blocks emitted.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose({}, {}, {}, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ CHECK(Blocks.empty());
+}
+
+TEST_CASE("project.store.blockcomposer.single_attachment")
+{
+ // Single chunk -> Path B into empty pending, final flush emits it.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ std::vector<IoHash> Hashes = {MakeTestHash(1)};
+ std::vector<uint64_t> Sizes = {60};
+ std::vector<Oid> Keys = {Op1};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 1);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+}
+
+TEST_CASE("project.store.blockcomposer.path_a_size_saturation")
+{
+ // Path A by size overflow: 60+60 > UsableSize=100; first chunk emitted standalone, second via Path B.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 100; // MaxChunkEmbedSize=100; two 60-byte chunks overflow
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ // chunk0=60, chunk1=60: 60+60=120 > UsableSize=100 -> size overflow after gathering chunk0
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2)};
+ std::vector<uint64_t> Sizes = {60, 60};
+ std::vector<Oid> Keys = {Op1, Op1};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 1);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[1].size() == 1);
+ CHECK(Blocks[1][0] == MakeTestHash(2));
+}
+
+TEST_CASE("project.store.blockcomposer.path_b_exact_size_fill")
+{
+ // Path B immediate flush when pending reaches UsableBlockSize exactly (vs count-fill in path_b_exact_count_fill).
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 100;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 60 bytes -> Path B, pending = {60 bytes, 1 chunk}
+ // Op2: 40 bytes -> 60+40=100 == UsableSize -> Path B, immediate size-exact flush
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2)};
+ std::vector<uint64_t> Sizes = {60, 40};
+ std::vector<Oid> Keys = {Op1, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 2);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+}
+
+TEST_CASE("project.store.blockcomposer.path_d_size_limited_greedy")
+{
+ // Path D where greedy fill is limited by size (not count). MaxChunks=8 ensures size is binding.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 200; // 75% threshold = 150 bytes
+ constexpr uint64_t MaxChunks = 8;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 90 -> pending {90, 1 chunk}. Op2: 3 x 60 does not fit (270 > 200), pending <= threshold -> Path D:
+ // greedy fill takes one 60-byte chunk (150 <= 200), flushes -> block 1; remaining two chunks -> block 2.
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4)};
+ std::vector<uint64_t> Sizes = {90, 60, 60, 60};
+ std::vector<Oid> Keys = {Op1, Op2, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 2);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+ CHECK(Blocks[1].size() == 2);
+ CHECK(Blocks[1][0] == MakeTestHash(3));
+ CHECK(Blocks[1][1] == MakeTestHash(4));
+}
+
+TEST_CASE("project.store.blockcomposer.path_a_pending_untouched")
+{
+ // Path A leaves pending untouched: Op1 in pending, Op2 count-saturates -> standalone block. Final flush emits Op1.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 2 x 60 bytes -> Path B, pending = {2 chunks, 120 bytes}
+ // Op2: 4 x 100 bytes -> count reaches MaxChunks=4 -> CurrentOpFillFullBlock=true -> Path A
+ // Path A emits Op2 standalone as block 1; pending (Op1's chunks) is left untouched.
+ // Final flush emits pending -> block 2.
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5), MakeTestHash(6)};
+ std::vector<uint64_t> Sizes = {60, 60, 100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op2, Op2, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // Emission order: Op2's Path A block first, Op1's pending chunks only at the final flush.
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 4);
+ CHECK(Blocks[0][0] == MakeTestHash(3));
+ CHECK(Blocks[0][3] == MakeTestHash(6));
+ CHECK(Blocks[1].size() == 2);
+ CHECK(Blocks[1][0] == MakeTestHash(1));
+ CHECK(Blocks[1][1] == MakeTestHash(2));
+}
+
+// ---------------------------------------------------------------------------
+// BuildContainer-direct tests
+// ---------------------------------------------------------------------------
+
+TEST_CASE("buildcontainer.public_overload_smoke")
+{
+ // Verifies the public BuildContainer overload runs successfully and calls AsyncOnBlock.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_smoke", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ // Atomic counter: AsyncOnBlock is presumably invoked from pool worker threads — TODO confirm.
+ std::atomic<int> BlockCallCount{0};
+ CbObject Container = BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [&](CompressedBuffer&&, ChunkBlockDescription&&) { BlockCallCount.fetch_add(1); },
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false);
+
+ CHECK(Container.GetSize() > 0);
+ CHECK(BlockCallCount.load() >= 1);
+}
+
+TEST_CASE("buildcontainer.build_blocks_false_on_block_chunks")
+{
+ // BuildBlocks=false: small attachments go to OnBlockChunks instead of AsyncOnBlock.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_no_blocks", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024})));
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ std::atomic<int> BlockChunksCallCount{0};
+ CbObject Container = BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ // AsyncOnBlock must never fire when BuildBlocks=false.
+ [](CompressedBuffer&&, ChunkBlockDescription&&) { CHECK(false); },
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [&](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) { BlockChunksCallCount.fetch_add(1); },
+ /*EmbedLooseFiles=*/false);
+
+ CHECK(Container.GetSize() > 0);
+ CHECK(BlockChunksCallCount.load() >= 1);
+}
+
+TEST_CASE("buildcontainer.ignore_missing_binary_attachment_warn")
+{
+ // A bulk-data op references a hash that is absent from CidStore.
+ // SUBCASE warn: IgnoreMissingAttachments=true -> ReportMessage("Missing attachment ...").
+ // SUBCASE throw: IgnoreMissingAttachments=false -> std::runtime_error.
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ // Fabricate a hash not in CidStore and build a package that references it as a
+ // BinaryAttachment field but carries no inline attachment data.
+ IoBuffer FakeData = CreateRandomBlob(256);
+ IoHash FakeHash = IoHash::HashBuffer(FakeData);
+
+ CbObjectWriter Object;
+ Object << "key"sv << OidAsString(Oid::NewOid());
+ Object.BeginArray("bulkdata"sv);
+ {
+ Object.BeginObject();
+ Object << "id"sv << Oid::NewOid();
+ Object << "type"sv
+ << "Standard"sv;
+ Object.AddBinaryAttachment("data"sv, FakeHash);
+ Object.EndObject();
+ }
+ Object.EndArray();
+ CbPackage Package;
+ Package.SetObject(Object.Save());
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_missing_bin", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(Package);
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ SUBCASE("warn")
+ {
+ // This overload (size limits before the worker pool, trailing job context)
+ // reports the skipped attachment to Ctx instead of throwing.
+ CapturingJobContext Ctx;
+ BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/true,
+ /*AllowChunking=*/true,
+ {},
+ WorkerPool,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false,
+ &Ctx);
+ CHECK(Ctx.HasMessage("Missing attachment"));
+ }
+
+ SUBCASE("throw")
+ {
+ CHECK_THROWS(BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false));
+ }
+}
+
+TEST_CASE("buildcontainer.ignore_missing_file_attachment_warn")
+{
+ // File attachments are created on disk then deleted before BuildContainer runs.
+ // SUBCASE warn: IgnoreMissingAttachments=true -> ReportMessage("Missing attachment ...").
+ // SUBCASE throw: IgnoreMissingAttachments=false -> exception.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024});
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_missing_file", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts));
+
+ // Delete files before BuildContainer runs so RewriteOp finds them missing.
+ for (const auto& [Id, Path] : FileAtts)
+ {
+ std::filesystem::remove(Path);
+ }
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ SUBCASE("warn")
+ {
+ CapturingJobContext Ctx;
+ BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/true,
+ /*AllowChunking=*/true,
+ {},
+ WorkerPool,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/true,
+ &Ctx);
+ CHECK(Ctx.HasMessage("Missing attachment"));
+ }
+
+ SUBCASE("throw")
+ {
+ CHECK_THROWS(BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/true));
+ }
+}
+
+TEST_CASE("buildcontainer.zero_byte_file_attachment")
+{
+ // A zero-byte file on disk is a valid attachment. BuildContainer must process
+ // it without hitting ZEN_ASSERT(UploadAttachment->Size != 0) in
+ // ResolveAttachments. The empty file flows through the compress-inline path
+ // and becomes a LooseUploadAttachment with raw size 0.
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512});
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_zero_byte_file", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts));
+
+ // Truncate the file to zero bytes after the oplog entry is created.
+ // The file still exists on disk so RewriteOplog's IsFile() check passes,
+ // but MakeFromFile returns a zero-size buffer.
+ std::filesystem::resize_file(FileAtts[0].second, 0);
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CbObject Container = BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/true);
+
+ CHECK(Container.GetSize() > 0);
+
+ // The zero-byte attachment is packed into a block via the compress-inline path.
+ CbArrayView Blocks = Container["blocks"sv].AsArrayView();
+ CHECK(Blocks.Num() > 0);
+}
+
+TEST_CASE("buildcontainer.embed_loose_files_false_no_rewrite")
+{
+ // EmbedLooseFiles=false: RewriteOp is skipped for file-op entries; they pass through
+ // unchanged. Neither AsyncOnBlock nor OnLargeAttachment should fire.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_embed_false", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(
+ CreateFilesOplogPackage(Oid::NewOid(), RootDir, CreateFileAttachments(RootDir, std::initializer_list<size_t>{1024, 2048})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CbObject Container = BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) { CHECK(false); },
+ [](const IoHash&, TGetAttachmentBufferFunc&&) { CHECK(false); },
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false);
+
+ CHECK(Container.GetSize() > 0);
+}
+
+TEST_CASE("buildcontainer.allow_chunking_false")
+{
+ // AllowChunking=false: attachments exceeding ChunkFileSizeLimit skip chunking -> OnLargeAttachment.
+ // AllowChunking=true: same data is chunked, but chunk still exceeds MaxChunkEmbedSize -> OnLargeAttachment;
+ // exercises the AllowChunking branch in FindChunkSizes.
+ // 4 KB attachment: > MaxChunkEmbedSize (2 KB) and > ChunkFileSizeLimit (1 KB).
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ // None encoding: compressed ~ 4 KB > MaxChunkEmbedSize (2 KB) and ChunkFileSizeLimit (1 KB).
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_allow_chunk", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{4096}, OodleCompressionLevel::None)));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ // Tight limits so the 4 KB attachment exceeds both the embed size and the file-size limit.
+ constexpr size_t TestMaxBlockSize = 16u * 1024u;
+ constexpr size_t TestMaxChunkEmbedSize = 2u * 1024u;
+ constexpr size_t TestChunkFileSizeLimit = 1u * 1024u;
+
+ SUBCASE("allow_chunking_false")
+ {
+ std::atomic<int> LargeAttachmentCallCount{0};
+ BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ TestMaxBlockSize,
+ 1000,
+ TestMaxChunkEmbedSize,
+ TestChunkFileSizeLimit,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/false,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [&](const IoHash&, TGetAttachmentBufferFunc&&) { LargeAttachmentCallCount.fetch_add(1); },
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false);
+ CHECK(LargeAttachmentCallCount.load() >= 1);
+ }
+
+ SUBCASE("allow_chunking_true")
+ {
+ // Chunking branch in FindChunkSizes is taken, but the ~4 KB chunk still exceeds MaxChunkEmbedSize -> OnLargeAttachment.
+ std::atomic<int> LargeAttachmentCallCount{0};
+ BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ TestMaxBlockSize,
+ 1000,
+ TestMaxChunkEmbedSize,
+ TestChunkFileSizeLimit,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [&](const IoHash&, TGetAttachmentBufferFunc&&) { LargeAttachmentCallCount.fetch_add(1); },
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false);
+ CHECK(LargeAttachmentCallCount.load() >= 1);
+ }
+}
+
+TEST_CASE("buildcontainer.async_on_block_exception_propagates")
+{
+ // If AsyncOnBlock throws, the exception must propagate out of BuildContainer.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_block_exc", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024})));
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CHECK_THROWS_AS(BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ // Injected failure: the block callback throws on its first invocation.
+ [](CompressedBuffer&&, ChunkBlockDescription&&) { throw std::runtime_error("inject_block"); },
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false),
+ std::runtime_error);
+}
+
+TEST_CASE("buildcontainer.on_large_attachment_exception_propagates")
+{
+ // OnLargeAttachment exception must propagate. 64 KB with MaxChunkEmbedSize=32 KB -> OnLargeAttachment.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_large_exc", {});
+ REQUIRE(Oplog);
+ // 64 KB with OodleCompressionLevel::None -> compressed ~ 64 KB > MaxChunkEmbedSize (32 KB).
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(),
+ CreateAttachments(std::initializer_list<size_t>{64u * 1024u}, OodleCompressionLevel::None)));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CHECK_THROWS_AS(BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/false,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) { throw std::runtime_error("inject_large"); },
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false),
+ std::runtime_error);
+}
+
+TEST_CASE("buildcontainer.context_cancellation_aborts")
+{
+ // IsCancelled() returns true from the start; BuildContainer must not crash or throw.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_cancel", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024})));
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024})));
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ // Pre-set cancellation so the very first IsCancelled() poll aborts the run.
+ CapturingJobContext Ctx;
+ Ctx.m_Cancel = true;
+
+ CHECK_NOTHROW(BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ {},
+ WorkerPool,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false,
+ &Ctx));
+}
+
+TEST_CASE("buildcontainer.context_progress_reporting")
+{
+ // BuildContainer calls ReportProgress at least once ("Scanning oplog").
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_progress", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CapturingJobContext Ctx;
+ BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ {},
+ WorkerPool,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false,
+ &Ctx);
+
+ CHECK(!Ctx.ProgressMessages.empty());
+}
+
+TEST_CASE("getblocksfromoplog.filtered")
+{
+ // GetBlocksFromOplog(ContainerObject, IncludeBlockHashes) returns only the requested blocks.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore = SetupExportStore(CidStore, *Project, NetworkPool, WorkerPool, ExportDir.Path());
+
+ RemoteProjectStore::LoadContainerResult ContainerResult = RemoteStore->LoadContainer();
+ REQUIRE(ContainerResult.ErrorCode == 0);
+
+ std::vector<IoHash> AllBlockHashes = GetBlockHashesFromOplog(ContainerResult.ContainerObject);
+ REQUIRE(!AllBlockHashes.empty());
+
+ // Filter to the first block only.
+ std::vector<IoHash> Subset = {AllBlockHashes[0]};
+ std::vector<ThinChunkBlockDescription> Filtered = GetBlocksFromOplog(ContainerResult.ContainerObject, Subset);
+ CHECK(Filtered.size() == 1);
+ CHECK(Filtered[0].BlockHash == AllBlockHashes[0]);
+ CHECK(!Filtered[0].ChunkRawHashes.empty());
+
+ // Empty include set returns empty result (exercises the no-match branch).
+ std::vector<ThinChunkBlockDescription> Empty = GetBlocksFromOplog(ContainerResult.ContainerObject, std::span<const IoHash>{});
+ CHECK(Empty.empty());
+}
+
+// ---------------------------------------------------------------------------
+// SaveOplog-focused tests
+// ---------------------------------------------------------------------------
+
+TEST_CASE("saveoplog.cancellation")
+{
+ // IsCancelled() returns true from the start; SaveOplog must not throw.
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_cancel_save", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 2048}))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + CapturingJobContext Ctx; + Ctx.m_Cancel = true; + + CHECK_NOTHROW(RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_cancel_save", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/false, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + &Ctx, + /*ForceDisableBlocks=*/false)); +} + +// --------------------------------------------------------------------------- +// LoadOplog-focused tests +// --------------------------------------------------------------------------- + +TEST_CASE("loadoplog.missing_block_attachment_ignored") +{ + // Export creates a block file; deleting it then loading with IgnoreMissingAttachments=true + // must succeed and report the failure via "Failed to download block attachment". 
+ using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_block", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 2048, 512}))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_missing_block", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/false, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + RemoteProjectStore::GetKnownBlocksResult KnownBlocks = RemoteStore->GetKnownBlocks(); + REQUIRE(KnownBlocks.ErrorCode == 0); + REQUIRE(!KnownBlocks.Blocks.empty()); + + for (const ChunkBlockDescription& BlockDesc : KnownBlocks.Blocks) + { + std::string HexStr = BlockDesc.BlockHash.ToHexString(); + std::filesystem::path BlockPath = ExportDir.Path() / HexStr.substr(0, 3) / HexStr.substr(3, 2) / (HexStr.substr(5) + ".blob"); + REQUIRE(std::filesystem::exists(BlockPath)); + std::filesystem::remove(BlockPath); + } + + CapturingJobContext Ctx; + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_missing_block_import", {}); + CHECK_NOTHROW(LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + 
.IgnoreMissingAttachments = true, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &Ctx})); + CHECK(Ctx.HasMessage("Failed to download block attachment")); +} + +TEST_CASE("loadoplog.clean_oplog_with_populated_cache") +{ + // Second import with CleanOplog=true and a non-null cache exercises the OptionalCache->Flush() path. + using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + std::filesystem::path RootDir = TempDir.Path() / "root"; + std::filesystem::path EngineRootDir = TempDir.Path() / "engine"; + std::filesystem::path ProjectRootDir = TempDir.Path() / "game"; + std::filesystem::path ProjectFilePath = TempDir.Path() / "game" / "game.uproject"; + + // Export side. + GcManager ExportGc; + CidStore ExportCidStore(ExportGc); + CidStoreConfiguration ExportCidConfig = {.RootDirectory = TempDir.Path() / "export_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + ExportCidStore.Initialize(ExportCidConfig); + + std::filesystem::path ExportBasePath = TempDir.Path() / "export_projectstore"; + ProjectStore ExportProjectStore(ExportCidStore, ExportBasePath, ExportGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> ExportProject(ExportProjectStore.NewProject(ExportBasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore = + SetupExportStore(ExportCidStore, *ExportProject, NetworkPool, WorkerPool, ExportDir.Path()); + + // Import side, starts empty. 
+ GcManager ImportGc; + CidStore ImportCidStore(ImportGc); + CidStoreConfiguration ImportCidConfig = {.RootDirectory = TempDir.Path() / "import_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + ImportCidStore.Initialize(ImportCidConfig); + + std::filesystem::path ImportBasePath = TempDir.Path() / "import_projectstore"; + ProjectStore ImportProjectStore(ImportCidStore, ImportBasePath, ImportGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> ImportProject(ImportProjectStore.NewProject(ImportBasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); + + const Oid CacheBuildId = Oid::NewOid(); + BuildStorageCache::Statistics CacheStats; + std::unique_ptr<BuildStorageCache> Cache = CreateInMemoryBuildStorageCache(256u, CacheStats); + + { + Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog("oplog_clean_cache_p1", {}); + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = ImportCidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase1Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .PopulateCache = true}); + } + + { + Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog("oplog_clean_cache_p2", {}); + CHECK_NOTHROW(LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = ImportCidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase2Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + .IgnoreMissingAttachments = false, + .CleanOplog = true, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .PopulateCache = false})); + } +} + 
+TEST_CASE("project.store.export.block_reuse_fresh_receiver") +{ + // Regression test: after a second export that reuses existing blocks, a fresh import must still + // receive all chunks. The bug: FindReuseBlocks erases reused-block chunks from UploadAttachments, + // but never adds the reused blocks to the container's "blocks" section. A fresh receiver then + // silently misses those chunks because ParseOplogContainer never sees them. + using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + // -- Export side ---------------------------------------------------------- + GcManager ExportGc; + CidStore ExportCidStore(ExportGc); + CidStoreConfiguration ExportCidConfig = {.RootDirectory = TempDir.Path() / "export_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + ExportCidStore.Initialize(ExportCidConfig); + + std::filesystem::path ExportBasePath = TempDir.Path() / "export_projectstore"; + ProjectStore ExportProjectStore(ExportCidStore, ExportBasePath, ExportGc, ProjectStore::Configuration{}); + std::filesystem::path RootDir = TempDir.Path() / "root"; + std::filesystem::path EngineRootDir = TempDir.Path() / "engine"; + std::filesystem::path ProjectRootDir = TempDir.Path() / "game"; + std::filesystem::path ProjectFilePath = TempDir.Path() / "game" / "game.uproject"; + Ref<ProjectStore::Project> ExportProject(ExportProjectStore.NewProject(ExportBasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); + + // 20 KB with None encoding: compressed ~ 20 KB < MaxChunkEmbedSize (32 KB) -> packed into blocks. 
+ Ref<ProjectStore::Oplog> Oplog = ExportProject->NewOplog("oplog_reuse_rt", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oid::NewOid(), + CreateAttachments(std::initializer_list<size_t>{20u * 1024u, 20u * 1024u}, OodleCompressionLevel::None))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxBlockSize = 64u * 1024u; + constexpr size_t MaxChunksPerBlock = 1000; + constexpr size_t MaxChunkEmbedSize = 32u * 1024u; + constexpr size_t ChunkFileSizeLimit = 64u * 1024u * 1024u; + + // First export: creates blocks on disk. + FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = MaxBlockSize, + .MaxChunksPerBlock = MaxChunksPerBlock, + .MaxChunkEmbedSize = MaxChunkEmbedSize, + .ChunkFileSizeLimit = ChunkFileSizeLimit}, + /*.FolderPath =*/ExportDir.Path(), + /*.Name =*/std::string("oplog_reuse_rt"), + /*.OptionalBaseName =*/std::string(), + /*.ForceDisableBlocks =*/false, + /*.ForceEnableTempBlocks =*/false}; + + std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); + SaveOplog(Log(), + ExportCidStore, + *RemoteStore, + *ExportProject, + *Oplog, + NetworkPool, + WorkerPool, + MaxBlockSize, + MaxChunksPerBlock, + MaxChunkEmbedSize, + ChunkFileSizeLimit, + /*EmbedLooseFiles*/ true, + /*ForceUpload*/ false, + /*IgnoreMissingAttachments*/ false, + /*OptionalContext*/ nullptr); + + // Verify first export produced blocks. + RemoteProjectStore::GetKnownBlocksResult KnownAfterFirst = RemoteStore->GetKnownBlocks(); + REQUIRE(!KnownAfterFirst.Blocks.empty()); + + // Second export to the SAME store: triggers block reuse via GetKnownBlocks. 
+ SaveOplog(Log(), + ExportCidStore, + *RemoteStore, + *ExportProject, + *Oplog, + NetworkPool, + WorkerPool, + MaxBlockSize, + MaxChunksPerBlock, + MaxChunkEmbedSize, + ChunkFileSizeLimit, + /*EmbedLooseFiles*/ true, + /*ForceUpload*/ false, + /*IgnoreMissingAttachments*/ false, + /*OptionalContext*/ nullptr); + + // Verify the container has no duplicate block entries. + { + RemoteProjectStore::LoadContainerResult ContainerResult = RemoteStore->LoadContainer(); + REQUIRE(ContainerResult.ErrorCode == 0); + std::vector<IoHash> BlockHashes = GetBlockHashesFromOplog(ContainerResult.ContainerObject); + REQUIRE(!BlockHashes.empty()); + std::unordered_set<IoHash, IoHash::Hasher> UniqueBlockHashes(BlockHashes.begin(), BlockHashes.end()); + CHECK(UniqueBlockHashes.size() == BlockHashes.size()); + } + + // Collect all attachment hashes referenced by the oplog ops. + std::unordered_set<IoHash, IoHash::Hasher> ExpectedHashes; + Oplog->IterateOplogWithKey([&](int, const Oid&, CbObjectView Op) { + Op.IterateAttachments([&](CbFieldView FieldView) { ExpectedHashes.insert(FieldView.AsAttachment()); }); + }); + REQUIRE(!ExpectedHashes.empty()); + + // -- Import side (fresh, empty CAS) -------------------------------------- + GcManager ImportGc; + CidStore ImportCidStore(ImportGc); + CidStoreConfiguration ImportCidConfig = {.RootDirectory = TempDir.Path() / "import_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + ImportCidStore.Initialize(ImportCidConfig); + + std::filesystem::path ImportBasePath = TempDir.Path() / "import_projectstore"; + ProjectStore ImportProjectStore(ImportCidStore, ImportBasePath, ImportGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> ImportProject(ImportProjectStore.NewProject(ImportBasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); + + Ref<ProjectStore::Oplog> ImportOplog = ImportProject->NewOplog("oplog_reuse_rt_import", {}); + 
REQUIRE(ImportOplog); + + LoadOplog(LoadOplogContext{.Log = Log(), + .ChunkStore = ImportCidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All}); + + // Every attachment hash from the original oplog must be present in the import CAS. + for (const IoHash& Hash : ExpectedHashes) + { + CHECK_MESSAGE(ImportCidStore.ContainsChunk(Hash), "Missing chunk after import: ", Hash); + } +} + +TEST_SUITE_END(); + #endif // ZEN_WITH_TESTS void diff --git a/src/zenremotestore/projectstore/zenremoteprojectstore.cpp b/src/zenremotestore/projectstore/zenremoteprojectstore.cpp index b4c1156ac..a08a07fcd 100644 --- a/src/zenremotestore/projectstore/zenremoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/zenremoteprojectstore.cpp @@ -159,7 +159,8 @@ public: virtual LoadAttachmentsResult LoadAttachments(const std::vector<IoHash>& RawHashes) override { - std::string LoadRequest = fmt::format("/{}/oplog/{}/rpc"sv, m_Project, m_Oplog); + LoadAttachmentsResult Result; + std::string LoadRequest = fmt::format("/{}/oplog/{}/rpc"sv, m_Project, m_Oplog); CbObject Request; { @@ -187,7 +188,7 @@ public: HttpClient::Response Response = m_Client.Post(LoadRequest, Request, HttpClient::Accept(ZenContentType::kCbPackage)); AddStats(Response); - LoadAttachmentsResult Result = LoadAttachmentsResult{ConvertResult(Response)}; + Result = LoadAttachmentsResult{ConvertResult(Response)}; if (Result.ErrorCode) { Result.Reason = fmt::format("Failed fetching {} oplog attachments from {}/{}/{}. 
Reason: '{}'", @@ -249,25 +250,23 @@ public: return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent)}}; } - virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes) override + virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes, + BuildStorageCache* OptionalCache, + const Oid& CacheBuildId) override { - ZEN_UNUSED(BlockHashes); + ZEN_UNUSED(BlockHashes, OptionalCache, CacheBuildId); return GetBlockDescriptionsResult{Result{.ErrorCode = int(HttpResponseCode::NotFound)}}; } - virtual AttachmentExistsInCacheResult AttachmentExistsInCache(std::span<const IoHash> RawHashes) override - { - return AttachmentExistsInCacheResult{Result{.ErrorCode = 0}, std::vector<bool>(RawHashes.size(), false)}; - } - - virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash, const AttachmentRange& Range) override + virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override { + LoadAttachmentResult Result; std::string LoadRequest = fmt::format("/{}/oplog/{}/{}"sv, m_Project, m_Oplog, RawHash); HttpClient::Response Response = m_Client.Download(LoadRequest, m_TempFilePath, HttpClient::Accept(ZenContentType::kCompressedBinary)); AddStats(Response); - LoadAttachmentResult Result = LoadAttachmentResult{ConvertResult(Response)}; + Result = LoadAttachmentResult{ConvertResult(Response)}; if (Result.ErrorCode) { Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}/{}. 
Reason: '{}'", @@ -277,20 +276,38 @@ public: RawHash, Result.Reason); } - if (!Result.ErrorCode && Range) + Result.Bytes = Response.ResponsePayload; + Result.Bytes.MakeOwned(); + return Result; + } + + virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash, + std::span<const std::pair<uint64_t, uint64_t>> Ranges) override + { + ZEN_ASSERT(!Ranges.empty()); + LoadAttachmentRangesResult Result; + std::string LoadRequest = fmt::format("/{}/oplog/{}/{}"sv, m_Project, m_Oplog, RawHash); + HttpClient::Response Response = + m_Client.Download(LoadRequest, m_TempFilePath, HttpClient::Accept(ZenContentType::kCompressedBinary)); + AddStats(Response); + + Result = LoadAttachmentRangesResult{ConvertResult(Response)}; + if (Result.ErrorCode) { - Result.Bytes = IoBuffer(Response.ResponsePayload, Range.Offset, Range.Bytes); + Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}/{}. Reason: '{}'", + m_ProjectStoreUrl, + m_Project, + m_Oplog, + RawHash, + Result.Reason); } else { - Result.Bytes = Response.ResponsePayload; + Result.Ranges = std::vector<std::pair<uint64_t, uint64_t>>(Ranges.begin(), Ranges.end()); } - Result.Bytes.MakeOwned(); return Result; } - virtual void Flush() override {} - private: void AddStats(const HttpClient::Response& Result) { @@ -312,7 +329,7 @@ private: { if (Response.Error) { - return {.ErrorCode = Response.Error.value().ErrorCode, + return {.ErrorCode = static_cast<int32_t>(Response.Error.value().ErrorCode), .ElapsedSeconds = Response.ElapsedSeconds, .Reason = Response.ErrorMessage(""), .Text = Response.ToText()}; diff --git a/src/zenremotestore/zenremotestore.cpp b/src/zenremotestore/zenremotestore.cpp index a0bb17260..74d0efb9e 100644 --- a/src/zenremotestore/zenremotestore.cpp +++ b/src/zenremotestore/zenremotestore.cpp @@ -4,28 +4,31 @@ #include <zenremotestore/builds/buildmanifest.h> #include <zenremotestore/builds/buildsavedstate.h> -#include <zenremotestore/builds/buildstorageoperations.h> +#include 
<zenremotestore/builds/buildstorageutil.h> +#include <zenremotestore/builds/jupiterbuildstorage.h> #include <zenremotestore/chunking/chunkedcontent.h> #include <zenremotestore/chunking/chunkedfile.h> #include <zenremotestore/chunking/chunkingcache.h> -#include <zenremotestore/filesystemutils.h> #include <zenremotestore/projectstore/remoteprojectstore.h> #if ZEN_WITH_TESTS namespace zen { +void buildoperations_tests_forcelink(); + void zenremotestore_forcelinktests() { buildmanifest_forcelink(); buildsavedstate_forcelink(); - buildstorageoperations_forcelink(); + jupiterbuildstorage_forcelink(); + buildstorageutil_forcelink(); + buildoperations_tests_forcelink(); chunkblock_forcelink(); chunkedcontent_forcelink(); chunkedfile_forcelink(); chunkingcache_forcelink(); - filesystemutils_forcelink(); remoteprojectstore_forcelink(); } |