aboutsummaryrefslogtreecommitdiff
path: root/src/zenremotestore
diff options
context:
space:
mode:
Diffstat (limited to 'src/zenremotestore')
-rw-r--r--src/zenremotestore/builds/buildinspect.cpp463
-rw-r--r--src/zenremotestore/builds/buildmanifest.cpp4
-rw-r--r--src/zenremotestore/builds/buildoperations-tests.cpp454
-rw-r--r--src/zenremotestore/builds/buildprimecache.cpp350
-rw-r--r--src/zenremotestore/builds/buildsavedstate.cpp12
-rw-r--r--src/zenremotestore/builds/buildstoragecache.cpp353
-rw-r--r--src/zenremotestore/builds/buildstorageoperations.cpp7943
-rw-r--r--src/zenremotestore/builds/buildstorageresolve.cpp249
-rw-r--r--src/zenremotestore/builds/buildstorageutil.cpp1792
-rw-r--r--src/zenremotestore/builds/buildupdatefolder.cpp4947
-rw-r--r--src/zenremotestore/builds/builduploadfolder.cpp2634
-rw-r--r--src/zenremotestore/builds/buildvalidatebuildpart.cpp374
-rw-r--r--src/zenremotestore/builds/filebuildstorage.cpp39
-rw-r--r--src/zenremotestore/builds/jupiterbuildstorage.cpp233
-rw-r--r--src/zenremotestore/chunking/chunkblock.cpp1749
-rw-r--r--src/zenremotestore/chunking/chunkedcontent.cpp8
-rw-r--r--src/zenremotestore/chunking/chunkedfile.cpp4
-rw-r--r--src/zenremotestore/chunking/chunkingcache.cpp12
-rw-r--r--src/zenremotestore/filesystemutils.cpp697
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildinspect.h60
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildprimecache.h96
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildsavedstate.h5
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildstorage.h21
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h22
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h774
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildstorageresolve.h46
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildstoragestats.h182
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h139
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildupdatefolder.h529
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/builduploadfolder.h393
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/buildvalidatebuildpart.h125
-rw-r--r--src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h2
-rw-r--r--src/zenremotestore/include/zenremotestore/chunking/chunkblock.h70
-rw-r--r--src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h3
-rw-r--r--src/zenremotestore/include/zenremotestore/filesystemutils.h121
-rw-r--r--src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h4
-rw-r--r--src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h12
-rw-r--r--src/zenremotestore/include/zenremotestore/operationlogoutput.h78
-rw-r--r--src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h22
-rw-r--r--src/zenremotestore/include/zenremotestore/projectstore/projectstoreoperations.h35
-rw-r--r--src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h177
-rw-r--r--src/zenremotestore/include/zenremotestore/transferthreadworkers.h1
-rw-r--r--src/zenremotestore/jupiter/jupiterhost.cpp11
-rw-r--r--src/zenremotestore/jupiter/jupitersession.cpp70
-rw-r--r--src/zenremotestore/operationlogoutput.cpp103
-rw-r--r--src/zenremotestore/projectstore/buildsremoteprojectstore.cpp303
-rw-r--r--src/zenremotestore/projectstore/fileremoteprojectstore.cpp269
-rw-r--r--src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp43
-rw-r--r--src/zenremotestore/projectstore/projectstoreoperations.cpp258
-rw-r--r--src/zenremotestore/projectstore/remoteprojectstore.cpp8669
-rw-r--r--src/zenremotestore/projectstore/zenremoteprojectstore.cpp53
-rw-r--r--src/zenremotestore/zenremotestore.cpp11
52 files changed, 21257 insertions, 13767 deletions
diff --git a/src/zenremotestore/builds/buildinspect.cpp b/src/zenremotestore/builds/buildinspect.cpp
new file mode 100644
index 000000000..1af9e20af
--- /dev/null
+++ b/src/zenremotestore/builds/buildinspect.cpp
@@ -0,0 +1,463 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenremotestore/builds/buildinspect.h>
+
+#include <zencore/compactbinarybuilder.h>
+#include <zencore/fmtutils.h>
+#include <zencore/scopeguard.h>
+#include <zencore/timer.h>
+#include <zencore/trace.h>
+#include <zenremotestore/builds/buildcontent.h>
+#include <zenremotestore/builds/buildmanifest.h>
+#include <zenremotestore/builds/buildstoragecache.h>
+#include <zenremotestore/builds/buildupdatefolder.h>
+#include <zenremotestore/builds/builduploadfolder.h>
+#include <zenremotestore/chunking/chunkingcache.h>
+#include <zenremotestore/chunking/chunkingcontroller.h>
+#include <zenremotestore/transferthreadworkers.h>
+#include <zenutil/filesystemutils.h>
+#include <zenutil/filteredrate.h>
+#include <zenutil/progress.h>
+#include <zenutil/wildcard.h>
+
+#include <numeric>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_map.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+namespace zen {
+
+using namespace std::literals;
+
+ChunkedFolderContent
+ScanAndChunkFolder(ProgressBase& Progress,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ bool IsQuiet,
+ TransferThreadWorkers& Workers,
+ GetFolderContentStatistics& GetFolderContentStats,
+ ChunkingStatistics& ChunkingStats,
+ const std::filesystem::path& Path,
+ std::function<bool(const std::string_view& RelativePath)>&& IsAcceptedFolder,
+ std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& IsAcceptedFile,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache)
+{
+ Stopwatch Timer;
+
+ ZEN_TRACE_CPU("ScanAndChunkFolder");
+
+ FolderContent Content = GetFolderContent(
+ GetFolderContentStats,
+ Path,
+ std::move(IsAcceptedFolder),
+ std::move(IsAcceptedFile),
+ Workers.GetIOWorkerPool(),
+ Progress.GetProgressUpdateDelayMS(),
+ [](bool, std::ptrdiff_t) {},
+ AbortFlag);
+ if (AbortFlag)
+ {
+ return {};
+ }
+
+ BuildState LocalContent = GetLocalContent(Progress,
+ AbortFlag,
+ PauseFlag,
+ IsQuiet,
+ Workers,
+ GetFolderContentStats,
+ ChunkingStats,
+ Path,
+ ZenStateFilePath(Path / ZenFolderName),
+ ChunkController,
+ ChunkCache)
+ .State;
+
+ std::vector<std::filesystem::path> UntrackedPaths = GetNewPaths(LocalContent.ChunkedContent.Paths, Content.Paths);
+
+ BuildState UntrackedLocalContent = GetLocalStateFromPaths(Progress,
+ AbortFlag,
+ PauseFlag,
+ Workers,
+ GetFolderContentStats,
+ ChunkingStats,
+ Path,
+ ChunkController,
+ ChunkCache,
+ UntrackedPaths)
+ .State;
+
+ ChunkedFolderContent Result =
+ MergeChunkedFolderContents(LocalContent.ChunkedContent, std::vector<ChunkedFolderContent>{UntrackedLocalContent.ChunkedContent});
+
+ const uint64_t TotalRawSize = std::accumulate(Result.RawSizes.begin(), Result.RawSizes.end(), std::uint64_t(0));
+ const uint64_t ChunkedRawSize =
+ std::accumulate(Result.ChunkedContent.ChunkRawSizes.begin(), Result.ChunkedContent.ChunkRawSizes.end(), std::uint64_t(0));
+
+ if (!IsQuiet)
+ {
+ ZEN_CONSOLE("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec",
+ Result.Paths.size(),
+ NiceBytes(TotalRawSize),
+ Result.ChunkedContent.ChunkHashes.size(),
+ NiceBytes(ChunkedRawSize),
+ Path,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()),
+ NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed)));
+ }
+ return Result;
+};
+
+void
+ListBuild(bool IsQuiet,
+ StorageInstance& Storage,
+ const Oid& BuildId,
+ const std::vector<Oid>& BuildPartIds,
+ std::span<const std::string> BuildPartNames,
+ std::span<const std::string> IncludeWildcards,
+ std::span<const std::string> ExcludeWildcards,
+ CbObjectWriter* OptionalStructuredOutput)
+{
+ std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;
+
+ CbObject BuildObject = GetBuild(*Storage.BuildStorage, BuildId, IsQuiet);
+
+ if (OptionalStructuredOutput != nullptr)
+ {
+ OptionalStructuredOutput->AddObjectId("buildId"sv, BuildId);
+ OptionalStructuredOutput->AddObject("build"sv, BuildObject);
+ }
+
+ std::vector<std::pair<Oid, std::string>> AllBuildParts =
+ ResolveBuildPartNames(BuildObject, BuildId, BuildPartIds, BuildPartNames, PreferredMultipartChunkSize);
+
+ if (!AllBuildParts.empty())
+ {
+ Stopwatch GetBuildPartTimer;
+
+ if (OptionalStructuredOutput != nullptr)
+ {
+ OptionalStructuredOutput->BeginArray("parts"sv);
+ }
+
+ for (size_t BuildPartIndex = 0; BuildPartIndex < AllBuildParts.size(); BuildPartIndex++)
+ {
+ const Oid BuildPartId = AllBuildParts[BuildPartIndex].first;
+ const std::string_view BuildPartName = AllBuildParts[BuildPartIndex].second;
+ CbObject BuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, BuildPartId);
+
+ if (OptionalStructuredOutput != nullptr)
+ {
+ OptionalStructuredOutput->BeginObject();
+ OptionalStructuredOutput->AddObjectId("id"sv, BuildPartId);
+ OptionalStructuredOutput->AddString("partName"sv, BuildPartName);
+ }
+ {
+ if (OptionalStructuredOutput != nullptr)
+ {
+ }
+ else if (!IsQuiet)
+ {
+ ZEN_CONSOLE("{}Part: {} ('{}'):\n",
+ BuildPartIndex > 0 ? "\n" : "",
+ BuildPartId,
+ BuildPartName,
+ NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()),
+ NiceBytes(BuildPartManifest.GetSize()));
+ }
+
+ std::vector<std::filesystem::path> Paths;
+ std::vector<IoHash> RawHashes;
+ std::vector<uint64_t> RawSizes;
+ std::vector<uint32_t> Attributes;
+
+ SourcePlatform Platform;
+ std::vector<IoHash> SequenceRawHashes;
+ std::vector<uint32_t> ChunkCounts;
+ std::vector<uint32_t> AbsoluteChunkOrders;
+ std::vector<IoHash> LooseChunkHashes;
+ std::vector<uint64_t> LooseChunkRawSizes;
+ std::vector<IoHash> BlockRawHashes;
+
+ ReadBuildContentFromCompactBinary(BuildPartManifest,
+ Platform,
+ Paths,
+ RawHashes,
+ RawSizes,
+ Attributes,
+ SequenceRawHashes,
+ ChunkCounts,
+ AbsoluteChunkOrders,
+ LooseChunkHashes,
+ LooseChunkRawSizes,
+ BlockRawHashes);
+
+ std::vector<size_t> Order(Paths.size());
+ std::iota(Order.begin(), Order.end(), 0);
+
+ std::sort(Order.begin(), Order.end(), [&](size_t Lhs, size_t Rhs) {
+ const std::filesystem::path& LhsPath = Paths[Lhs];
+ const std::filesystem::path& RhsPath = Paths[Rhs];
+ return LhsPath < RhsPath;
+ });
+
+ if (OptionalStructuredOutput != nullptr)
+ {
+ OptionalStructuredOutput->BeginArray("files"sv);
+ }
+ {
+ for (size_t Index : Order)
+ {
+ const std::filesystem::path& Path = Paths[Index];
+ if (IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(Path.generic_string()), /*CaseSensitive*/ true))
+ {
+ const IoHash& RawHash = RawHashes[Index];
+ const uint64_t RawSize = RawSizes[Index];
+ const uint32_t Attribute = Attributes[Index];
+
+ if (OptionalStructuredOutput != nullptr)
+ {
+ OptionalStructuredOutput->BeginObject();
+ {
+ OptionalStructuredOutput->AddString("path"sv, fmt::format("{}", Path));
+ OptionalStructuredOutput->AddInteger("rawSize"sv, RawSize);
+ OptionalStructuredOutput->AddHash("rawHash"sv, RawHash);
+ switch (Platform)
+ {
+ case SourcePlatform::Windows:
+ OptionalStructuredOutput->AddInteger("attributes"sv, Attribute);
+ break;
+ case SourcePlatform::MacOS:
+ case SourcePlatform::Linux:
+ OptionalStructuredOutput->AddString("chmod"sv, fmt::format("{:#04o}", Attribute));
+ break;
+ default:
+ throw std::runtime_error(fmt::format("Unsupported platform: {}", (int)Platform));
+ }
+ }
+ OptionalStructuredOutput->EndObject();
+ }
+ else
+ {
+ ZEN_CONSOLE("{}\t{}\t{}", Path, RawSize, RawHash);
+ }
+ }
+ }
+ }
+ if (OptionalStructuredOutput != nullptr)
+ {
+ OptionalStructuredOutput->EndArray(); // "files"
+ }
+ }
+ if (OptionalStructuredOutput != nullptr)
+ {
+ OptionalStructuredOutput->EndObject();
+ }
+ }
+ if (OptionalStructuredOutput != nullptr)
+ {
+ OptionalStructuredOutput->EndArray(); // parts
+ }
+ }
+}
+
+void
+DiffFolders(ProgressBase& Progress,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ bool IsQuiet,
+ TransferThreadWorkers& Workers,
+ const std::filesystem::path& BasePath,
+ const std::filesystem::path& ComparePath,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
+ const std::vector<std::string>& ExcludeFolders,
+ const std::vector<std::string>& ExcludeExtensions)
+{
+ ZEN_TRACE_CPU("DiffFolders");
+
+ Progress.SetLogOperationName("Diff Folders");
+
+ enum TaskSteps : uint32_t
+ {
+ CheckBase,
+ CheckCompare,
+ Diff,
+ Cleanup,
+ StepCount
+ };
+
+ auto EndProgress = MakeGuard([&]() { Progress.SetLogOperationProgress(TaskSteps::StepCount, TaskSteps::StepCount); });
+
+ ChunkedFolderContent BaseFolderContent;
+ ChunkedFolderContent CompareFolderContent;
+
+ {
+ auto IsAcceptedFolder = [ExcludeFolders](const std::string_view& RelativePath) -> bool {
+ for (const std::string& ExcludeFolder : ExcludeFolders)
+ {
+ if (RelativePath.starts_with(ExcludeFolder))
+ {
+ if (RelativePath.length() == ExcludeFolder.length())
+ {
+ return false;
+ }
+ else if (RelativePath[ExcludeFolder.length()] == '/')
+ {
+ return false;
+ }
+ }
+ }
+ return true;
+ };
+
+ auto IsAcceptedFile = [ExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> bool {
+ for (const std::string& ExcludeExtension : ExcludeExtensions)
+ {
+ if (RelativePath.ends_with(ExcludeExtension))
+ {
+ return false;
+ }
+ }
+ return true;
+ };
+
+ Progress.SetLogOperationProgress(TaskSteps::CheckBase, TaskSteps::StepCount);
+
+ GetFolderContentStatistics BaseGetFolderContentStats;
+ ChunkingStatistics BaseChunkingStats;
+ BaseFolderContent = ScanAndChunkFolder(Progress,
+ AbortFlag,
+ PauseFlag,
+ IsQuiet,
+ Workers,
+ BaseGetFolderContentStats,
+ BaseChunkingStats,
+ BasePath,
+ IsAcceptedFolder,
+ IsAcceptedFile,
+ ChunkController,
+ ChunkCache);
+ if (AbortFlag)
+ {
+ return;
+ }
+
+ Progress.SetLogOperationProgress(TaskSteps::CheckCompare, TaskSteps::StepCount);
+
+ GetFolderContentStatistics CompareGetFolderContentStats;
+ ChunkingStatistics CompareChunkingStats;
+ CompareFolderContent = ScanAndChunkFolder(Progress,
+ AbortFlag,
+ PauseFlag,
+ IsQuiet,
+ Workers,
+ CompareGetFolderContentStats,
+ CompareChunkingStats,
+ ComparePath,
+ IsAcceptedFolder,
+ IsAcceptedFile,
+ ChunkController,
+ ChunkCache);
+
+ if (AbortFlag)
+ {
+ return;
+ }
+ }
+
+ Progress.SetLogOperationProgress(TaskSteps::Diff, TaskSteps::StepCount);
+
+ std::vector<IoHash> AddedHashes;
+ std::vector<IoHash> RemovedHashes;
+ uint64_t RemovedSize = 0;
+ uint64_t AddedSize = 0;
+
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseRawHashLookup;
+ for (size_t PathIndex = 0; PathIndex < BaseFolderContent.RawHashes.size(); PathIndex++)
+ {
+ const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex];
+ BaseRawHashLookup.insert_or_assign(RawHash, PathIndex);
+ }
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CompareRawHashLookup;
+ for (size_t PathIndex = 0; PathIndex < CompareFolderContent.RawHashes.size(); PathIndex++)
+ {
+ const IoHash& RawHash = CompareFolderContent.RawHashes[PathIndex];
+ if (!BaseRawHashLookup.contains(RawHash))
+ {
+ AddedHashes.push_back(RawHash);
+ AddedSize += CompareFolderContent.RawSizes[PathIndex];
+ }
+ CompareRawHashLookup.insert_or_assign(RawHash, PathIndex);
+ }
+ for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++)
+ {
+ const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex];
+ if (!CompareRawHashLookup.contains(RawHash))
+ {
+ RemovedHashes.push_back(RawHash);
+ RemovedSize += BaseFolderContent.RawSizes[PathIndex];
+ }
+ }
+
+ uint64_t BaseTotalRawSize = 0;
+ for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++)
+ {
+ BaseTotalRawSize += BaseFolderContent.RawSizes[PathIndex];
+ }
+
+ double KeptPercent = BaseTotalRawSize > 0 ? (100.0 * (BaseTotalRawSize - RemovedSize)) / BaseTotalRawSize : 0;
+
+ ZEN_CONSOLE("File diff : {} ({}) removed, {} ({}) added, {} ({} {:.1f}%) kept",
+ RemovedHashes.size(),
+ NiceBytes(RemovedSize),
+ AddedHashes.size(),
+ NiceBytes(AddedSize),
+ BaseFolderContent.Paths.size() - RemovedHashes.size(),
+ NiceBytes(BaseTotalRawSize - RemovedSize),
+ KeptPercent);
+
+ uint64_t CompareTotalRawSize = 0;
+
+ uint64_t FoundChunkCount = 0;
+ uint64_t FoundChunkSize = 0;
+ uint64_t NewChunkCount = 0;
+ uint64_t NewChunkSize = 0;
+ const ChunkedContentLookup BaseFolderLookup = BuildChunkedContentLookup(BaseFolderContent);
+ for (uint32_t ChunkIndex = 0; ChunkIndex < CompareFolderContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++)
+ {
+ const IoHash& ChunkHash = CompareFolderContent.ChunkedContent.ChunkHashes[ChunkIndex];
+ if (BaseFolderLookup.ChunkHashToChunkIndex.contains(ChunkHash))
+ {
+ FoundChunkCount++;
+ FoundChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
+ }
+ else
+ {
+ NewChunkCount++;
+ NewChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
+ }
+ CompareTotalRawSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
+ }
+
+ double FoundPercent = CompareTotalRawSize > 0 ? (100.0 * FoundChunkSize) / CompareTotalRawSize : 0;
+ double NewPercent = CompareTotalRawSize > 0 ? (100.0 * NewChunkSize) / CompareTotalRawSize : 0;
+
+ ZEN_CONSOLE("Chunk diff: {} ({} {:.1f}%) out of {} ({}) chunks in {} ({}) base chunks. Added {} ({} {:.1f}%) chunks.",
+ FoundChunkCount,
+ NiceBytes(FoundChunkSize),
+ FoundPercent,
+ CompareFolderContent.ChunkedContent.ChunkHashes.size(),
+ NiceBytes(CompareTotalRawSize),
+ BaseFolderContent.ChunkedContent.ChunkHashes.size(),
+ NiceBytes(BaseTotalRawSize),
+ NewChunkCount,
+ NiceBytes(NewChunkSize),
+ NewPercent);
+
+ Progress.SetLogOperationProgress(TaskSteps::Cleanup, TaskSteps::StepCount);
+}
+
+} // namespace zen
diff --git a/src/zenremotestore/builds/buildmanifest.cpp b/src/zenremotestore/builds/buildmanifest.cpp
index 051436e96..738e4b33b 100644
--- a/src/zenremotestore/builds/buildmanifest.cpp
+++ b/src/zenremotestore/builds/buildmanifest.cpp
@@ -97,6 +97,8 @@ ParseBuildManifest(const std::filesystem::path& ManifestPath)
}
#if ZEN_WITH_TESTS
+TEST_SUITE_BEGIN("remotestore.buildmanifest");
+
TEST_CASE("buildmanifest.unstructured")
{
ScopedTemporaryDirectory Root;
@@ -163,6 +165,8 @@ TEST_CASE("buildmanifest.structured")
CHECK_EQ(Manifest.Parts[1].Files[0].generic_string(), "baz.pdb");
}
+TEST_SUITE_END();
+
void
buildmanifest_forcelink()
{
diff --git a/src/zenremotestore/builds/buildoperations-tests.cpp b/src/zenremotestore/builds/buildoperations-tests.cpp
new file mode 100644
index 000000000..b1c856193
--- /dev/null
+++ b/src/zenremotestore/builds/buildoperations-tests.cpp
@@ -0,0 +1,454 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+// Round-trip integration tests for BuildsOperationUploadFolder / BuildsOperationUpdateFolder.
+// Runs in-process against CreateFileBuildStorage so no HTTP server is needed.
+
+#include <zenremotestore/builds/buildupdatefolder.h>
+#include <zenremotestore/builds/builduploadfolder.h>
+#include <zenremotestore/builds/filebuildstorage.h>
+#include <zenremotestore/chunking/chunkingcache.h>
+#include <zenremotestore/chunking/chunkingcontroller.h>
+#include <zenremotestore/transferthreadworkers.h>
+
+#include <zencore/basicfile.h>
+#include <zencore/compactbinary.h>
+#include <zencore/compactbinarybuilder.h>
+#include <zencore/filesystem.h>
+#include <zencore/fmtutils.h>
+#include <zencore/iohash.h>
+#include <zencore/logging.h>
+#include <zencore/scopeguard.h>
+#include <zencore/testing.h>
+#include <zencore/testutils.h>
+#include <zencore/workthreadpool.h>
+#include <zenutil/progress.h>
+
+#include <algorithm>
+#include <atomic>
+#include <filesystem>
+#include <string>
+#include <vector>
+
+namespace zen {
+
+void
+buildoperations_tests_forcelink()
+{
+}
+
+#if ZEN_WITH_TESTS
+
+namespace buildops_test {
+
+ using namespace std::literals;
+
+ struct FolderSpec
+ {
+ uint64_t Seed = 1;
+ uint32_t SmallFileCount = 40;
+ uint32_t MediumFileCount = 10;
+ uint32_t LargeFileCount = 2;
+ uint32_t DuplicateFileCount = 6;
+ };
+
+ static IoBuffer MakeBlob(uint64_t Seed, size_t Size)
+ {
+ FastRandom Rnd{.Seed = Seed};
+ IoBuffer Blob(Size);
+ uint8_t* Data = static_cast<uint8_t*>(Blob.MutableData());
+ size_t Offset = 0;
+ while (Offset < Size)
+ {
+ uint64_t Word = Rnd.Next();
+ size_t Chunk = std::min<size_t>(sizeof(Word), Size - Offset);
+ std::memcpy(Data + Offset, &Word, Chunk);
+ Offset += Chunk;
+ }
+ return Blob;
+ }
+
+ static void WriteTestFile(const std::filesystem::path& Path, const IoBuffer& Blob)
+ {
+ CreateDirectories(Path.parent_path());
+ zen::WriteFile(Path, Blob);
+ }
+
+ static std::vector<std::filesystem::path> MakeTestFolder(const std::filesystem::path& Root, const FolderSpec& Spec)
+ {
+ CreateDirectories(Root);
+ FastRandom Rnd{.Seed = Spec.Seed};
+
+ std::vector<std::filesystem::path> Written;
+ Written.reserve(Spec.SmallFileCount + Spec.MediumFileCount + Spec.LargeFileCount + Spec.DuplicateFileCount);
+
+ auto Emit = [&](std::string_view SubDir, uint32_t Index, size_t Size) {
+ std::filesystem::path Rel = std::filesystem::path(std::string(SubDir)) / fmt::format("f_{:05}.bin", Index);
+ WriteTestFile(Root / Rel, MakeBlob(Spec.Seed * 7919ull + Index, Size));
+ Written.push_back(Rel);
+ };
+
+ for (uint32_t I = 0; I < Spec.SmallFileCount; ++I)
+ {
+ Emit("small", I, 1024u + static_cast<size_t>(Rnd.Next() & 0xFFFu));
+ }
+ for (uint32_t I = 0; I < Spec.MediumFileCount; ++I)
+ {
+ Emit("medium", I, 60u * 1024u + static_cast<size_t>(Rnd.Next() & 0x3FFFu));
+ }
+ for (uint32_t I = 0; I < Spec.LargeFileCount; ++I)
+ {
+ Emit("large", I, 900u * 1024u + static_cast<size_t>(Rnd.Next() & 0x1FFFFu));
+ }
+
+ // Duplicates of previously-written small files so upload can re-use blocks / chunks.
+ for (uint32_t I = 0; I < Spec.DuplicateFileCount && Spec.SmallFileCount > 0; ++I)
+ {
+ std::filesystem::path Source = Root / Written[I % Spec.SmallFileCount];
+ std::filesystem::path Rel = std::filesystem::path("dupes") / fmt::format("d_{:05}.bin", I);
+ CreateDirectories((Root / Rel).parent_path());
+ std::error_code Ec;
+ std::filesystem::copy_file(Source, Root / Rel, std::filesystem::copy_options::overwrite_existing, Ec);
+ if (!Ec)
+ {
+ Written.push_back(Rel);
+ }
+ }
+
+ return Written;
+ }
+
+ static void CopyTreeExcludingZen(const std::filesystem::path& Src, const std::filesystem::path& Dst)
+ {
+ CreateDirectories(Dst);
+ std::error_code Ec;
+ for (auto It = std::filesystem::recursive_directory_iterator(Src, Ec); !Ec && It != std::filesystem::recursive_directory_iterator();
+ It.increment(Ec))
+ {
+ const std::filesystem::path Rel = std::filesystem::relative(It->path(), Src);
+ if (!Rel.empty() && Rel.begin()->string() == ".zen")
+ {
+ It.disable_recursion_pending();
+ continue;
+ }
+ if (It->is_directory())
+ {
+ CreateDirectories(Dst / Rel);
+ }
+ else if (It->is_regular_file())
+ {
+ CreateDirectories((Dst / Rel).parent_path());
+ std::error_code CopyEc;
+ std::filesystem::copy_file(It->path(), Dst / Rel, std::filesystem::copy_options::overwrite_existing, CopyEc);
+ }
+ }
+ }
+
+ static std::vector<std::filesystem::path> ListRelative(const std::filesystem::path& Root)
+ {
+ std::vector<std::filesystem::path> Paths;
+ std::error_code Ec;
+ for (auto It = std::filesystem::recursive_directory_iterator(Root, Ec);
+ !Ec && It != std::filesystem::recursive_directory_iterator();
+ It.increment(Ec))
+ {
+ const std::filesystem::path Rel = std::filesystem::relative(It->path(), Root);
+ if (!Rel.empty() && Rel.begin()->string() == ".zen")
+ {
+ It.disable_recursion_pending();
+ continue;
+ }
+ if (It->is_regular_file())
+ {
+ Paths.push_back(Rel);
+ }
+ }
+ std::sort(Paths.begin(), Paths.end());
+ return Paths;
+ }
+
+ static bool FoldersEquivalent(const std::filesystem::path& A, const std::filesystem::path& B)
+ {
+ const auto AFiles = ListRelative(A);
+ const auto BFiles = ListRelative(B);
+ if (AFiles != BFiles)
+ {
+ return false;
+ }
+ for (const std::filesystem::path& Rel : AFiles)
+ {
+ const IoHash HA = IoHash::HashBuffer(ReadFile(A / Rel).Flatten());
+ const IoHash HB = IoHash::HashBuffer(ReadFile(B / Rel).Flatten());
+ if (HA != HB)
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ struct TestHarness
+ {
+ TestHarness() : Workers(/*BoostWorkers*/ false, /*SingleThreaded*/ false), Progress(CreateStandardProgress(zen::logging::Default()))
+ {
+ }
+
+ StorageInstance MakeStorage(const std::filesystem::path& StoragePath)
+ {
+ StorageInstance SI;
+ SI.BuildStorage = CreateFileBuildStorage(StoragePath, StorageStats, /*EnableJsonOutput*/ false);
+ return SI;
+ }
+
+ std::pair<Oid, Oid> UploadOnce(StorageInstance& Storage,
+ const std::filesystem::path& SourceFolder,
+ const std::filesystem::path& TempDir)
+ {
+ const Oid BuildId = Oid::NewOid();
+ const Oid BuildPartId = Oid::NewOid();
+
+ auto ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{});
+ auto ChunkCache = CreateNullChunkingCache();
+
+ CreateDirectories(TempDir);
+
+ UploadFolderOptions Options{};
+ Options.TempDir = TempDir;
+ Options.FindBlockMaxCount = 10000;
+ Options.BlockReuseMinPercentLimit = 85;
+ Options.AllowMultiparts = true;
+ Options.CreateBuild = true;
+ Options.IgnoreExistingBlocks = false;
+ Options.UploadToZenCache = false;
+ Options.IsQuiet = true;
+
+ const CbObject MetaData;
+
+ UploadFolder(zen::logging::Default(),
+ *Progress,
+ Workers,
+ Storage,
+ AbortFlag,
+ PauseFlag,
+ BuildId,
+ BuildPartId,
+ /*BuildPartName*/ "default"sv,
+ SourceFolder,
+ /*ManifestPath*/ {},
+ MetaData,
+ *ChunkController,
+ *ChunkCache,
+ Options);
+
+ return {BuildId, BuildPartId};
+ }
+
+ void DownloadOnce(StorageInstance& Storage,
+ const Oid& BuildId,
+ const std::filesystem::path& TargetFolder,
+ const std::filesystem::path& ZenFolderPath,
+ const std::filesystem::path& SystemRootDir,
+ const DownloadOptions* OverrideOptions = nullptr)
+ {
+ CreateDirectories(TargetFolder);
+ CreateDirectories(ZenFolderPath);
+ CreateDirectories(SystemRootDir);
+
+ DownloadOptions Options;
+ if (OverrideOptions)
+ {
+ Options = *OverrideOptions;
+ }
+ Options.ZenFolderPath = ZenFolderPath;
+ Options.SystemRootDir = SystemRootDir;
+ Options.IsQuiet = true;
+
+ const std::vector<Oid> BuildPartIds;
+ const std::vector<std::string> BuildPartNames;
+
+ DownloadFolder(zen::logging::Default(),
+ *Progress,
+ Workers,
+ Storage,
+ AbortFlag,
+ PauseFlag,
+ StorageCacheStats,
+ BuildId,
+ BuildPartIds,
+ BuildPartNames,
+ /*DownloadSpecPath*/ {},
+ TargetFolder,
+ Options);
+ }
+
+ std::atomic<bool> AbortFlag{false};
+ std::atomic<bool> PauseFlag{false};
+ TransferThreadWorkers Workers;
+ std::unique_ptr<ProgressBase> Progress;
+ BuildStorageBase::Statistics StorageStats;
+ BuildStorageCache::Statistics StorageCacheStats;
+ };
+
+} // namespace buildops_test
+
+TEST_SUITE_BEGIN("remotestore.buildoperations");
+
+// Flagship case: one upload + reupload + multiple download variants against
+// the same in-process storage. Exercises scavenge, local-chunk copy,
+// cached-block reuse, partial-block fetch, and full-block download.
+TEST_CASE("buildoperations.roundtrip.full_variations")
+{
+ using namespace buildops_test;
+
+ ScopedTemporaryDirectory Root;
+ TestHarness H;
+
+ const std::filesystem::path FolderA = Root.Path() / "src_a";
+ const std::filesystem::path FolderB = Root.Path() / "src_b";
+ const std::filesystem::path StoragePath = Root.Path() / "storage";
+ const std::filesystem::path UploadTemp = Root.Path() / "upload_tmp";
+ const std::filesystem::path SystemRoot = Root.Path() / "sys";
+
+ MakeTestFolder(FolderA, FolderSpec{.Seed = 1});
+ MakeTestFolder(FolderB, FolderSpec{.Seed = 1, .DuplicateFileCount = 20});
+
+ CreateDirectories(StoragePath);
+ StorageInstance Storage = H.MakeStorage(StoragePath);
+
+ const auto [BuildIdA, PartIdA] = H.UploadOnce(Storage, FolderA, UploadTemp);
+ CHECK(BuildIdA != Oid::Zero);
+
+ // Re-upload A: should round-trip without error and still produce a
+ // usable build (we verify via a subsequent download).
+ const auto [BuildIdA2, PartIdA2] = H.UploadOnce(Storage, FolderA, UploadTemp);
+ CHECK(BuildIdA2 != Oid::Zero);
+
+ // Upload B (shares content with A).
+ const auto [BuildIdB, PartIdB] = H.UploadOnce(Storage, FolderB, UploadTemp);
+ CHECK(BuildIdB != Oid::Zero);
+
+ // Download A into an empty target. Exercises ScheduleFullBlockDownloads +
+ // ScheduleLooseChunkWrites.
+ {
+ const std::filesystem::path Target = Root.Path() / "dl_empty";
+ const std::filesystem::path ZenState = Target / ".zen";
+ H.DownloadOnce(Storage, BuildIdA, Target, ZenState, SystemRoot);
+ CHECK(FoldersEquivalent(FolderA, Target));
+ }
+
+ // Re-download A after removing some files but keeping the .zen state
+ // dir. Exercises ScheduleCachedBlockWrites.
+ {
+ const std::filesystem::path Target = Root.Path() / "dl_cached";
+ const std::filesystem::path ZenState = Target / ".zen";
+ H.DownloadOnce(Storage, BuildIdA, Target, ZenState, SystemRoot);
+
+ int Deleted = 0;
+ for (auto& E : std::filesystem::recursive_directory_iterator(Target))
+ {
+ if (Deleted >= 5)
+ break;
+ if (E.is_regular_file())
+ {
+ const std::filesystem::path Rel = std::filesystem::relative(E.path(), Target);
+ if (!Rel.empty() && Rel.begin()->string() == ".zen")
+ continue;
+ std::error_code Ec;
+ std::filesystem::remove(E.path(), Ec);
+ if (!Ec)
+ ++Deleted;
+ }
+ }
+ CHECK(Deleted > 0);
+
+ H.DownloadOnce(Storage, BuildIdA, Target, ZenState, SystemRoot);
+ CHECK(FoldersEquivalent(FolderA, Target));
+ }
+
+ // Download B into a target pre-seeded with A's content. Exercises
+ // ScheduleLocalChunkCopies and ScheduleScavengedSequenceWrites (the two
+ // span-capture sites that were fixed).
+ {
+ const std::filesystem::path Target = Root.Path() / "dl_scavenge";
+ const std::filesystem::path ZenState = Target / ".zen";
+ CopyTreeExcludingZen(FolderA, Target);
+
+ DownloadOptions Opts;
+ Opts.EnableTargetFolderScavenging = true;
+ Opts.EnableOtherDownloadsScavenging = true;
+ H.DownloadOnce(Storage, BuildIdB, Target, ZenState, SystemRoot, &Opts);
+
+ CHECK(FoldersEquivalent(FolderB, Target));
+ }
+
+ // Partial-block mode.
+ {
+ const std::filesystem::path Target = Root.Path() / "dl_partial";
+ const std::filesystem::path ZenState = Target / ".zen";
+
+ DownloadOptions Opts;
+ Opts.PartialBlockRequestMode = EPartialBlockRequestMode::All;
+ H.DownloadOnce(Storage, BuildIdB, Target, ZenState, SystemRoot, &Opts);
+
+ CHECK(FoldersEquivalent(FolderB, Target));
+ }
+}
+
+// Abort the download before it can do meaningful work. Expected to unwind
+// cleanly, not crash or assert.
+TEST_CASE("buildoperations.download.abort_midway")
+{
+ using namespace buildops_test;
+
+ ScopedTemporaryDirectory Root;
+ TestHarness H;
+
+ const std::filesystem::path Folder = Root.Path() / "src";
+ const std::filesystem::path StoragePath = Root.Path() / "storage";
+ const std::filesystem::path UploadTemp = Root.Path() / "upload_tmp";
+ const std::filesystem::path SystemRoot = Root.Path() / "sys";
+
+ MakeTestFolder(Folder, FolderSpec{.Seed = 42});
+ CreateDirectories(StoragePath);
+ StorageInstance Storage = H.MakeStorage(StoragePath);
+
+ const auto [BuildId, PartId] = H.UploadOnce(Storage, Folder, UploadTemp);
+
+ const std::filesystem::path Target = Root.Path() / "dl_abort";
+ const std::filesystem::path ZenState = Target / ".zen";
+
+ H.AbortFlag.store(true);
+ CHECK_NOTHROW(H.DownloadOnce(Storage, BuildId, Target, ZenState, SystemRoot));
+}
+
+// Empty source folder round-trip: must not crash, must produce an empty
+// download target.
+TEST_CASE("buildoperations.roundtrip.empty_folder")
+{
+ using namespace buildops_test;
+
+ ScopedTemporaryDirectory Root;
+ TestHarness H;
+
+ const std::filesystem::path Folder = Root.Path() / "empty";
+ const std::filesystem::path StoragePath = Root.Path() / "storage";
+ const std::filesystem::path UploadTemp = Root.Path() / "upload_tmp";
+ const std::filesystem::path SystemRoot = Root.Path() / "sys";
+
+ CreateDirectories(Folder);
+ CreateDirectories(StoragePath);
+ StorageInstance Storage = H.MakeStorage(StoragePath);
+
+ const auto [BuildId, PartId] = H.UploadOnce(Storage, Folder, UploadTemp);
+
+ const std::filesystem::path Target = Root.Path() / "dl_empty";
+ const std::filesystem::path ZenState = Target / ".zen";
+ H.DownloadOnce(Storage, BuildId, Target, ZenState, SystemRoot);
+
+ CHECK(ListRelative(Target).empty());
+}
+
+TEST_SUITE_END();
+
+#endif // ZEN_WITH_TESTS
+
+} // namespace zen
diff --git a/src/zenremotestore/builds/buildprimecache.cpp b/src/zenremotestore/builds/buildprimecache.cpp
new file mode 100644
index 000000000..12791f718
--- /dev/null
+++ b/src/zenremotestore/builds/buildprimecache.cpp
@@ -0,0 +1,350 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenremotestore/builds/buildprimecache.h>
+
+#include <zencore/compactbinaryutil.h>
+#include <zencore/filesystem.h>
+#include <zencore/fmtutils.h>
+#include <zencore/parallelwork.h>
+#include <zencore/timer.h>
+#include <zencore/trace.h>
+#include <zenremotestore/builds/buildstorageutil.h>
+#include <zenremotestore/builds/builduploadfolder.h>
+#include <zenutil/filteredrate.h>
+#include <zenutil/progress.h>
+
+namespace zen {
+
+using namespace std::literals;
+
+// Prime-cache operation: downloads every blob referenced by the given build
+// parts from the primary build storage and pushes them into the remote cache
+// storage (see Execute()). All collaborators are captured by reference and
+// must outlive this object; BuildPartIds is copied.
+BuildsOperationPrimeCache::BuildsOperationPrimeCache(LoggerRef Log,
+ ProgressBase& Progress,
+ StorageInstance& Storage,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ WorkerThreadPool& NetworkPool,
+ const Oid& BuildId,
+ std::span<const Oid> BuildPartIds,
+ const Options& Options,
+ BuildStorageCache::Statistics& StorageCacheStats)
+: m_Log(Log)
+, m_Progress(Progress)
+, m_Storage(Storage)
+, m_AbortFlag(AbortFlag)
+, m_PauseFlag(PauseFlag)
+, m_NetworkPool(NetworkPool)
+, m_BuildId(BuildId)
+, m_BuildPartIds(BuildPartIds.begin(), BuildPartIds.end())
+, m_Options(Options)
+, m_StorageCacheStats(StorageCacheStats)
+{
+ // Scratch area for multipart (large blob) downloads; created eagerly so
+ // worker callbacks do not race to create it later.
+ m_TempPath = m_Options.ZenFolderPath / "tmp";
+ CreateDirectories(m_TempPath);
+}
+
+// Runs the prime-cache operation end to end:
+//  1. Collect every block/chunk blob referenced by the requested build parts.
+//  2. Skip blobs the remote cache already holds.
+//  3. Download the remainder in parallel, forwarding each payload to the
+//     cache storage, then flush pending cache uploads and report statistics.
+void
+BuildsOperationPrimeCache::Execute()
+{
+ ZEN_TRACE_CPU("BuildsOperationPrimeCache::Execute");
+
+ Stopwatch PrimeTimer;
+
+ tsl::robin_map<IoHash, uint64_t, IoHash::Hasher> LooseChunkRawSizes;
+ tsl::robin_set<IoHash, IoHash::Hasher> BuildBlobs;
+ CollectReferencedBlobs(BuildBlobs, LooseChunkRawSizes);
+
+ if (!m_Options.IsQuiet)
+ {
+ ZEN_INFO("Found {} referenced blobs", BuildBlobs.size());
+ }
+
+ if (BuildBlobs.empty())
+ {
+ return;
+ }
+
+ std::vector<IoHash> BlobsToDownload = FilterAlreadyCachedBlobs(BuildBlobs);
+
+ if (BlobsToDownload.empty())
+ {
+ return;
+ }
+
+ // Explicitly zero-initialize: a default-constructed std::atomic holds an
+ // indeterminate value before C++20, and these counters are read/compared
+ // by the download callbacks below.
+ std::atomic<uint64_t> MultipartAttachmentCount{0};
+ std::atomic<size_t> CompletedDownloadCount{0};
+ FilteredRate FilteredDownloadedBytesPerSecond;
+
+ ScheduleBlobDownloads(BlobsToDownload,
+ LooseChunkRawSizes,
+ MultipartAttachmentCount,
+ CompletedDownloadCount,
+ FilteredDownloadedBytesPerSecond);
+
+ if (m_AbortFlag)
+ {
+ return;
+ }
+
+ if (m_Storage.CacheStorage)
+ {
+ // Wait for queued cache uploads to drain; keep waiting while not aborted.
+ // (Removed a stale ZEN_UNUSED(Remaining) - Remaining is used in the log.)
+ m_Storage.CacheStorage->Flush(m_Progress.GetProgressUpdateDelayMS(), [this](intptr_t Remaining) -> bool {
+ if (!m_Options.IsQuiet)
+ {
+ ZEN_INFO("Waiting for {} blobs to finish upload to '{}'", Remaining, m_Storage.CacheHost.Name);
+ }
+ return !m_AbortFlag;
+ });
+ }
+
+ if (!m_Options.IsQuiet)
+ {
+ uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + m_DownloadStats.DownloadedBlockByteCount.load();
+ ZEN_INFO("Downloaded {} ({}bits/s) in {}. {} as multipart. Completed in {}",
+ NiceBytes(DownloadedBytes),
+ NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), DownloadedBytes * 8)),
+ NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000),
+ MultipartAttachmentCount.load(),
+ NiceTimeSpanMs(PrimeTimer.GetElapsedTimeMs()));
+ }
+}
+
+// Collects the union of all block and loose-chunk attachment hashes referenced
+// by the requested build parts into OutBuildBlobs, and records each loose
+// chunk's raw size in OutLooseChunkRawSizes so the scheduler can choose the
+// multipart download path for large chunks.
+// Throws std::runtime_error if a part's chunk hash and size arrays disagree.
+void
+BuildsOperationPrimeCache::CollectReferencedBlobs(tsl::robin_set<IoHash, IoHash::Hasher>& OutBuildBlobs,
+ tsl::robin_map<IoHash, uint64_t, IoHash::Hasher>& OutLooseChunkRawSizes)
+{
+ for (const Oid& BuildPartId : m_BuildPartIds)
+ {
+ CbObject BuildPart = m_Storage.BuildStorage->GetBuildPart(m_BuildId, BuildPartId);
+
+ CbObjectView BlockAttachmentsView = BuildPart["blockAttachments"sv].AsObjectView();
+ std::vector<IoHash> BlockAttachments = compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, BlockAttachmentsView);
+
+ CbObjectView ChunkAttachmentsView = BuildPart["chunkAttachments"sv].AsObjectView();
+ std::vector<IoHash> ChunkAttachments = compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, ChunkAttachmentsView);
+ std::vector<uint64_t> ChunkRawSizes = compactbinary_helpers::ReadArray<uint64_t>("chunkRawSizes"sv, ChunkAttachmentsView);
+ if (ChunkAttachments.size() != ChunkRawSizes.size())
+ {
+ throw std::runtime_error(fmt::format("Mismatch of loose chunk raw size array, expected {}, found {}",
+ ChunkAttachments.size(),
+ ChunkRawSizes.size()));
+ }
+
+ // Hashes are deduplicated by the set; the same blob may be referenced
+ // by several parts.
+ OutBuildBlobs.reserve(ChunkAttachments.size() + BlockAttachments.size());
+ OutBuildBlobs.insert(BlockAttachments.begin(), BlockAttachments.end());
+ OutBuildBlobs.insert(ChunkAttachments.begin(), ChunkAttachments.end());
+
+ for (size_t ChunkAttachmentIndex = 0; ChunkAttachmentIndex < ChunkAttachments.size(); ChunkAttachmentIndex++)
+ {
+ OutLooseChunkRawSizes.insert_or_assign(ChunkAttachments[ChunkAttachmentIndex], ChunkRawSizes[ChunkAttachmentIndex]);
+ }
+ }
+}
+
+// Returns the subset of BuildBlobs that the remote cache does not already hold
+// (with a body). When there is no cache storage, or ForceUpload is set, every
+// blob is returned for download.
+std::vector<IoHash>
+BuildsOperationPrimeCache::FilterAlreadyCachedBlobs(const tsl::robin_set<IoHash, IoHash::Hasher>& BuildBlobs)
+{
+ std::vector<IoHash> BlobsToDownload;
+ BlobsToDownload.reserve(BuildBlobs.size());
+
+ if (m_Storage.CacheStorage && !BuildBlobs.empty() && !m_Options.ForceUpload)
+ {
+ ZEN_TRACE_CPU("BlobCacheExistCheck");
+ Stopwatch Timer;
+
+ const std::vector<IoHash> BlobHashes(BuildBlobs.begin(), BuildBlobs.end());
+ const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult =
+ m_Storage.CacheStorage->BlobsExists(m_BuildId, BlobHashes);
+
+ if (CacheExistsResult.size() == BlobHashes.size())
+ {
+ // Results are positional: index i answers for BlobHashes[i].
+ for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++)
+ {
+ if (!CacheExistsResult[BlobIndex].HasBody)
+ {
+ BlobsToDownload.push_back(BlobHashes[BlobIndex]);
+ }
+ }
+ size_t FoundCount = BuildBlobs.size() - BlobsToDownload.size();
+
+ if (FoundCount > 0 && !m_Options.IsQuiet)
+ {
+ ZEN_INFO("Remote cache : Found {} out of {} needed blobs in {}",
+ FoundCount,
+ BuildBlobs.size(),
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ }
+ }
+ else
+ {
+ // The exists query did not answer for every hash (cache error or
+ // partial response). Previously this left BlobsToDownload empty,
+ // which made the caller believe the cache was fully primed and
+ // silently skip all downloads. Fall back to downloading everything.
+ BlobsToDownload.assign(BlobHashes.begin(), BlobHashes.end());
+ }
+ }
+ else
+ {
+ BlobsToDownload.insert(BlobsToDownload.end(), BuildBlobs.begin(), BuildBlobs.end());
+ }
+ return BlobsToDownload;
+}
+
+// Fans out one download task per blob onto the network pool, then blocks in
+// Work.Wait() driving the progress bar until all tasks complete (or abort).
+// Large loose chunks (raw size >= Options.LargeAttachmentSize) take the
+// multipart path; everything else is fetched as a single request.
+void
+BuildsOperationPrimeCache::ScheduleBlobDownloads(std::span<const IoHash> BlobsToDownload,
+ const tsl::robin_map<IoHash, uint64_t, IoHash::Hasher>& LooseChunkRawSizes,
+ std::atomic<uint64_t>& MultipartAttachmentCount,
+ std::atomic<size_t>& CompletedDownloadCount,
+ FilteredRate& FilteredDownloadedBytesPerSecond)
+{
+ std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Downloading");
+
+ ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+
+ const size_t BlobCount = BlobsToDownload.size();
+
+ for (size_t BlobIndex = 0; BlobIndex < BlobCount; BlobIndex++)
+ {
+ // The span and maps are captured by reference/value views; all of them
+ // outlive Work.Wait() below, which joins every scheduled task.
+ Work.ScheduleWork(
+ m_NetworkPool,
+ [this,
+ &Work,
+ BlobsToDownload,
+ BlobCount,
+ &LooseChunkRawSizes,
+ &CompletedDownloadCount,
+ &FilteredDownloadedBytesPerSecond,
+ &MultipartAttachmentCount,
+ BlobIndex](std::atomic<bool>&) {
+ if (!m_AbortFlag)
+ {
+ const IoHash& BlobHash = BlobsToDownload[BlobIndex];
+ // Only loose chunks have a recorded raw size; blocks always
+ // take the single-request path.
+ bool IsLargeBlob = false;
+ if (auto It = LooseChunkRawSizes.find(BlobHash); It != LooseChunkRawSizes.end())
+ {
+ IsLargeBlob = It->second >= m_Options.LargeAttachmentSize;
+ }
+
+ // Start() is idempotent per rate window; the first task to run
+ // anchors the measurement.
+ FilteredDownloadedBytesPerSecond.Start();
+
+ if (IsLargeBlob)
+ {
+ DownloadLargeBlobForCache(Work,
+ BlobHash,
+ BlobCount,
+ CompletedDownloadCount,
+ MultipartAttachmentCount,
+ FilteredDownloadedBytesPerSecond);
+ }
+ else
+ {
+ DownloadSingleBlobForCache(BlobHash, BlobCount, CompletedDownloadCount, FilteredDownloadedBytesPerSecond);
+ }
+ }
+ });
+ }
+
+ // Drive progress updates while the tasks run; the callback samples the
+ // download/upload counters and refreshes the progress bar state.
+ Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ ZEN_UNUSED(PendingWork);
+
+ uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + m_DownloadStats.DownloadedBlockByteCount.load();
+ FilteredDownloadedBytesPerSecond.Update(DownloadedBytes);
+
+ // Omit the rate once everything has completed.
+ std::string DownloadRateString = (CompletedDownloadCount == BlobCount)
+ ? ""
+ : fmt::format(" {}bits/s", NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8));
+ std::string UploadDetails = m_Storage.CacheStorage ? fmt::format(" {} ({}) uploaded.",
+ m_StorageCacheStats.PutBlobCount.load(),
+ NiceBytes(m_StorageCacheStats.PutBlobByteCount.load()))
+ : "";
+
+ std::string Details = fmt::format("{}/{} ({}{}) downloaded.{}",
+ CompletedDownloadCount.load(),
+ BlobCount,
+ NiceBytes(DownloadedBytes),
+ DownloadRateString,
+ UploadDetails);
+ ProgressBar->UpdateState({.Task = "Downloading",
+ .Details = Details,
+ .TotalCount = BlobCount,
+ .RemainingCount = BlobCount - CompletedDownloadCount.load(),
+ .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+ false);
+ });
+
+ FilteredDownloadedBytesPerSecond.Stop();
+ ProgressBar->Finish();
+}
+
+// Downloads one large blob via the multipart helper (DownloadLargeBlob), which
+// may split the fetch into PreferredMultipartChunkSize ranges scheduled on the
+// network pool. The completion callback forwards the payload to the cache
+// storage and stops the rate meter when the final blob finishes.
+void
+BuildsOperationPrimeCache::DownloadLargeBlobForCache(ParallelWork& Work,
+ const IoHash& BlobHash,
+ size_t BlobCount,
+ std::atomic<size_t>& CompletedDownloadCount,
+ std::atomic<uint64_t>& MultipartAttachmentCount,
+ FilteredRate& FilteredDownloadedBytesPerSecond)
+{
+ DownloadLargeBlob(*m_Storage.BuildStorage,
+ m_TempPath,
+ m_BuildId,
+ BlobHash,
+ m_Options.PreferredMultipartChunkSize,
+ Work,
+ m_NetworkPool,
+ m_DownloadStats.DownloadedChunkByteCount,
+ MultipartAttachmentCount,
+ [this, BlobCount, BlobHash, &FilteredDownloadedBytesPerSecond, &CompletedDownloadCount](IoBuffer&& Payload) {
+ m_DownloadStats.DownloadedChunkCount++;
+ m_DownloadStats.RequestsCompleteCount++;
+
+ if (!m_AbortFlag)
+ {
+ // Payload may be empty on failure; only forward real data.
+ if (Payload && m_Storage.CacheStorage)
+ {
+ m_Storage.CacheStorage->PutBuildBlob(m_BuildId,
+ BlobHash,
+ ZenContentType::kCompressedBinary,
+ CompositeBuffer(SharedBuffer(Payload)));
+ }
+ }
+ // fetch_add returns the previous value; +1 detects the last completion.
+ if (CompletedDownloadCount.fetch_add(1) + 1 == BlobCount)
+ {
+ FilteredDownloadedBytesPerSecond.Stop();
+ }
+ });
+}
+
+// Downloads one blob with a single request and forwards it to the cache
+// storage. HTTP exceptions are swallowed only while aborting (they are an
+// expected side effect of cancelled transfers); otherwise they propagate.
+void
+BuildsOperationPrimeCache::DownloadSingleBlobForCache(const IoHash& BlobHash,
+ size_t BlobCount,
+ std::atomic<size_t>& CompletedDownloadCount,
+ FilteredRate& FilteredDownloadedBytesPerSecond)
+{
+ IoBuffer Payload;
+ try
+ {
+ Payload = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlobHash);
+
+ m_DownloadStats.DownloadedBlockCount++;
+ m_DownloadStats.DownloadedBlockByteCount += Payload.GetSize();
+ m_DownloadStats.RequestsCompleteCount++;
+ }
+ catch (const std::exception&)
+ {
+ // Silence http errors due to abort
+ if (!m_AbortFlag)
+ {
+ throw;
+ }
+ }
+
+ if (!m_AbortFlag)
+ {
+ if (Payload && m_Storage.CacheStorage)
+ {
+ m_Storage.CacheStorage->PutBuildBlob(m_BuildId,
+ BlobHash,
+ ZenContentType::kCompressedBinary,
+ CompositeBuffer(SharedBuffer(std::move(Payload))));
+ }
+ // fetch_add returns the previous value; +1 detects the last completion.
+ // Note: when aborting, this increment is skipped and the final Stop()
+ // in ScheduleBlobDownloads closes the rate window instead.
+ if (CompletedDownloadCount.fetch_add(1) + 1 == BlobCount)
+ {
+ FilteredDownloadedBytesPerSecond.Stop();
+ }
+ }
+}
+
+} // namespace zen
diff --git a/src/zenremotestore/builds/buildsavedstate.cpp b/src/zenremotestore/builds/buildsavedstate.cpp
index 1d1f4605f..dfc565d4a 100644
--- a/src/zenremotestore/builds/buildsavedstate.cpp
+++ b/src/zenremotestore/builds/buildsavedstate.cpp
@@ -163,6 +163,8 @@ BuildSaveState::Write(const BuildSaveState& SaveState, CbWriter& Output)
{
ZEN_ASSERT(!SaveState.LocalPath.empty());
+ Output.AddInteger("version", SaveState.Version);
+
Output.AddString("path", (const char*)SaveState.LocalPath.u8string().c_str());
BuildsSelection::Write(SaveState.State.Selection, Output);
@@ -182,6 +184,7 @@ BuildSaveState::Write(const BuildSaveState& SaveState, CbWriter& Output)
BuildSaveState
BuildSaveState::Read(CbObjectView& Input)
{
+ uint32_t Version = Input["version"].AsUInt32(BuildSaveState::NoVersion);
BuildState State = BuildState::Read(Input);
CbObjectView LocalFolderStateObject = Input["localFolderState"sv].AsObjectView();
FolderContent FolderState = LoadFolderContentToCompactBinary(LocalFolderStateObject);
@@ -191,7 +194,10 @@ BuildSaveState::Read(CbObjectView& Input)
throw std::runtime_error("BuildSaveState is invalid, 'path' field is empty");
}
- return BuildSaveState{.State = std::move(State), .FolderState = std::move(FolderState), .LocalPath = std::move(LocalPath)};
+ return BuildSaveState{.Version = Version,
+ .State = std::move(State),
+ .FolderState = std::move(FolderState),
+ .LocalPath = std::move(LocalPath)};
}
CbObject
@@ -588,6 +594,8 @@ namespace buildsavestate_test {
}
} // namespace buildsavestate_test
+TEST_SUITE_BEGIN("remotestore.buildsavedstate");
+
TEST_CASE("buildsavestate.BuildsSelection")
{
using namespace buildsavestate_test;
@@ -696,6 +704,8 @@ TEST_CASE("buildsavestate.DownloadedPaths")
}
}
+TEST_SUITE_END();
+
#endif // ZEN_WITH_TESTS
} // namespace zen
diff --git a/src/zenremotestore/builds/buildstoragecache.cpp b/src/zenremotestore/builds/buildstoragecache.cpp
index faa85f81b..8fd31a326 100644
--- a/src/zenremotestore/builds/buildstoragecache.cpp
+++ b/src/zenremotestore/builds/buildstoragecache.cpp
@@ -96,7 +96,8 @@ public:
ZEN_ASSERT(!IsFlushed);
ZEN_ASSERT(ContentType == ZenContentType::kCompressedBinary);
- // Move all segments in Payload to be file handle based so if Payload is materialized it does not affect buffers in queue
+ // Move all segments in Payload to be file handle based unless they are very small so if Payload is materialized it does not affect
+ // buffers in queue
std::vector<SharedBuffer> FileBasedSegments;
std::span<const SharedBuffer> Segments = Payload.GetSegments();
FileBasedSegments.reserve(Segments.size());
@@ -104,42 +105,56 @@ public:
tsl::robin_map<void*, std::filesystem::path> HandleToPath;
for (const SharedBuffer& Segment : Segments)
{
- std::filesystem::path FilePath;
- IoBufferFileReference Ref;
- if (Segment.AsIoBuffer().GetFileReference(Ref))
+ const uint64_t SegmentSize = Segment.GetSize();
+ if (SegmentSize < 16u * 1024u)
{
- if (auto It = HandleToPath.find(Ref.FileHandle); It != HandleToPath.end())
- {
- FilePath = It->second;
- }
- else
+ FileBasedSegments.push_back(Segment);
+ }
+ else
+ {
+ std::filesystem::path FilePath;
+ IoBufferFileReference Ref;
+ if (Segment.AsIoBuffer().GetFileReference(Ref))
{
- std::error_code Ec;
- std::filesystem::path Path = PathFromHandle(Ref.FileHandle, Ec);
- if (!Ec && !Path.empty())
+ if (auto It = HandleToPath.find(Ref.FileHandle); It != HandleToPath.end())
+ {
+ FilePath = It->second;
+ }
+ else
{
- HandleToPath.insert_or_assign(Ref.FileHandle, Path);
- FilePath = std::move(Path);
+ std::error_code Ec;
+ std::filesystem::path Path = PathFromHandle(Ref.FileHandle, Ec);
+ if (!Ec && !Path.empty())
+ {
+ HandleToPath.insert_or_assign(Ref.FileHandle, Path);
+ FilePath = std::move(Path);
+ }
+ else
+ {
+ ZEN_WARN("Failed getting path for chunk to upload to cache. Skipping upload.");
+ return;
+ }
}
}
- }
- if (!FilePath.empty())
- {
- IoBuffer BufferFromFile = IoBufferBuilder::MakeFromFile(FilePath, Ref.FileChunkOffset, Ref.FileChunkSize);
- if (BufferFromFile)
+ if (!FilePath.empty())
{
- FileBasedSegments.push_back(SharedBuffer(std::move(BufferFromFile)));
+ IoBuffer BufferFromFile = IoBufferBuilder::MakeFromFile(FilePath, Ref.FileChunkOffset, Ref.FileChunkSize);
+ if (BufferFromFile)
+ {
+ FileBasedSegments.push_back(SharedBuffer(std::move(BufferFromFile)));
+ }
+ else
+ {
+ ZEN_WARN("Failed opening file '{}' to upload to cache. Skipping upload.", FilePath);
+ return;
+ }
}
else
{
FileBasedSegments.push_back(Segment);
}
}
- else
- {
- FileBasedSegments.push_back(Segment);
- }
}
}
@@ -151,7 +166,7 @@ public:
auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); });
HttpClient::Response CacheResponse =
- m_HttpClient.Upload(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash.ToHexString()),
+ m_HttpClient.Upload(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash),
Payload,
ContentType);
@@ -178,9 +193,12 @@ public:
{
Headers.Entries.insert({"Range", fmt::format("bytes={}-{}", RangeOffset, RangeOffset + RangeBytes - 1)});
}
- CreateDirectories(m_TempFolderPath);
+ if (!m_TempFolderPath.empty())
+ {
+ CreateDirectories(m_TempFolderPath);
+ }
HttpClient::Response CacheResponse =
- m_HttpClient.Download(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash.ToHexString()),
+ m_HttpClient.Download(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash),
m_TempFolderPath,
Headers);
AddStatistic(CacheResponse);
@@ -191,6 +209,78 @@ public:
return {};
}
+ // Fetches multiple byte ranges of one cached blob in a single POST. The
+ // request body is a compact-binary object {"ranges": [{offset,length}...]};
+ // the response is a CbPackage echoing the ranges plus one binary attachment
+ // (keyed by RawHash) holding the range data packed back to back.
+ // Returns an empty result on any failure or if the server's echoed ranges
+ // do not match the request exactly (same order, offsets and lengths) —
+ // callers treat that as a cache miss.
+ virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId,
+ const IoHash& RawHash,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges) override
+ {
+ ZEN_TRACE_CPU("ZenBuildStorageCache::GetBuildBlobRanges");
+
+ Stopwatch ExecutionTimer;
+ auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); });
+
+ CbObjectWriter Writer;
+ Writer.BeginArray("ranges"sv);
+ {
+ for (const std::pair<uint64_t, uint64_t>& Range : Ranges)
+ {
+ Writer.BeginObject();
+ {
+ Writer.AddInteger("offset"sv, Range.first);
+ Writer.AddInteger("length"sv, Range.second);
+ }
+ Writer.EndObject();
+ }
+ }
+ Writer.EndArray(); // ranges
+
+ // Large responses may spill to disk; make sure the temp folder exists
+ // (an empty path means disk spill is disabled).
+ if (!m_TempFolderPath.empty())
+ {
+ CreateDirectories(m_TempFolderPath);
+ }
+ HttpClient::Response CacheResponse =
+ m_HttpClient.Post(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash),
+ Writer.Save(),
+ HttpClient::Accept(ZenContentType::kCbPackage),
+ m_TempFolderPath);
+ AddStatistic(CacheResponse);
+ if (CacheResponse.IsSuccess())
+ {
+ CbPackage ResponsePackage = ParsePackageMessage(CacheResponse.ResponsePayload);
+ CbObjectView ResponseObject = ResponsePackage.GetObject();
+
+ CbArrayView RangeArray = ResponseObject["ranges"sv].AsArrayView();
+
+ std::vector<std::pair<uint64_t, uint64_t>> ReceivedRanges;
+ ReceivedRanges.reserve(RangeArray.Num());
+
+ // Running offset into the packed attachment payload.
+ uint64_t OffsetInPayloadRanges = 0;
+
+ for (CbFieldView View : RangeArray)
+ {
+ CbObjectView RangeView = View.AsObjectView();
+ uint64_t Offset = RangeView["offset"sv].AsUInt64();
+ uint64_t Length = RangeView["length"sv].AsUInt64();
+
+ // Validate positionally against the requested ranges.
+ const std::pair<uint64_t, uint64_t>& Range = Ranges[ReceivedRanges.size()];
+
+ if (Offset != Range.first || Length != Range.second)
+ {
+ return {};
+ }
+ // Returned ranges are expressed as offsets into the packed payload.
+ ReceivedRanges.push_back(std::make_pair(OffsetInPayloadRanges, Length));
+ OffsetInPayloadRanges += Length;
+ }
+
+ const CbAttachment* DataAttachment = ResponsePackage.FindAttachment(RawHash);
+ if (DataAttachment)
+ {
+ SharedBuffer PayloadRanges = DataAttachment->AsBinary();
+ return BuildBlobRanges{.PayloadBuffer = PayloadRanges.AsIoBuffer(), .Ranges = std::move(ReceivedRanges)};
+ }
+ }
+ return {};
+ }
+
virtual void PutBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes, std::span<const CbObject> MetaDatas) override
{
ZEN_ASSERT(!IsFlushed);
@@ -460,6 +550,192 @@ CreateZenBuildStorageCache(HttpClient& HttpClient,
return std::make_unique<ZenBuildStorageCache>(HttpClient, Stats, Namespace, Bucket, TempFolderPath, BackgroundWorkerPool);
}
+#if ZEN_WITH_TESTS
+
+// Test-only in-memory BuildStorageCache. Stores blobs in a mutex-guarded map
+// keyed by raw hash, simulates configurable latency/bandwidth, and emulates
+// servers with different levels of range-request support (MaxRangeSupported).
+class InMemoryBuildStorageCache : public BuildStorageCache
+{
+public:
+ // MaxRangeSupported == 0 : no range requests are accepted, always return full blob
+ // MaxRangeSupported == 1 : single range is supported, multi range returns full blob
+ // MaxRangeSupported > 1 : multirange is supported up to MaxRangeSupported, more ranges returns empty blob (bad request)
+ explicit InMemoryBuildStorageCache(uint64_t MaxRangeSupported,
+ BuildStorageCache::Statistics& Stats,
+ double LatencySec = 0.0,
+ double DelayPerKBSec = 0.0)
+ : m_MaxRangeSupported(MaxRangeSupported)
+ , m_Stats(Stats)
+ , m_LatencySec(LatencySec)
+ , m_DelayPerKBSec(DelayPerKBSec)
+ {
+ }
+ // Stores a flattened, owned copy of the payload under RawHash.
+ void PutBuildBlob(const Oid&, const IoHash& RawHash, ZenContentType, const CompositeBuffer& Payload) override
+ {
+ IoBuffer Buf = Payload.Flatten().AsIoBuffer();
+ Buf.MakeOwned();
+ const uint64_t SentBytes = Buf.Size();
+ uint64_t ReceivedBytes = 0;
+ // Simulate request latency up front and response latency on scope exit.
+ SimulateLatency(SentBytes, 0);
+ auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); });
+ Stopwatch ExecutionTimer;
+ auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer.GetElapsedTimeUs(), ReceivedBytes, SentBytes); });
+ {
+ std::lock_guard Lock(m_Mutex);
+ m_Entries[RawHash] = std::move(Buf);
+ }
+ m_Stats.PutBlobCount.fetch_add(1);
+ m_Stats.PutBlobByteCount.fetch_add(SentBytes);
+ }
+
+ // Returns the stored blob, honoring a single byte range only when
+ // m_MaxRangeSupported > 0; otherwise the full blob is returned.
+ IoBuffer GetBuildBlob(const Oid&, const IoHash& RawHash, uint64_t RangeOffset = 0, uint64_t RangeBytes = (uint64_t)-1) override
+ {
+ uint64_t SentBytes = 0;
+ uint64_t ReceivedBytes = 0;
+ SimulateLatency(SentBytes, 0);
+ auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); });
+ Stopwatch ExecutionTimer;
+ auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer.GetElapsedTimeUs(), ReceivedBytes, SentBytes); });
+ IoBuffer FullPayload;
+ {
+ std::lock_guard Lock(m_Mutex);
+ auto It = m_Entries.find(RawHash);
+ if (It == m_Entries.end())
+ {
+ return {};
+ }
+ FullPayload = It->second;
+ }
+
+ if (RangeOffset != 0 || RangeBytes != (uint64_t)-1)
+ {
+ if (m_MaxRangeSupported == 0)
+ {
+ ReceivedBytes = FullPayload.Size();
+ return FullPayload;
+ }
+ else
+ {
+ ReceivedBytes = (RangeBytes == (uint64_t)-1) ? FullPayload.Size() - RangeOffset : RangeBytes;
+ // NOTE(review): when RangeBytes is the (uint64_t)-1 sentinel the
+ // slice size is passed through unclamped here while ReceivedBytes
+ // is clamped above - confirm IoBuffer's slicing constructor clamps
+ // to the underlying size.
+ return IoBuffer(FullPayload, RangeOffset, RangeBytes);
+ }
+ }
+ else
+ {
+ ReceivedBytes = FullPayload.Size();
+ return FullPayload;
+ }
+ }
+
+ // Multi-range fetch. For range counts the simulated server cannot serve
+ // (0 or 1 supported), the full blob is returned with empty Ranges; beyond
+ // MaxRangeSupported (> 1) an empty result simulates a bad request.
+ BuildBlobRanges GetBuildBlobRanges(const Oid&, const IoHash& RawHash, std::span<const std::pair<uint64_t, uint64_t>> Ranges) override
+ {
+ ZEN_ASSERT(!Ranges.empty());
+ uint64_t SentBytes = 0;
+ uint64_t ReceivedBytes = 0;
+ SimulateLatency(SentBytes, 0);
+ auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); });
+ Stopwatch ExecutionTimer;
+ auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer.GetElapsedTimeUs(), ReceivedBytes, SentBytes); });
+ // Bad-request emulation: only applies when multirange is supported.
+ if (m_MaxRangeSupported > 1 && Ranges.size() > m_MaxRangeSupported)
+ {
+ return {};
+ }
+ IoBuffer FullPayload;
+ {
+ std::lock_guard Lock(m_Mutex);
+ auto It = m_Entries.find(RawHash);
+ if (It == m_Entries.end())
+ {
+ return {};
+ }
+ FullPayload = It->second;
+ }
+
+ // Reached with MaxRangeSupported of 0 or 1 when the request exceeds it.
+ if (Ranges.size() > m_MaxRangeSupported)
+ {
+ // An empty Ranges signals to the caller: "full buffer given, use it for all requested ranges".
+ ReceivedBytes = FullPayload.Size();
+ return {.PayloadBuffer = FullPayload};
+ }
+ else
+ {
+ // Serve one contiguous slice covering first..last requested range
+ // (assumes Ranges are sorted ascending by offset) and rebase the
+ // returned offsets onto that slice.
+ uint64_t PayloadStart = Ranges.front().first;
+ uint64_t PayloadSize = Ranges.back().first + Ranges.back().second - PayloadStart;
+ IoBuffer RangeBuffer = IoBuffer(FullPayload, PayloadStart, PayloadSize);
+ std::vector<std::pair<uint64_t, uint64_t>> PayloadRanges;
+ PayloadRanges.reserve(Ranges.size());
+ for (const std::pair<uint64_t, uint64_t>& Range : Ranges)
+ {
+ PayloadRanges.push_back(std::make_pair(Range.first - PayloadStart, Range.second));
+ }
+ ReceivedBytes = PayloadSize;
+ return {.PayloadBuffer = RangeBuffer, .Ranges = std::move(PayloadRanges)};
+ }
+ }
+
+ // Metadata is not modeled by this fake; puts are dropped.
+ void PutBlobMetadatas(const Oid&, std::span<const IoHash>, std::span<const CbObject>) override {}
+
+ // Returns one empty metadata object per requested hash.
+ std::vector<CbObject> GetBlobMetadatas(const Oid&, std::span<const IoHash> Hashes) override
+ {
+ return std::vector<CbObject>(Hashes.size());
+ }
+
+ // Positional existence check: HasBody is true only for stored, non-empty blobs.
+ std::vector<BlobExistsResult> BlobsExists(const Oid&, std::span<const IoHash> Hashes) override
+ {
+ std::lock_guard Lock(m_Mutex);
+ std::vector<BlobExistsResult> Result;
+ Result.reserve(Hashes.size());
+ for (const IoHash& Hash : Hashes)
+ {
+ auto It = m_Entries.find(Hash);
+ Result.push_back({.HasBody = (It != m_Entries.end() && It->second)});
+ }
+ return Result;
+ }
+
+ // All puts are synchronous, so there is never anything to flush.
+ void Flush(int32_t, std::function<bool(intptr_t)>&&) override {}
+
+private:
+ // Mirrors the accounting ZenBuildStorageCache does per HTTP response.
+ void AddStatistic(uint64_t ElapsedTimeUs, uint64_t ReceivedBytes, uint64_t SentBytes)
+ {
+ m_Stats.TotalBytesWritten += SentBytes;
+ m_Stats.TotalBytesRead += ReceivedBytes;
+ m_Stats.TotalExecutionTimeUs += ElapsedTimeUs;
+ m_Stats.TotalRequestCount++;
+ SetAtomicMax(m_Stats.PeakSentBytes, SentBytes);
+ SetAtomicMax(m_Stats.PeakReceivedBytes, ReceivedBytes);
+ if (ElapsedTimeUs > 0)
+ {
+ SetAtomicMax(m_Stats.PeakBytesPerSec, (ReceivedBytes + SentBytes) * 1000000 / ElapsedTimeUs);
+ }
+ }
+
+ // Sleeps for the configured fixed latency plus a per-KB transfer delay.
+ void SimulateLatency(uint64_t SendBytes, uint64_t ReceiveBytes)
+ {
+ double SleepSec = m_LatencySec;
+ if (m_DelayPerKBSec > 0.0)
+ {
+ SleepSec += m_DelayPerKBSec * (double(SendBytes + ReceiveBytes) / 1024u);
+ }
+ if (SleepSec > 0)
+ {
+ Sleep(int(SleepSec * 1000));
+ }
+ }
+
+ uint64_t m_MaxRangeSupported = 0;
+ BuildStorageCache::Statistics& m_Stats;
+ const double m_LatencySec = 0.0;
+ const double m_DelayPerKBSec = 0.0;
+ std::mutex m_Mutex; // guards m_Entries
+ std::unordered_map<IoHash, IoBuffer, IoHash::Hasher> m_Entries;
+};
+
+// Factory for the test-only in-memory cache (see InMemoryBuildStorageCache
+// for the meaning of MaxRangeSupported and the latency parameters).
+std::unique_ptr<BuildStorageCache>
+CreateInMemoryBuildStorageCache(uint64_t MaxRangeSupported, BuildStorageCache::Statistics& Stats, double LatencySec, double DelayPerKBSec)
+{
+ return std::make_unique<InMemoryBuildStorageCache>(MaxRangeSupported, Stats, LatencySec, DelayPerKBSec);
+}
+#endif // ZEN_WITH_TESTS
+
ZenCacheEndpointTestResult
TestZenCacheEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpVerbose)
{
@@ -474,15 +750,28 @@ TestZenCacheEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const boo
HttpClient::Response TestResponse = TestHttpClient.Get("/status/builds");
if (TestResponse.IsSuccess())
{
- LatencyTestResult LatencyResult = MeasureLatency(TestHttpClient, "/health");
+ uint64_t MaxRangeCountPerRequest = 1;
+ CbObject StatusResponse = TestResponse.AsObject();
+ if (StatusResponse["ok"].AsBool())
+ {
+ MaxRangeCountPerRequest = StatusResponse["capabilities"].AsObjectView()["maxrangecountperrequest"].AsUInt64(1);
+
+ LatencyTestResult LatencyResult = MeasureLatency(TestHttpClient, "/health");
+
+ if (!LatencyResult.Success)
+ {
+ return {.Success = false, .FailureReason = LatencyResult.FailureReason};
+ }
- if (!LatencyResult.Success)
+ return {.Success = true, .LatencySeconds = LatencyResult.LatencySeconds, .MaxRangeCountPerRequest = MaxRangeCountPerRequest};
+ }
+ else
{
- return {.Success = false, .FailureReason = LatencyResult.FailureReason};
+ return {.Success = false,
+ .FailureReason = fmt::format("ZenCache endpoint {}/status/builds did not respond with \"ok\"", BaseUrl)};
}
- return {.Success = true, .LatencySeconds = LatencyResult.LatencySeconds};
}
return {.Success = false, .FailureReason = TestResponse.ErrorMessage("")};
-};
+}
} // namespace zen
diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp
deleted file mode 100644
index 08a896f37..000000000
--- a/src/zenremotestore/builds/buildstorageoperations.cpp
+++ /dev/null
@@ -1,7943 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zenremotestore/builds/buildstorageoperations.h>
-
-#include <zenremotestore/builds/buildcontent.h>
-#include <zenremotestore/builds/buildmanifest.h>
-#include <zenremotestore/builds/buildsavedstate.h>
-#include <zenremotestore/builds/buildstorage.h>
-#include <zenremotestore/builds/buildstoragecache.h>
-#include <zenremotestore/builds/buildstorageutil.h>
-#include <zenremotestore/chunking/chunkblock.h>
-#include <zenremotestore/chunking/chunkingcache.h>
-#include <zenremotestore/chunking/chunkingcontroller.h>
-#include <zenremotestore/filesystemutils.h>
-#include <zenremotestore/operationlogoutput.h>
-
-#include <zencore/basicfile.h>
-#include <zencore/compactbinary.h>
-#include <zencore/compactbinaryfile.h>
-#include <zencore/compactbinaryutil.h>
-#include <zencore/compactbinaryvalue.h>
-#include <zencore/filesystem.h>
-#include <zencore/fmtutils.h>
-#include <zencore/parallelwork.h>
-#include <zencore/scopeguard.h>
-#include <zencore/string.h>
-#include <zencore/timer.h>
-#include <zencore/trace.h>
-#include <zenutil/wildcard.h>
-
-#include <numeric>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
-#include <tsl/robin_set.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-#if ZEN_WITH_TESTS
-# include <zencore/testing.h>
-# include <zencore/testutils.h>
-# include <zenremotestore/builds/filebuildstorage.h>
-#endif // ZEN_WITH_TESTS
-
-namespace zen {
-
-using namespace std::literals;
-
-namespace {
- std::filesystem::path ZenTempCacheFolderPath(const std::filesystem::path& ZenFolderPath)
- {
- return ZenTempFolderPath(ZenFolderPath) / "cache"; // Decompressed and verified data - chunks & sequences
- }
- std::filesystem::path ZenTempBlockFolderPath(const std::filesystem::path& ZenFolderPath)
- {
- return ZenTempFolderPath(ZenFolderPath) / "blocks"; // Temp storage for whole and partial blocks
- }
- std::filesystem::path ZenTempDownloadFolderPath(const std::filesystem::path& ZenFolderPath)
- {
- return ZenTempFolderPath(ZenFolderPath) / "download"; // Temp storage for decompressed and validated chunks
- }
-
- uint64_t GetBytesPerSecond(uint64_t ElapsedWallTimeUS, uint64_t Count)
- {
- if (ElapsedWallTimeUS == 0)
- {
- return 0;
- }
- return Count * 1000000 / ElapsedWallTimeUS;
- }
-
- std::filesystem::path GetTempChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash)
- {
- return CacheFolderPath / (RawHash.ToHexString() + ".tmp");
- }
-
- std::filesystem::path GetFinalChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash)
- {
- return CacheFolderPath / RawHash.ToHexString();
- }
-
- bool CleanDirectory(OperationLogOutput& OperationLogOutput,
- WorkerThreadPool& IOWorkerPool,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag,
- bool IsQuiet,
- const std::filesystem::path& Path,
- std::span<const std::string> ExcludeDirectories)
- {
- ZEN_TRACE_CPU("CleanDirectory");
- Stopwatch Timer;
-
- std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(OperationLogOutput.CreateProgressBar("Clean Folder"));
- OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr);
-
- CleanDirectoryResult Result = CleanDirectory(
- IOWorkerPool,
- AbortFlag,
- PauseFlag,
- Path,
- ExcludeDirectories,
- [&](const std::string_view Details, uint64_t TotalCount, uint64_t RemainingCount, bool IsPaused, bool IsAborted) {
- Progress.UpdateState({.Task = "Cleaning folder ",
- .Details = std::string(Details),
- .TotalCount = TotalCount,
- .RemainingCount = RemainingCount,
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
- },
- OperationLogOutput.GetProgressUpdateDelayMS());
-
- Progress.Finish();
-
- if (AbortFlag)
- {
- return false;
- }
-
- uint64_t ElapsedTimeMs = Timer.GetElapsedTimeMs();
-
- if (!Result.FailedRemovePaths.empty())
- {
- ExtendableStringBuilder<512> SB;
- for (size_t FailedPathIndex = 0; FailedPathIndex < Result.FailedRemovePaths.size(); FailedPathIndex++)
- {
- SB << fmt::format("\n '{}': ({}) {}",
- Result.FailedRemovePaths[FailedPathIndex].first,
- Result.FailedRemovePaths[FailedPathIndex].second.value(),
- Result.FailedRemovePaths[FailedPathIndex].second.message());
- }
- ZEN_OPERATION_LOG_WARN(OperationLogOutput, "Clean failed to remove files from '{}': {}", Path, SB.ToView());
- }
-
- if (ElapsedTimeMs >= 200 && !IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(OperationLogOutput,
- "Wiped folder '{}' {} ({}) in {}",
- Path,
- Result.FoundCount,
- NiceBytes(Result.DeletedByteCount),
- NiceTimeSpanMs(ElapsedTimeMs));
- }
-
- return Result.FailedRemovePaths.empty();
- }
-
- bool IsExtensionHashCompressable(const tsl::robin_set<uint32_t>& NonCompressableExtensionHashes, const uint32_t PathHash)
- {
- return !NonCompressableExtensionHashes.contains(PathHash);
- }
-
- bool IsChunkCompressable(const tsl::robin_set<uint32_t>& NonCompressableExtensionHashes,
- const ChunkedFolderContent& Content,
- const ChunkedContentLookup& Lookup,
- uint32_t ChunkIndex)
- {
- ZEN_UNUSED(Content);
- const uint32_t ChunkLocationCount = Lookup.ChunkSequenceLocationCounts[ChunkIndex];
- if (ChunkLocationCount == 0)
- {
- return false;
- }
- const size_t ChunkLocationOffset = Lookup.ChunkSequenceLocationOffset[ChunkIndex];
- const uint32_t SequenceIndex = Lookup.ChunkSequenceLocations[ChunkLocationOffset].SequenceIndex;
- const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex];
- const uint32_t ExtensionHash = Lookup.PathExtensionHash[PathIndex];
-
- const bool IsCompressable = IsExtensionHashCompressable(NonCompressableExtensionHashes, ExtensionHash);
- return IsCompressable;
- }
-
- template<typename T>
- std::string FormatArray(std::span<const T> Items, std::string_view Prefix)
- {
- ExtendableStringBuilder<512> SB;
- for (const T& Item : Items)
- {
- SB.Append(fmt::format("{}{}", Prefix, Item));
- }
- return SB.ToString();
- }
-
- void DownloadLargeBlob(BuildStorageBase& Storage,
- const std::filesystem::path& DownloadFolder,
- const Oid& BuildId,
- const IoHash& ChunkHash,
- const std::uint64_t PreferredMultipartChunkSize,
- ParallelWork& Work,
- WorkerThreadPool& NetworkPool,
- std::atomic<uint64_t>& DownloadedChunkByteCount,
- std::atomic<uint64_t>& MultipartAttachmentCount,
- std::function<void(IoBuffer&& Payload)>&& OnDownloadComplete)
- {
- ZEN_TRACE_CPU("DownloadLargeBlob");
-
- struct WorkloadData
- {
- TemporaryFile TempFile;
- };
- std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>());
-
- std::error_code Ec;
- Workload->TempFile.CreateTemporary(DownloadFolder, Ec);
- if (Ec)
- {
- throw std::runtime_error(
- fmt::format("Failed opening temporary file '{}', reason: ({}) {}", Workload->TempFile.GetPath(), Ec.message(), Ec.value()));
- }
- std::vector<std::function<void()>> WorkItems = Storage.GetLargeBuildBlob(
- BuildId,
- ChunkHash,
- PreferredMultipartChunkSize,
- [&Work, Workload, &DownloadedChunkByteCount](uint64_t Offset, const IoBuffer& Chunk) {
- DownloadedChunkByteCount += Chunk.GetSize();
-
- if (!Work.IsAborted())
- {
- ZEN_TRACE_CPU("Async_DownloadLargeBlob_OnReceive");
- Workload->TempFile.Write(Chunk.GetView(), Offset);
- }
- },
- [&Work, Workload, &DownloadedChunkByteCount, OnDownloadComplete = std::move(OnDownloadComplete)]() {
- if (!Work.IsAborted())
- {
- ZEN_TRACE_CPU("Async_DownloadLargeBlob_OnComplete");
-
- uint64_t PayloadSize = Workload->TempFile.FileSize();
- void* FileHandle = Workload->TempFile.Detach();
- ZEN_ASSERT(FileHandle != nullptr);
- IoBuffer Payload(IoBuffer::File, FileHandle, 0, PayloadSize, true);
- Payload.SetDeleteOnClose(true);
- OnDownloadComplete(std::move(Payload));
- }
- });
- if (!WorkItems.empty())
- {
- MultipartAttachmentCount++;
- }
- for (auto& WorkItem : WorkItems)
- {
- Work.ScheduleWork(NetworkPool, [WorkItem = std::move(WorkItem)](std::atomic<bool>& AbortFlag) {
- if (!AbortFlag)
- {
- ZEN_TRACE_CPU("Async_DownloadLargeBlob_Work");
-
- WorkItem();
- }
- });
- }
- }
-
- CompositeBuffer ValidateBlob(std::atomic<bool>& AbortFlag,
- IoBuffer&& Payload,
- const IoHash& BlobHash,
- uint64_t& OutCompressedSize,
- uint64_t& OutDecompressedSize)
- {
- ZEN_TRACE_CPU("ValidateBlob");
-
- if (Payload.GetContentType() != ZenContentType::kCompressedBinary)
- {
- throw std::runtime_error(fmt::format("Blob {} ({} bytes) has unexpected content type '{}'",
- BlobHash,
- Payload.GetSize(),
- ToString(Payload.GetContentType())));
- }
- IoHash RawHash;
- uint64_t RawSize;
- CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize);
- if (!Compressed)
- {
- throw std::runtime_error(fmt::format("Blob {} ({} bytes) compressed header is invalid", BlobHash, Payload.GetSize()));
- }
- if (RawHash != BlobHash)
- {
- throw std::runtime_error(
- fmt::format("Blob {} ({} bytes) compressed header has a mismatching raw hash {}", BlobHash, Payload.GetSize(), RawHash));
- }
-
- IoHashStream Hash;
- bool CouldDecompress = Compressed.DecompressToStream(
- 0,
- RawSize,
- [&AbortFlag, &Hash](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
- ZEN_UNUSED(SourceOffset, SourceSize, Offset);
- if (!AbortFlag)
- {
- for (const SharedBuffer& Segment : RangeBuffer.GetSegments())
- {
- Hash.Append(Segment.GetView());
- }
- return true;
- }
- return false;
- });
-
- if (AbortFlag)
- {
- return CompositeBuffer{};
- }
-
- if (!CouldDecompress)
- {
- throw std::runtime_error(
- fmt::format("Blob {} ({} bytes) failed to decompress - header information mismatch", BlobHash, Payload.GetSize()));
- }
- IoHash ValidateRawHash = Hash.GetHash();
- if (ValidateRawHash != BlobHash)
- {
- throw std::runtime_error(fmt::format("Blob {} ({} bytes) decompressed hash {} does not match header information",
- BlobHash,
- Payload.GetSize(),
- ValidateRawHash));
- }
- OodleCompressor Compressor;
- OodleCompressionLevel CompressionLevel;
- uint64_t BlockSize;
- if (!Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize))
- {
- throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to get compression details", BlobHash, Payload.GetSize()));
- }
- OutCompressedSize = Payload.GetSize();
- OutDecompressedSize = RawSize;
- if (CompressionLevel == OodleCompressionLevel::None)
- {
- // Only decompress to composite if we need it for block verification
- CompositeBuffer DecompressedComposite = Compressed.DecompressToComposite();
- if (!DecompressedComposite)
- {
- throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to decompress to composite", BlobHash, Payload.GetSize()));
- }
- return DecompressedComposite;
- }
- return CompositeBuffer{};
- }
-
-} // namespace
-
-bool
-IsSingleFileChunk(const ChunkedFolderContent& RemoteContent,
- const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> Locations)
-{
- if (Locations.size() == 1)
- {
- const uint32_t FirstSequenceIndex = Locations[0]->SequenceIndex;
- if (RemoteContent.ChunkedContent.ChunkCounts[FirstSequenceIndex] == 1)
- {
- ZEN_ASSERT_SLOW(Locations[0]->Offset == 0);
- return true;
- }
- }
- return false;
-}
-
-IoBuffer
-MakeBufferMemoryBased(const CompositeBuffer& PartialBlockBuffer)
-{
- ZEN_TRACE_CPU("MakeBufferMemoryBased");
- IoBuffer BlockMemoryBuffer;
- std::span<const SharedBuffer> Segments = PartialBlockBuffer.GetSegments();
- if (Segments.size() == 1)
- {
- IoBufferFileReference FileRef = {};
- if (PartialBlockBuffer.GetSegments().front().AsIoBuffer().GetFileReference(FileRef))
- {
- BlockMemoryBuffer = UniqueBuffer::Alloc(FileRef.FileChunkSize).MoveToShared().AsIoBuffer();
- BasicFile Reader;
- Reader.Attach(FileRef.FileHandle);
- auto _ = MakeGuard([&Reader]() { Reader.Detach(); });
- MutableMemoryView ReadMem = BlockMemoryBuffer.GetMutableView();
- Reader.Read(ReadMem.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset);
- return BlockMemoryBuffer;
- }
- else
- {
- return PartialBlockBuffer.GetSegments().front().AsIoBuffer();
- }
- }
- else
- {
- // Not a homogenous memory buffer, read all to memory
-
- BlockMemoryBuffer = UniqueBuffer::Alloc(PartialBlockBuffer.GetSize()).MoveToShared().AsIoBuffer();
- MutableMemoryView ReadMem = BlockMemoryBuffer.GetMutableView();
- for (const SharedBuffer& Segment : Segments)
- {
- IoBufferFileReference FileRef = {};
- if (Segment.AsIoBuffer().GetFileReference(FileRef))
- {
- BasicFile Reader;
- Reader.Attach(FileRef.FileHandle);
- auto _ = MakeGuard([&Reader]() { Reader.Detach(); });
- Reader.Read(ReadMem.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset);
- ReadMem = ReadMem.Mid(FileRef.FileChunkSize);
- }
- else
- {
- ReadMem = ReadMem.CopyFrom(Segment.AsIoBuffer().GetView());
- }
- }
- return BlockMemoryBuffer;
- }
-}
-
-class FilteredRate
-{
-public:
- FilteredRate() {}
-
- void Start()
- {
- if (StartTimeUS == (uint64_t)-1)
- {
- uint64_t Expected = (uint64_t)-1;
- if (StartTimeUS.compare_exchange_weak(Expected, Timer.GetElapsedTimeUs()))
- {
- LastTimeUS = StartTimeUS.load();
- }
- }
- }
- void Stop()
- {
- if (EndTimeUS == (uint64_t)-1)
- {
- uint64_t Expected = (uint64_t)-1;
- EndTimeUS.compare_exchange_weak(Expected, Timer.GetElapsedTimeUs());
- }
- }
-
- void Update(uint64_t Count)
- {
- if (LastTimeUS == (uint64_t)-1)
- {
- return;
- }
- uint64_t TimeUS = Timer.GetElapsedTimeUs();
- uint64_t TimeDeltaUS = TimeUS - LastTimeUS;
- if (TimeDeltaUS >= 2000000)
- {
- uint64_t Delta = Count - LastCount;
- uint64_t PerSecond = (Delta * 1000000) / TimeDeltaUS;
-
- LastPerSecond = PerSecond;
-
- LastCount = Count;
-
- FilteredPerSecond = (PerSecond + (LastPerSecond * 7)) / 8;
-
- LastTimeUS = TimeUS;
- }
- }
-
- uint64_t GetCurrent() const // If Stopped - return total count / total time
- {
- if (LastTimeUS == (uint64_t)-1)
- {
- return 0;
- }
- return FilteredPerSecond;
- }
-
- uint64_t GetElapsedTimeUS() const
- {
- if (StartTimeUS == (uint64_t)-1)
- {
- return 0;
- }
- if (EndTimeUS == (uint64_t)-1)
- {
- return 0;
- }
- uint64_t TimeDeltaUS = EndTimeUS - StartTimeUS;
- return TimeDeltaUS;
- }
-
- bool IsActive() const { return (StartTimeUS != (uint64_t)-1) && (EndTimeUS == (uint64_t)-1); }
-
-private:
- Stopwatch Timer;
- std::atomic<uint64_t> StartTimeUS = (uint64_t)-1;
- std::atomic<uint64_t> EndTimeUS = (uint64_t)-1;
- std::atomic<uint64_t> LastTimeUS = (uint64_t)-1;
- uint64_t LastCount = 0;
- uint64_t LastPerSecond = 0;
- uint64_t FilteredPerSecond = 0;
-};
-
-std::filesystem::path
-ZenStateFilePath(const std::filesystem::path& ZenFolderPath)
-{
- return ZenFolderPath / "current_state.cbo";
-}
-std::filesystem::path
-ZenTempFolderPath(const std::filesystem::path& ZenFolderPath)
-{
- return ZenFolderPath / "tmp";
-}
-
-////////////////////// BuildsOperationUpdateFolder
-
-BuildsOperationUpdateFolder::BuildsOperationUpdateFolder(OperationLogOutput& OperationLogOutput,
- StorageInstance& Storage,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag,
- WorkerThreadPool& IOWorkerPool,
- WorkerThreadPool& NetworkPool,
- const Oid& BuildId,
- const std::filesystem::path& Path,
- const ChunkedFolderContent& LocalContent,
- const ChunkedContentLookup& LocalLookup,
- const ChunkedFolderContent& RemoteContent,
- const ChunkedContentLookup& RemoteLookup,
- const std::vector<ChunkBlockDescription>& BlockDescriptions,
- const std::vector<IoHash>& LooseChunkHashes,
- const Options& Options)
-: m_LogOutput(OperationLogOutput)
-, m_Storage(Storage)
-, m_AbortFlag(AbortFlag)
-, m_PauseFlag(PauseFlag)
-, m_IOWorkerPool(IOWorkerPool)
-, m_NetworkPool(NetworkPool)
-, m_BuildId(BuildId)
-, m_Path(Path)
-, m_LocalContent(LocalContent)
-, m_LocalLookup(LocalLookup)
-, m_RemoteContent(RemoteContent)
-, m_RemoteLookup(RemoteLookup)
-, m_BlockDescriptions(BlockDescriptions)
-, m_LooseChunkHashes(LooseChunkHashes)
-, m_Options(Options)
-, m_CacheFolderPath(ZenTempCacheFolderPath(m_Options.ZenFolderPath))
-, m_TempDownloadFolderPath(ZenTempDownloadFolderPath(m_Options.ZenFolderPath))
-, m_TempBlockFolderPath(ZenTempBlockFolderPath(m_Options.ZenFolderPath))
-{
-}
-
-void
-BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState)
-{
- ZEN_TRACE_CPU("BuildsOperationUpdateFolder::Execute");
- try
- {
- enum class TaskSteps : uint32_t
- {
- ScanExistingData,
- WriteChunks,
- PrepareTarget,
- FinalizeTarget,
- Cleanup,
- StepCount
- };
-
- auto EndProgress =
- MakeGuard([&]() { m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); });
-
- ZEN_ASSERT((!m_Options.PrimeCacheOnly) ||
- (m_Options.PrimeCacheOnly && (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::Off)));
-
- m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::ScanExistingData, (uint32_t)TaskSteps::StepCount);
-
- CreateDirectories(m_CacheFolderPath);
- CreateDirectories(m_TempDownloadFolderPath);
- CreateDirectories(m_TempBlockFolderPath);
-
- Stopwatch CacheMappingTimer;
-
- std::vector<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters(m_RemoteContent.ChunkedContent.SequenceRawHashes.size());
- std::vector<bool> RemoteChunkIndexNeedsCopyFromLocalFileFlags(m_RemoteContent.ChunkedContent.ChunkHashes.size());
- std::vector<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags(m_RemoteContent.ChunkedContent.ChunkHashes.size());
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedChunkHashesFound;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedSequenceHashesFound;
- if (!m_Options.PrimeCacheOnly)
- {
- ScanCacheFolder(CachedChunkHashesFound, CachedSequenceHashesFound);
- }
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedBlocksFound;
- if (!m_Options.PrimeCacheOnly)
- {
- ScanTempBlocksFolder(CachedBlocksFound);
- }
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexesLeftToFindToRemoteIndex;
-
- if (!m_Options.PrimeCacheOnly && m_Options.EnableTargetFolderScavenging)
- {
- // Pick up all whole files we can use from current local state
- ZEN_TRACE_CPU("GetLocalSequences");
-
- Stopwatch LocalTimer;
-
- std::vector<uint32_t> MissingSequenceIndexes = ScanTargetFolder(CachedChunkHashesFound, CachedSequenceHashesFound);
-
- for (uint32_t RemoteSequenceIndex : MissingSequenceIndexes)
- {
- // We must write the sequence
- const uint32_t ChunkCount = m_RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex];
- const IoHash& RemoteSequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
- SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = ChunkCount;
- SequenceIndexesLeftToFindToRemoteIndex.insert({RemoteSequenceRawHash, RemoteSequenceIndex});
- }
- }
- else
- {
- for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < m_RemoteContent.ChunkedContent.SequenceRawHashes.size();
- RemoteSequenceIndex++)
- {
- const uint32_t ChunkCount = m_RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex];
- SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = ChunkCount;
- }
- }
-
- std::vector<ChunkedFolderContent> ScavengedContents;
- std::vector<ChunkedContentLookup> ScavengedLookups;
- std::vector<std::filesystem::path> ScavengedPaths;
-
- std::vector<ScavengedSequenceCopyOperation> ScavengedSequenceCopyOperations;
- uint64_t ScavengedPathsCount = 0;
-
- if (!m_Options.PrimeCacheOnly && m_Options.EnableOtherDownloadsScavenging)
- {
- ZEN_TRACE_CPU("GetScavengedSequences");
-
- Stopwatch ScavengeTimer;
-
- if (!SequenceIndexesLeftToFindToRemoteIndex.empty())
- {
- std::vector<ScavengeSource> ScavengeSources = FindScavengeSources();
-
- const size_t ScavengePathCount = ScavengeSources.size();
-
- ScavengedContents.resize(ScavengePathCount);
- ScavengedLookups.resize(ScavengePathCount);
- ScavengedPaths.resize(ScavengePathCount);
-
- std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Scavenging"));
- OperationLogOutput::ProgressBar& ScavengeProgressBar(*ProgressBarPtr);
-
- ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
-
- std::atomic<uint64_t> PathsFound(0);
- std::atomic<uint64_t> ChunksFound(0);
- std::atomic<uint64_t> PathsScavenged(0);
-
- for (size_t ScavengeIndex = 0; ScavengeIndex < ScavengePathCount; ScavengeIndex++)
- {
- Work.ScheduleWork(m_IOWorkerPool,
- [this,
- &ScavengeSources,
- &ScavengedContents,
- &ScavengedPaths,
- &ScavengedLookups,
- &PathsFound,
- &ChunksFound,
- &PathsScavenged,
- ScavengeIndex](std::atomic<bool>&) {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_FindScavengeContent");
-
- const ScavengeSource& Source = ScavengeSources[ScavengeIndex];
- ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengeIndex];
- ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengeIndex];
-
- if (FindScavengeContent(Source, ScavengedLocalContent, ScavengedLookup))
- {
- ScavengedPaths[ScavengeIndex] = Source.Path;
- PathsFound += ScavengedLocalContent.Paths.size();
- ChunksFound += ScavengedLocalContent.ChunkedContent.ChunkHashes.size();
- }
- else
- {
- ScavengedPaths[ScavengeIndex].clear();
- }
- PathsScavenged++;
- }
- });
- }
- {
- ZEN_TRACE_CPU("ScavengeScan_Wait");
-
- Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
- ZEN_UNUSED(PendingWork);
- std::string Details = fmt::format("{}/{} scanned. {} paths and {} chunks found for scavenging",
- PathsScavenged.load(),
- ScavengePathCount,
- PathsFound.load(),
- ChunksFound.load());
- ScavengeProgressBar.UpdateState(
- {.Task = "Scavenging ",
- .Details = Details,
- .TotalCount = ScavengePathCount,
- .RemainingCount = ScavengePathCount - PathsScavenged.load(),
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
- });
- }
-
- ScavengeProgressBar.Finish();
- if (m_AbortFlag)
- {
- return;
- }
-
- for (uint32_t ScavengedContentIndex = 0;
- ScavengedContentIndex < ScavengedContents.size() && (!SequenceIndexesLeftToFindToRemoteIndex.empty());
- ScavengedContentIndex++)
- {
- const std::filesystem::path& ScavengePath = ScavengedPaths[ScavengedContentIndex];
- if (!ScavengePath.empty())
- {
- const ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengedContentIndex];
- const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex];
-
- for (uint32_t ScavengedSequenceIndex = 0;
- ScavengedSequenceIndex < ScavengedLocalContent.ChunkedContent.SequenceRawHashes.size();
- ScavengedSequenceIndex++)
- {
- const IoHash& SequenceRawHash = ScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengedSequenceIndex];
- if (auto It = SequenceIndexesLeftToFindToRemoteIndex.find(SequenceRawHash);
- It != SequenceIndexesLeftToFindToRemoteIndex.end())
- {
- const uint32_t RemoteSequenceIndex = It->second;
- const uint64_t RawSize =
- m_RemoteContent.RawSizes[m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]];
- ZEN_ASSERT(RawSize > 0);
-
- const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[ScavengedSequenceIndex];
- ZEN_ASSERT_SLOW(IsFile((ScavengePath / ScavengedLocalContent.Paths[ScavengedPathIndex]).make_preferred()));
-
- ScavengedSequenceCopyOperations.push_back({.ScavengedContentIndex = ScavengedContentIndex,
- .ScavengedPathIndex = ScavengedPathIndex,
- .RemoteSequenceIndex = RemoteSequenceIndex,
- .RawSize = RawSize});
-
- SequenceIndexesLeftToFindToRemoteIndex.erase(SequenceRawHash);
- SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = 0;
-
- m_CacheMappingStats.ScavengedPathsMatchingSequencesCount++;
- m_CacheMappingStats.ScavengedPathsMatchingSequencesByteCount += RawSize;
- }
- }
- ScavengedPathsCount++;
- }
- }
- }
- m_CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs();
- }
-
- uint32_t RemainingChunkCount = 0;
- for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++)
- {
- uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex);
- if (ChunkWriteCount > 0)
- {
- RemainingChunkCount++;
- }
- }
-
- // Pick up all chunks in current local state
- tsl::robin_map<IoHash, size_t, IoHash::Hasher> RawHashToCopyChunkDataIndex;
- std::vector<CopyChunkData> CopyChunkDatas;
-
- if (!m_Options.PrimeCacheOnly && m_Options.EnableTargetFolderScavenging)
- {
- ZEN_TRACE_CPU("GetLocalChunks");
-
- Stopwatch LocalTimer;
-
- ScavengeSourceForChunks(RemainingChunkCount,
- RemoteChunkIndexNeedsCopyFromLocalFileFlags,
- RawHashToCopyChunkDataIndex,
- SequenceIndexChunksLeftToWriteCounters,
- m_LocalContent,
- m_LocalLookup,
- CopyChunkDatas,
- uint32_t(-1),
- m_CacheMappingStats.LocalChunkMatchingRemoteCount,
- m_CacheMappingStats.LocalChunkMatchingRemoteByteCount);
-
- m_CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs();
- }
-
- if (!m_Options.PrimeCacheOnly && m_Options.EnableOtherDownloadsScavenging)
- {
- ZEN_TRACE_CPU("GetScavengeChunks");
-
- Stopwatch ScavengeTimer;
-
- for (uint32_t ScavengedContentIndex = 0; ScavengedContentIndex < ScavengedContents.size() && (RemainingChunkCount > 0);
- ScavengedContentIndex++)
- {
- const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengedContentIndex];
- const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex];
-
- ScavengeSourceForChunks(RemainingChunkCount,
- RemoteChunkIndexNeedsCopyFromLocalFileFlags,
- RawHashToCopyChunkDataIndex,
- SequenceIndexChunksLeftToWriteCounters,
- ScavengedContent,
- ScavengedLookup,
- CopyChunkDatas,
- ScavengedContentIndex,
- m_CacheMappingStats.ScavengedChunkMatchingRemoteCount,
- m_CacheMappingStats.ScavengedChunkMatchingRemoteByteCount);
- }
- m_CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs();
- }
-
- if (!m_Options.IsQuiet)
- {
- if (m_CacheMappingStats.CacheSequenceHashesCount > 0 || m_CacheMappingStats.CacheChunkCount > 0 ||
- m_CacheMappingStats.CacheBlockCount > 0)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Download cache: Found {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks in {}",
- m_CacheMappingStats.CacheSequenceHashesCount,
- NiceBytes(m_CacheMappingStats.CacheSequenceHashesByteCount),
- m_CacheMappingStats.CacheChunkCount,
- NiceBytes(m_CacheMappingStats.CacheChunkByteCount),
- m_CacheMappingStats.CacheBlockCount,
- NiceBytes(m_CacheMappingStats.CacheBlocksByteCount),
- NiceTimeSpanMs(m_CacheMappingStats.CacheScanElapsedWallTimeUs / 1000));
- }
-
- if (m_CacheMappingStats.LocalPathsMatchingSequencesCount > 0 || m_CacheMappingStats.LocalChunkMatchingRemoteCount > 0)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Local state : Found {} ({}) chunk sequences, {} ({}) chunks in {}",
- m_CacheMappingStats.LocalPathsMatchingSequencesCount,
- NiceBytes(m_CacheMappingStats.LocalPathsMatchingSequencesByteCount),
- m_CacheMappingStats.LocalChunkMatchingRemoteCount,
- NiceBytes(m_CacheMappingStats.LocalChunkMatchingRemoteByteCount),
- NiceTimeSpanMs(m_CacheMappingStats.LocalScanElapsedWallTimeUs / 1000));
- }
- if (m_CacheMappingStats.ScavengedPathsMatchingSequencesCount > 0 || m_CacheMappingStats.ScavengedChunkMatchingRemoteCount > 0)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Scavenge of {} paths, found {} ({}) chunk sequences, {} ({}) chunks in {}",
- ScavengedPathsCount,
- m_CacheMappingStats.ScavengedPathsMatchingSequencesCount,
- NiceBytes(m_CacheMappingStats.ScavengedPathsMatchingSequencesByteCount),
- m_CacheMappingStats.ScavengedChunkMatchingRemoteCount,
- NiceBytes(m_CacheMappingStats.ScavengedChunkMatchingRemoteByteCount),
- NiceTimeSpanMs(m_CacheMappingStats.ScavengeElapsedWallTimeUs / 1000));
- }
- }
-
- uint64_t BytesToWrite = 0;
-
- for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++)
- {
- uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex);
- if (ChunkWriteCount > 0)
- {
- BytesToWrite += m_RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] * ChunkWriteCount;
- if (!RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex])
- {
- RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex] = true;
- }
- }
- }
-
- for (const ScavengedSequenceCopyOperation& ScavengeCopyOp : ScavengedSequenceCopyOperations)
- {
- BytesToWrite += ScavengeCopyOp.RawSize;
- }
-
- uint64_t BytesToValidate = m_Options.ValidateCompletedSequences ? BytesToWrite : 0;
-
- uint64_t TotalRequestCount = 0;
- uint64_t TotalPartWriteCount = 0;
- std::atomic<uint64_t> WritePartsComplete = 0;
-
- tsl::robin_map<std::string, uint32_t> RemotePathToRemoteIndex;
- RemotePathToRemoteIndex.reserve(m_RemoteContent.Paths.size());
- for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++)
- {
- RemotePathToRemoteIndex.insert({m_RemoteContent.Paths[RemotePathIndex].generic_string(), RemotePathIndex});
- }
-
- CheckRequiredDiskSpace(RemotePathToRemoteIndex);
-
- BlobsExistsResult ExistsResult;
- {
- ChunkBlockAnalyser BlockAnalyser(m_LogOutput,
- m_BlockDescriptions,
- ChunkBlockAnalyser::Options{.IsQuiet = m_Options.IsQuiet,
- .IsVerbose = m_Options.IsVerbose,
- .HostLatencySec = m_Storage.BuildStorageLatencySec,
- .HostHighSpeedLatencySec = m_Storage.CacheLatencySec});
-
- std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = BlockAnalyser.GetNeeded(
- m_RemoteLookup.ChunkHashToChunkIndex,
- [&](uint32_t RemoteChunkIndex) -> bool { return RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]; });
-
- std::vector<uint32_t> FetchBlockIndexes;
- std::vector<uint32_t> CachedChunkBlockIndexes;
-
- {
- ZEN_TRACE_CPU("BlockCacheFileExists");
- for (const ChunkBlockAnalyser::NeededBlock& NeededBlock : NeededBlocks)
- {
- if (m_Options.PrimeCacheOnly)
- {
- FetchBlockIndexes.push_back(NeededBlock.BlockIndex);
- }
- else
- {
- const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex];
- bool UsingCachedBlock = false;
- if (auto It = CachedBlocksFound.find(BlockDescription.BlockHash); It != CachedBlocksFound.end())
- {
- TotalPartWriteCount++;
-
- std::filesystem::path BlockPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString();
- if (IsFile(BlockPath))
- {
- CachedChunkBlockIndexes.push_back(NeededBlock.BlockIndex);
- UsingCachedBlock = true;
- }
- }
- if (!UsingCachedBlock)
- {
- FetchBlockIndexes.push_back(NeededBlock.BlockIndex);
- }
- }
- }
- }
-
- std::vector<uint32_t> NeededLooseChunkIndexes;
-
- {
- NeededLooseChunkIndexes.reserve(m_LooseChunkHashes.size());
- for (uint32_t LooseChunkIndex = 0; LooseChunkIndex < m_LooseChunkHashes.size(); LooseChunkIndex++)
- {
- const IoHash& ChunkHash = m_LooseChunkHashes[LooseChunkIndex];
- auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash);
- ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end());
- const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second;
-
- if (RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex])
- {
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Skipping chunk {} due to cache reuse",
- m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]);
- }
- continue;
- }
-
- bool NeedsCopy = true;
- if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false))
- {
- uint64_t WriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex);
- if (WriteCount == 0)
- {
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Skipping chunk {} due to cache reuse",
- m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]);
- }
- }
- else
- {
- NeededLooseChunkIndexes.push_back(LooseChunkIndex);
- }
- }
- }
- }
-
- if (m_Storage.BuildCacheStorage)
- {
- ZEN_TRACE_CPU("BlobCacheExistCheck");
- Stopwatch Timer;
-
- std::vector<IoHash> BlobHashes;
- BlobHashes.reserve(NeededLooseChunkIndexes.size() + FetchBlockIndexes.size());
-
- for (const uint32_t LooseChunkIndex : NeededLooseChunkIndexes)
- {
- BlobHashes.push_back(m_LooseChunkHashes[LooseChunkIndex]);
- }
-
- for (uint32_t BlockIndex : FetchBlockIndexes)
- {
- BlobHashes.push_back(m_BlockDescriptions[BlockIndex].BlockHash);
- }
-
- const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult =
- m_Storage.BuildCacheStorage->BlobsExists(m_BuildId, BlobHashes);
-
- if (CacheExistsResult.size() == BlobHashes.size())
- {
- ExistsResult.ExistingBlobs.reserve(CacheExistsResult.size());
- for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++)
- {
- if (CacheExistsResult[BlobIndex].HasBody)
- {
- ExistsResult.ExistingBlobs.insert(BlobHashes[BlobIndex]);
- }
- }
- }
- ExistsResult.ElapsedTimeMs = Timer.GetElapsedTimeMs();
- if (!ExistsResult.ExistingBlobs.empty() && !m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Remote cache : Found {} out of {} needed blobs in {}",
- ExistsResult.ExistingBlobs.size(),
- BlobHashes.size(),
- NiceTimeSpanMs(ExistsResult.ElapsedTimeMs));
- }
- }
-
- std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> BlockPartialDownloadModes;
- if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::Off)
- {
- BlockPartialDownloadModes.resize(m_BlockDescriptions.size(), ChunkBlockAnalyser::EPartialBlockDownloadMode::Off);
- }
- else
- {
- BlockPartialDownloadModes.reserve(m_BlockDescriptions.size());
- for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++)
- {
- const bool BlockExistInCache = ExistsResult.ExistingBlobs.contains(m_BlockDescriptions[BlockIndex].BlockHash);
- if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::All)
- {
- BlockPartialDownloadModes.push_back(BlockExistInCache
- ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
- : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange);
- }
- else if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::ZenCacheOnly)
- {
- BlockPartialDownloadModes.push_back(BlockExistInCache
- ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
- : ChunkBlockAnalyser::EPartialBlockDownloadMode::Off);
- }
- else if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::Mixed)
- {
- BlockPartialDownloadModes.push_back(BlockExistInCache
- ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
- : ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange);
- }
- }
- }
- ZEN_ASSERT(BlockPartialDownloadModes.size() == m_BlockDescriptions.size());
-
- ChunkBlockAnalyser::BlockResult PartialBlocks =
- BlockAnalyser.CalculatePartialBlockDownloads(NeededBlocks, BlockPartialDownloadModes);
-
- struct LooseChunkHashWorkData
- {
- std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs;
- uint32_t RemoteChunkIndex = (uint32_t)-1;
- };
-
- TotalRequestCount += NeededLooseChunkIndexes.size();
- TotalPartWriteCount += NeededLooseChunkIndexes.size();
- TotalRequestCount += PartialBlocks.BlockRanges.size();
- TotalPartWriteCount += PartialBlocks.BlockRanges.size();
- TotalRequestCount += PartialBlocks.FullBlockIndexes.size();
- TotalPartWriteCount += PartialBlocks.FullBlockIndexes.size();
-
- std::vector<LooseChunkHashWorkData> LooseChunkHashWorks;
- for (uint32_t LooseChunkIndex : NeededLooseChunkIndexes)
- {
- const IoHash& ChunkHash = m_LooseChunkHashes[LooseChunkIndex];
- auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash);
- ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end());
- const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second;
-
- std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs =
- GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex);
-
- ZEN_ASSERT(!ChunkTargetPtrs.empty());
- LooseChunkHashWorks.push_back(
- LooseChunkHashWorkData{.ChunkTargetPtrs = ChunkTargetPtrs, .RemoteChunkIndex = RemoteChunkIndex});
- }
-
- ZEN_TRACE_CPU("WriteChunks");
-
- m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::WriteChunks, (uint32_t)TaskSteps::StepCount);
-
- Stopwatch WriteTimer;
-
- FilteredRate FilteredDownloadedBytesPerSecond;
- FilteredRate FilteredWrittenBytesPerSecond;
-
- std::unique_ptr<OperationLogOutput::ProgressBar> WriteProgressBarPtr(
- m_LogOutput.CreateProgressBar(m_Options.PrimeCacheOnly ? "Downloading" : "Writing"));
- OperationLogOutput::ProgressBar& WriteProgressBar(*WriteProgressBarPtr);
- ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
-
- TotalPartWriteCount += CopyChunkDatas.size();
- TotalPartWriteCount += ScavengedSequenceCopyOperations.size();
-
- BufferedWriteFileCache WriteCache;
-
- for (uint32_t ScavengeOpIndex = 0; ScavengeOpIndex < ScavengedSequenceCopyOperations.size(); ScavengeOpIndex++)
- {
- if (m_AbortFlag)
- {
- break;
- }
- if (!m_Options.PrimeCacheOnly)
- {
- Work.ScheduleWork(
- m_IOWorkerPool,
- [this,
- &ScavengedPaths,
- &ScavengedSequenceCopyOperations,
- &ScavengedContents,
- &FilteredWrittenBytesPerSecond,
- ScavengeOpIndex,
- &WritePartsComplete,
- TotalPartWriteCount](std::atomic<bool>&) mutable {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_WriteScavenged");
-
- FilteredWrittenBytesPerSecond.Start();
-
- const ScavengedSequenceCopyOperation& ScavengeOp = ScavengedSequenceCopyOperations[ScavengeOpIndex];
- const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengeOp.ScavengedContentIndex];
- const std::filesystem::path& ScavengeRootPath = ScavengedPaths[ScavengeOp.ScavengedContentIndex];
-
- WriteScavengedSequenceToCache(ScavengeRootPath, ScavengedContent, ScavengeOp);
-
- WritePartsComplete++;
- if (WritePartsComplete == TotalPartWriteCount)
- {
- FilteredWrittenBytesPerSecond.Stop();
- }
- }
- });
- }
- }
-
- for (uint32_t LooseChunkHashWorkIndex = 0; LooseChunkHashWorkIndex < LooseChunkHashWorks.size(); LooseChunkHashWorkIndex++)
- {
- if (m_AbortFlag)
- {
- break;
- }
-
- if (m_Options.PrimeCacheOnly)
- {
- const uint32_t RemoteChunkIndex = LooseChunkHashWorks[LooseChunkHashWorkIndex].RemoteChunkIndex;
- if (ExistsResult.ExistingBlobs.contains(m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]))
- {
- m_DownloadStats.RequestsCompleteCount++;
- continue;
- }
- }
-
- Work.ScheduleWork(
- m_IOWorkerPool,
- [this,
- &SequenceIndexChunksLeftToWriteCounters,
- &Work,
- &ExistsResult,
- &WritePartsComplete,
- &LooseChunkHashWorks,
- LooseChunkHashWorkIndex,
- TotalRequestCount,
- TotalPartWriteCount,
- &WriteCache,
- &FilteredDownloadedBytesPerSecond,
- &FilteredWrittenBytesPerSecond](std::atomic<bool>&) mutable {
- ZEN_TRACE_CPU("Async_ReadPreDownloadedChunk");
- if (!m_AbortFlag)
- {
- LooseChunkHashWorkData& LooseChunkHashWork = LooseChunkHashWorks[LooseChunkHashWorkIndex];
- const uint32_t RemoteChunkIndex = LooseChunkHashWorks[LooseChunkHashWorkIndex].RemoteChunkIndex;
- WriteLooseChunk(RemoteChunkIndex,
- ExistsResult,
- SequenceIndexChunksLeftToWriteCounters,
- WritePartsComplete,
- std::move(LooseChunkHashWork.ChunkTargetPtrs),
- WriteCache,
- Work,
- TotalRequestCount,
- TotalPartWriteCount,
- FilteredDownloadedBytesPerSecond,
- FilteredWrittenBytesPerSecond);
- }
- },
- WorkerThreadPool::EMode::EnableBacklog);
- }
-
- std::unique_ptr<CloneQueryInterface> CloneQuery;
- if (m_Options.AllowFileClone)
- {
- CloneQuery = GetCloneQueryInterface(m_CacheFolderPath);
- }
-
- for (size_t CopyDataIndex = 0; CopyDataIndex < CopyChunkDatas.size(); CopyDataIndex++)
- {
- ZEN_ASSERT(!m_Options.PrimeCacheOnly);
- if (m_AbortFlag)
- {
- break;
- }
-
- Work.ScheduleWork(m_IOWorkerPool,
- [this,
- &CloneQuery,
- &SequenceIndexChunksLeftToWriteCounters,
- &WriteCache,
- &Work,
- &FilteredWrittenBytesPerSecond,
- &CopyChunkDatas,
- &ScavengedContents,
- &ScavengedLookups,
- &ScavengedPaths,
- &WritePartsComplete,
- TotalPartWriteCount,
- CopyDataIndex](std::atomic<bool>&) {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_CopyLocal");
-
- FilteredWrittenBytesPerSecond.Start();
- const CopyChunkData& CopyData = CopyChunkDatas[CopyDataIndex];
-
- std::vector<uint32_t> WrittenSequenceIndexes = WriteLocalChunkToCache(CloneQuery.get(),
- CopyData,
- ScavengedContents,
- ScavengedLookups,
- ScavengedPaths,
- WriteCache);
- WritePartsComplete++;
- if (!m_AbortFlag)
- {
- if (WritePartsComplete == TotalPartWriteCount)
- {
- FilteredWrittenBytesPerSecond.Stop();
- }
-
- // Write tracking, updating this must be done without any files open
- std::vector<uint32_t> CompletedChunkSequences;
- for (uint32_t RemoteSequenceIndex : WrittenSequenceIndexes)
- {
- if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters))
- {
- CompletedChunkSequences.push_back(RemoteSequenceIndex);
- }
- }
- WriteCache.Close(CompletedChunkSequences);
- VerifyAndCompleteChunkSequencesAsync(CompletedChunkSequences, Work);
- }
- }
- });
- }
-
- for (uint32_t BlockIndex : CachedChunkBlockIndexes)
- {
- ZEN_ASSERT(!m_Options.PrimeCacheOnly);
- if (m_AbortFlag)
- {
- break;
- }
-
- Work.ScheduleWork(
- m_IOWorkerPool,
- [this,
- &RemoteChunkIndexNeedsCopyFromSourceFlags,
- &SequenceIndexChunksLeftToWriteCounters,
- &WriteCache,
- &Work,
- &FilteredWrittenBytesPerSecond,
- &WritePartsComplete,
- TotalPartWriteCount,
- BlockIndex](std::atomic<bool>&) mutable {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_WriteCachedBlock");
-
- const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
- FilteredWrittenBytesPerSecond.Start();
-
- std::filesystem::path BlockChunkPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString();
- IoBuffer BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath);
- if (!BlockBuffer)
- {
- throw std::runtime_error(
- fmt::format("Can not read block {} at {}", BlockDescription.BlockHash, BlockChunkPath));
- }
-
- if (!m_AbortFlag)
- {
- if (!WriteChunksBlockToCache(BlockDescription,
- SequenceIndexChunksLeftToWriteCounters,
- Work,
- CompositeBuffer(std::move(BlockBuffer)),
- RemoteChunkIndexNeedsCopyFromSourceFlags,
- WriteCache))
- {
- std::error_code DummyEc;
- RemoveFile(BlockChunkPath, DummyEc);
- throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash));
- }
-
- std::error_code Ec = TryRemoveFile(BlockChunkPath);
- if (Ec)
- {
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput,
- "Failed removing file '{}', reason: ({}) {}",
- BlockChunkPath,
- Ec.value(),
- Ec.message());
- }
-
- WritePartsComplete++;
-
- if (WritePartsComplete == TotalPartWriteCount)
- {
- FilteredWrittenBytesPerSecond.Stop();
- }
- }
- }
- });
- }
-
- for (size_t BlockRangeIndex = 0; BlockRangeIndex < PartialBlocks.BlockRanges.size();)
- {
- ZEN_ASSERT(!m_Options.PrimeCacheOnly);
- if (m_AbortFlag)
- {
- break;
- }
-
- size_t RangeCount = 1;
- size_t RangesLeft = PartialBlocks.BlockRanges.size() - BlockRangeIndex;
- const ChunkBlockAnalyser::BlockRangeDescriptor& CurrentBlockRange = PartialBlocks.BlockRanges[BlockRangeIndex];
- while (RangeCount < RangesLeft &&
- CurrentBlockRange.BlockIndex == PartialBlocks.BlockRanges[BlockRangeIndex + RangeCount].BlockIndex)
- {
- RangeCount++;
- }
-
- Work.ScheduleWork(
- m_NetworkPool,
- [this,
- &RemoteChunkIndexNeedsCopyFromSourceFlags,
- &SequenceIndexChunksLeftToWriteCounters,
- &ExistsResult,
- &WriteCache,
- &FilteredDownloadedBytesPerSecond,
- TotalRequestCount,
- &WritePartsComplete,
- TotalPartWriteCount,
- &FilteredWrittenBytesPerSecond,
- &Work,
- &PartialBlocks,
- BlockRangeStartIndex = BlockRangeIndex,
- RangeCount](std::atomic<bool>&) {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_GetPartialBlockRanges");
-
- FilteredDownloadedBytesPerSecond.Start();
-
- for (size_t BlockRangeIndex = BlockRangeStartIndex; BlockRangeIndex < BlockRangeStartIndex + RangeCount;
- BlockRangeIndex++)
- {
- ZEN_TRACE_CPU("GetPartialBlock");
-
- const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = PartialBlocks.BlockRanges[BlockRangeIndex];
-
- DownloadPartialBlock(
- BlockRange,
- ExistsResult,
- [this,
- &RemoteChunkIndexNeedsCopyFromSourceFlags,
- &SequenceIndexChunksLeftToWriteCounters,
- &WritePartsComplete,
- &WriteCache,
- &Work,
- TotalRequestCount,
- TotalPartWriteCount,
- &FilteredDownloadedBytesPerSecond,
- &FilteredWrittenBytesPerSecond,
- &BlockRange](IoBuffer&& InMemoryBuffer, const std::filesystem::path& OnDiskPath) {
- if (m_DownloadStats.RequestsCompleteCount == TotalRequestCount)
- {
- FilteredDownloadedBytesPerSecond.Stop();
- }
-
- if (!m_AbortFlag)
- {
- Work.ScheduleWork(
- m_IOWorkerPool,
- [this,
- &RemoteChunkIndexNeedsCopyFromSourceFlags,
- &SequenceIndexChunksLeftToWriteCounters,
- &WritePartsComplete,
- &WriteCache,
- &Work,
- TotalPartWriteCount,
- &FilteredWrittenBytesPerSecond,
- &BlockRange,
- BlockChunkPath = std::filesystem::path(OnDiskPath),
- BlockPartialBuffer = std::move(InMemoryBuffer)](std::atomic<bool>&) mutable {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_WritePartialBlock");
-
- const uint32_t BlockIndex = BlockRange.BlockIndex;
-
- const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
-
- if (BlockChunkPath.empty())
- {
- ZEN_ASSERT(BlockPartialBuffer);
- }
- else
- {
- ZEN_ASSERT(!BlockPartialBuffer);
- BlockPartialBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath);
- if (!BlockPartialBuffer)
- {
- throw std::runtime_error(
- fmt::format("Could not open downloaded block {} from {}",
- BlockDescription.BlockHash,
- BlockChunkPath));
- }
- }
-
- FilteredWrittenBytesPerSecond.Start();
-
- if (!WritePartialBlockChunksToCache(
- BlockDescription,
- SequenceIndexChunksLeftToWriteCounters,
- Work,
- CompositeBuffer(std::move(BlockPartialBuffer)),
- BlockRange.ChunkBlockIndexStart,
- BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount - 1,
- RemoteChunkIndexNeedsCopyFromSourceFlags,
- WriteCache))
- {
- std::error_code DummyEc;
- RemoveFile(BlockChunkPath, DummyEc);
- throw std::runtime_error(
- fmt::format("Partial block {} is malformed", BlockDescription.BlockHash));
- }
-
- std::error_code Ec = TryRemoveFile(BlockChunkPath);
- if (Ec)
- {
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput,
- "Failed removing file '{}', reason: ({}) {}",
- BlockChunkPath,
- Ec.value(),
- Ec.message());
- }
-
- WritePartsComplete++;
- if (WritePartsComplete == TotalPartWriteCount)
- {
- FilteredWrittenBytesPerSecond.Stop();
- }
- }
- },
- OnDiskPath.empty() ? WorkerThreadPool::EMode::DisableBacklog
- : WorkerThreadPool::EMode::EnableBacklog);
- }
- });
- }
- }
- });
- BlockRangeIndex += RangeCount;
- }
-
- for (uint32_t BlockIndex : PartialBlocks.FullBlockIndexes)
- {
- if (m_AbortFlag)
- {
- break;
- }
-
- if (m_Options.PrimeCacheOnly && ExistsResult.ExistingBlobs.contains(m_BlockDescriptions[BlockIndex].BlockHash))
- {
- m_DownloadStats.RequestsCompleteCount++;
- continue;
- }
-
- Work.ScheduleWork(
- m_NetworkPool,
- [this,
- &WritePartsComplete,
- TotalPartWriteCount,
- &FilteredWrittenBytesPerSecond,
- &ExistsResult,
- &Work,
- &WriteCache,
- &RemoteChunkIndexNeedsCopyFromSourceFlags,
- &SequenceIndexChunksLeftToWriteCounters,
- &FilteredDownloadedBytesPerSecond,
- TotalRequestCount,
- BlockIndex](std::atomic<bool>&) {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_GetFullBlock");
-
- const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
-
- FilteredDownloadedBytesPerSecond.Start();
-
- IoBuffer BlockBuffer;
- const bool ExistsInCache =
- m_Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash);
- if (ExistsInCache)
- {
- BlockBuffer = m_Storage.BuildCacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash);
- }
- if (!BlockBuffer)
- {
- BlockBuffer = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash);
- if (BlockBuffer && m_Storage.BuildCacheStorage && m_Options.PopulateCache)
- {
- m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId,
- BlockDescription.BlockHash,
- ZenContentType::kCompressedBinary,
- CompositeBuffer(SharedBuffer(BlockBuffer)));
- }
- }
- if (!BlockBuffer)
- {
- throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash));
- }
- if (!m_AbortFlag)
- {
- uint64_t BlockSize = BlockBuffer.GetSize();
- m_DownloadStats.DownloadedBlockCount++;
- m_DownloadStats.DownloadedBlockByteCount += BlockSize;
- m_DownloadStats.RequestsCompleteCount++;
- if (m_DownloadStats.RequestsCompleteCount == TotalRequestCount)
- {
- FilteredDownloadedBytesPerSecond.Stop();
- }
-
- if (!m_Options.PrimeCacheOnly)
- {
- std::filesystem::path BlockChunkPath;
-
- // Check if the downloaded block is file based and we can move it directly without rewriting it
- {
- IoBufferFileReference FileRef;
- if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) &&
- (FileRef.FileChunkSize == BlockSize))
- {
- ZEN_TRACE_CPU("MoveTempFullBlock");
- std::error_code Ec;
- std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec);
- if (!Ec)
- {
- BlockBuffer.SetDeleteOnClose(false);
- BlockBuffer = {};
- BlockChunkPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString();
- RenameFile(TempBlobPath, BlockChunkPath, Ec);
- if (Ec)
- {
- BlockChunkPath = std::filesystem::path{};
-
- // Re-open the temp file again
- BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete);
- BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true);
- BlockBuffer.SetDeleteOnClose(true);
- }
- }
- }
- }
-
- if (BlockChunkPath.empty() && (BlockSize > m_Options.MaximumInMemoryPayloadSize))
- {
- ZEN_TRACE_CPU("WriteTempFullBlock");
- // Could not be moved and rather large, lets store it on disk
- BlockChunkPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString();
- TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer);
- BlockBuffer = {};
- }
-
- if (!m_AbortFlag)
- {
- Work.ScheduleWork(
- m_IOWorkerPool,
- [this,
- &Work,
- &RemoteChunkIndexNeedsCopyFromSourceFlags,
- &SequenceIndexChunksLeftToWriteCounters,
- BlockIndex,
- &WriteCache,
- &WritePartsComplete,
- TotalPartWriteCount,
- &FilteredWrittenBytesPerSecond,
- BlockChunkPath,
- BlockBuffer = std::move(BlockBuffer)](std::atomic<bool>&) mutable {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_WriteFullBlock");
-
- const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
-
- if (BlockChunkPath.empty())
- {
- ZEN_ASSERT(BlockBuffer);
- }
- else
- {
- ZEN_ASSERT(!BlockBuffer);
- BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath);
- if (!BlockBuffer)
- {
- throw std::runtime_error(
- fmt::format("Could not open dowloaded block {} from {}",
- BlockDescription.BlockHash,
- BlockChunkPath));
- }
- }
-
- FilteredWrittenBytesPerSecond.Start();
- if (!WriteChunksBlockToCache(BlockDescription,
- SequenceIndexChunksLeftToWriteCounters,
- Work,
- CompositeBuffer(std::move(BlockBuffer)),
- RemoteChunkIndexNeedsCopyFromSourceFlags,
- WriteCache))
- {
- std::error_code DummyEc;
- RemoveFile(BlockChunkPath, DummyEc);
- throw std::runtime_error(
- fmt::format("Block {} is malformed", BlockDescription.BlockHash));
- }
-
- if (!BlockChunkPath.empty())
- {
- std::error_code Ec = TryRemoveFile(BlockChunkPath);
- if (Ec)
- {
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput,
- "Failed removing file '{}', reason: ({}) {}",
- BlockChunkPath,
- Ec.value(),
- Ec.message());
- }
- }
-
- WritePartsComplete++;
-
- if (WritePartsComplete == TotalPartWriteCount)
- {
- FilteredWrittenBytesPerSecond.Stop();
- }
- }
- },
- BlockChunkPath.empty() ? WorkerThreadPool::EMode::DisableBacklog
- : WorkerThreadPool::EMode::EnableBacklog);
- }
- }
- }
- }
- });
- }
-
- {
- ZEN_TRACE_CPU("WriteChunks_Wait");
-
- Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
- ZEN_UNUSED(PendingWork);
- uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() +
- m_DownloadStats.DownloadedBlockByteCount.load() +
- +m_DownloadStats.DownloadedPartialBlockByteCount.load();
- FilteredWrittenBytesPerSecond.Update(m_DiskStats.WriteByteCount.load());
- FilteredDownloadedBytesPerSecond.Update(DownloadedBytes);
- std::string DownloadRateString =
- (m_DownloadStats.RequestsCompleteCount == TotalRequestCount)
- ? ""
- : fmt::format(" {}bits/s", NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8));
- std::string CloneDetails;
- if (m_DiskStats.CloneCount.load() > 0)
- {
- CloneDetails = fmt::format(" ({} cloned)", NiceBytes(m_DiskStats.CloneByteCount.load()));
- }
- std::string WriteDetails = m_Options.PrimeCacheOnly ? ""
- : fmt::format(" {}/{} ({}B/s) written{}",
- NiceBytes(m_WrittenChunkByteCount.load()),
- NiceBytes(BytesToWrite),
- NiceNum(FilteredWrittenBytesPerSecond.GetCurrent()),
- CloneDetails);
-
- std::string Details = fmt::format("{}/{} ({}{}) downloaded.{}",
- m_DownloadStats.RequestsCompleteCount.load(),
- TotalRequestCount,
- NiceBytes(DownloadedBytes),
- DownloadRateString,
- WriteDetails);
-
- std::string Task;
- if (m_Options.PrimeCacheOnly)
- {
- Task = "Downloading ";
- }
- else if ((m_WrittenChunkByteCount < BytesToWrite) || (BytesToValidate == 0))
- {
- Task = "Writing chunks ";
- }
- else
- {
- Task = "Verifying chunks ";
- }
-
- WriteProgressBar.UpdateState(
- {.Task = Task,
- .Details = Details,
- .TotalCount = m_Options.PrimeCacheOnly ? TotalRequestCount : (BytesToWrite + BytesToValidate),
- .RemainingCount = m_Options.PrimeCacheOnly ? (TotalRequestCount - m_DownloadStats.RequestsCompleteCount.load())
- : ((BytesToWrite + BytesToValidate) -
- (m_WrittenChunkByteCount.load() + m_ValidatedChunkByteCount.load())),
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
- });
- }
-
- CloneQuery.reset();
-
- FilteredWrittenBytesPerSecond.Stop();
- FilteredDownloadedBytesPerSecond.Stop();
-
- WriteProgressBar.Finish();
- if (m_AbortFlag)
- {
- return;
- }
-
- if (!m_Options.PrimeCacheOnly)
- {
- uint32_t RawSequencesMissingWriteCount = 0;
- for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceIndexChunksLeftToWriteCounters.size(); SequenceIndex++)
- {
- const auto& SequenceIndexChunksLeftToWriteCounter = SequenceIndexChunksLeftToWriteCounters[SequenceIndex];
- if (SequenceIndexChunksLeftToWriteCounter.load() != 0)
- {
- RawSequencesMissingWriteCount++;
- const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex];
- const std::filesystem::path& IncompletePath = m_RemoteContent.Paths[PathIndex];
- ZEN_ASSERT(!IncompletePath.empty());
- const uint32_t ExpectedSequenceCount = m_RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex];
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "{}: Max count {}, Current count {}",
- IncompletePath,
- ExpectedSequenceCount,
- SequenceIndexChunksLeftToWriteCounter.load());
- }
- ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounter.load() <= ExpectedSequenceCount);
- }
- }
- ZEN_ASSERT(RawSequencesMissingWriteCount == 0);
- ZEN_ASSERT(m_WrittenChunkByteCount == BytesToWrite);
- ZEN_ASSERT(m_ValidatedChunkByteCount == BytesToValidate);
- }
-
- const uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() +
- m_DownloadStats.DownloadedBlockByteCount.load() +
- m_DownloadStats.DownloadedPartialBlockByteCount.load();
- if (!m_Options.IsQuiet)
- {
- std::string CloneDetails;
- if (m_DiskStats.CloneCount.load() > 0)
- {
- CloneDetails = fmt::format(" ({} cloned)", NiceBytes(m_DiskStats.CloneByteCount.load()));
- }
- ZEN_OPERATION_LOG_INFO(
- m_LogOutput,
- "Downloaded {} ({}bits/s) in {}. Wrote {} ({}B/s){} in {}. Completed in {}",
- NiceBytes(DownloadedBytes),
- NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), DownloadedBytes * 8)),
- NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000),
- NiceBytes(m_WrittenChunkByteCount.load()),
- NiceNum(GetBytesPerSecond(FilteredWrittenBytesPerSecond.GetElapsedTimeUS(), m_DiskStats.WriteByteCount.load())),
- CloneDetails,
- NiceTimeSpanMs(FilteredWrittenBytesPerSecond.GetElapsedTimeUS() / 1000),
- NiceTimeSpanMs(WriteTimer.GetElapsedTimeMs()));
- }
-
- m_WriteChunkStats.WriteChunksElapsedWallTimeUs = WriteTimer.GetElapsedTimeUs();
- m_WriteChunkStats.DownloadTimeUs = FilteredDownloadedBytesPerSecond.GetElapsedTimeUS();
- m_WriteChunkStats.WriteTimeUs = FilteredWrittenBytesPerSecond.GetElapsedTimeUS();
- }
-
- if (m_Options.PrimeCacheOnly)
- {
- return;
- }
-
- m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::PrepareTarget, (uint32_t)TaskSteps::StepCount);
-
- tsl::robin_map<uint32_t, uint32_t> RemotePathIndexToLocalPathIndex;
- RemotePathIndexToLocalPathIndex.reserve(m_RemoteContent.Paths.size());
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceHashToLocalPathIndex;
- std::vector<uint32_t> RemoveLocalPathIndexes;
-
- if (m_AbortFlag)
- {
- return;
- }
-
- {
- ZEN_TRACE_CPU("PrepareTarget");
-
- tsl::robin_set<IoHash, IoHash::Hasher> CachedRemoteSequences;
-
- std::vector<uint32_t> FilesToCache;
-
- uint64_t MatchCount = 0;
- uint64_t PathMismatchCount = 0;
- uint64_t HashMismatchCount = 0;
- std::atomic<uint64_t> CachedCount = 0;
- std::atomic<uint64_t> CachedByteCount = 0;
- uint64_t SkippedCount = 0;
- uint64_t DeleteCount = 0;
- for (uint32_t LocalPathIndex = 0; LocalPathIndex < m_LocalContent.Paths.size(); LocalPathIndex++)
- {
- if (m_AbortFlag)
- {
- break;
- }
- const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex];
- const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex];
-
- ZEN_ASSERT_SLOW(IsFile((m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred()));
-
- if (m_Options.EnableTargetFolderScavenging)
- {
- if (!m_Options.WipeTargetFolder)
- {
- // Check if it is already in the correct place
- if (auto RemotePathIt = RemotePathToRemoteIndex.find(LocalPath.generic_string());
- RemotePathIt != RemotePathToRemoteIndex.end())
- {
- const uint32_t RemotePathIndex = RemotePathIt->second;
- if (m_RemoteContent.RawHashes[RemotePathIndex] == RawHash)
- {
- // It is already in it's correct place
- RemotePathIndexToLocalPathIndex[RemotePathIndex] = LocalPathIndex;
- SequenceHashToLocalPathIndex.insert({RawHash, LocalPathIndex});
- MatchCount++;
- continue;
- }
- else
- {
- HashMismatchCount++;
- }
- }
- else
- {
- PathMismatchCount++;
- }
- }
-
- // Do we need it?
- if (m_RemoteLookup.RawHashToSequenceIndex.contains(RawHash))
- {
- if (!CachedRemoteSequences.contains(RawHash))
- {
- // We need it, make sure we move it to the cache
- FilesToCache.push_back(LocalPathIndex);
- CachedRemoteSequences.insert(RawHash);
- continue;
- }
- else
- {
- SkippedCount++;
- }
- }
- }
-
- if (!m_Options.WipeTargetFolder)
- {
- // Explicitly delete the unneeded local file
- RemoveLocalPathIndexes.push_back(LocalPathIndex);
- DeleteCount++;
- }
- }
-
- if (m_AbortFlag)
- {
- return;
- }
-
- {
- ZEN_TRACE_CPU("CopyToCache");
-
- Stopwatch Timer;
-
- std::unique_ptr<OperationLogOutput::ProgressBar> CacheLocalProgressBarPtr(
- m_LogOutput.CreateProgressBar("Cache Local Data"));
- OperationLogOutput::ProgressBar& CacheLocalProgressBar(*CacheLocalProgressBarPtr);
- ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
-
- for (uint32_t LocalPathIndex : FilesToCache)
- {
- if (m_AbortFlag)
- {
- break;
- }
- Work.ScheduleWork(m_IOWorkerPool, [this, &CachedCount, &CachedByteCount, LocalPathIndex](std::atomic<bool>&) {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_CopyToCache");
-
- const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex];
- const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex];
- const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RawHash);
- ZEN_ASSERT_SLOW(!IsFileWithRetry(CacheFilePath));
- const std::filesystem::path LocalFilePath = (m_Path / LocalPath).make_preferred();
-
- std::error_code Ec = RenameFileWithRetry(LocalFilePath, CacheFilePath);
- if (Ec)
- {
- ZEN_OPERATION_LOG_WARN(m_LogOutput,
- "Failed to move file from '{}' to '{}', reason: ({}) {}, retrying...",
- LocalFilePath,
- CacheFilePath,
- Ec.value(),
- Ec.message());
- Ec = RenameFileWithRetry(LocalFilePath, CacheFilePath);
- if (Ec)
- {
- throw std::system_error(std::error_code(Ec.value(), std::system_category()),
- fmt::format("Failed to file from '{}' to '{}', reason: ({}) {}",
- LocalFilePath,
- CacheFilePath,
- Ec.value(),
- Ec.message()));
- }
- }
-
- CachedCount++;
- CachedByteCount += m_LocalContent.RawSizes[LocalPathIndex];
- }
- });
- }
-
- {
- ZEN_TRACE_CPU("CopyToCache_Wait");
-
- Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
- ZEN_UNUSED(PendingWork);
- const uint64_t WorkTotal = FilesToCache.size();
- const uint64_t WorkComplete = CachedCount.load();
- std::string Details = fmt::format("{}/{} ({}) files", WorkComplete, WorkTotal, NiceBytes(CachedByteCount));
- CacheLocalProgressBar.UpdateState(
- {.Task = "Caching local ",
- .Details = Details,
- .TotalCount = gsl::narrow<uint64_t>(WorkTotal),
- .RemainingCount = gsl::narrow<uint64_t>(WorkTotal - WorkComplete),
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
- });
- }
-
- CacheLocalProgressBar.Finish();
- if (m_AbortFlag)
- {
- return;
- }
-
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput,
- "Local state prep: Match: {}, PathMismatch: {}, HashMismatch: {}, Cached: {} ({}), Skipped: {}, "
- "Delete: {}",
- MatchCount,
- PathMismatchCount,
- HashMismatchCount,
- CachedCount.load(),
- NiceBytes(CachedByteCount.load()),
- SkippedCount,
- DeleteCount);
- }
- }
-
- m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::FinalizeTarget, (uint32_t)TaskSteps::StepCount);
-
- if (m_Options.WipeTargetFolder)
- {
- ZEN_TRACE_CPU("WipeTarget");
- Stopwatch Timer;
-
- // Clean target folder
- if (!CleanDirectory(m_LogOutput, m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.IsQuiet, m_Path, m_Options.ExcludeFolders))
- {
- ZEN_OPERATION_LOG_WARN(m_LogOutput, "Some files in {} could not be removed", m_Path);
- }
- m_RebuildFolderStateStats.CleanFolderElapsedWallTimeUs = Timer.GetElapsedTimeUs();
- }
-
- if (m_AbortFlag)
- {
- return;
- }
-
- {
- ZEN_TRACE_CPU("FinalizeTree");
-
- Stopwatch Timer;
-
- std::unique_ptr<OperationLogOutput::ProgressBar> RebuildProgressBarPtr(m_LogOutput.CreateProgressBar("Rebuild State"));
- OperationLogOutput::ProgressBar& RebuildProgressBar(*RebuildProgressBarPtr);
- ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
-
- OutLocalFolderState.Paths.resize(m_RemoteContent.Paths.size());
- OutLocalFolderState.RawSizes.resize(m_RemoteContent.Paths.size());
- OutLocalFolderState.Attributes.resize(m_RemoteContent.Paths.size());
- OutLocalFolderState.ModificationTicks.resize(m_RemoteContent.Paths.size());
-
- std::atomic<uint64_t> DeletedCount = 0;
-
- for (uint32_t LocalPathIndex : RemoveLocalPathIndexes)
- {
- if (m_AbortFlag)
- {
- break;
- }
- Work.ScheduleWork(m_IOWorkerPool, [this, &DeletedCount, LocalPathIndex](std::atomic<bool>&) {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_RemoveFile");
-
- const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred();
- SetFileReadOnlyWithRetry(LocalFilePath, false);
- RemoveFileWithRetry(LocalFilePath);
- DeletedCount++;
- }
- });
- }
-
- std::atomic<uint64_t> TargetsComplete = 0;
-
- struct FinalizeTarget
- {
- IoHash RawHash;
- uint32_t RemotePathIndex;
- };
-
- std::vector<FinalizeTarget> Targets;
- Targets.reserve(m_RemoteContent.Paths.size());
- for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++)
- {
- Targets.push_back(
- FinalizeTarget{.RawHash = m_RemoteContent.RawHashes[RemotePathIndex], .RemotePathIndex = RemotePathIndex});
- }
- std::sort(Targets.begin(), Targets.end(), [](const FinalizeTarget& Lhs, const FinalizeTarget& Rhs) {
- if (Lhs.RawHash < Rhs.RawHash)
- {
- return true;
- }
- else if (Lhs.RawHash > Rhs.RawHash)
- {
- return false;
- }
- return Lhs.RemotePathIndex < Rhs.RemotePathIndex;
- });
-
- size_t TargetOffset = 0;
- while (TargetOffset < Targets.size())
- {
- if (m_AbortFlag)
- {
- break;
- }
-
- size_t TargetCount = 1;
- while ((TargetOffset + TargetCount) < Targets.size() &&
- (Targets[TargetOffset + TargetCount].RawHash == Targets[TargetOffset].RawHash))
- {
- TargetCount++;
- }
-
- Work.ScheduleWork(
- m_IOWorkerPool,
- [this,
- &SequenceHashToLocalPathIndex,
- &Targets,
- &RemotePathIndexToLocalPathIndex,
- &OutLocalFolderState,
- BaseTargetOffset = TargetOffset,
- TargetCount,
- &TargetsComplete](std::atomic<bool>&) {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("Async_FinalizeChunkSequence");
-
- size_t TargetOffset = BaseTargetOffset;
- const IoHash& RawHash = Targets[TargetOffset].RawHash;
-
- if (RawHash == IoHash::Zero)
- {
- ZEN_TRACE_CPU("CreateEmptyFiles");
- while (TargetOffset < (BaseTargetOffset + TargetCount))
- {
- const uint32_t RemotePathIndex = Targets[TargetOffset].RemotePathIndex;
- ZEN_ASSERT(Targets[TargetOffset].RawHash == RawHash);
- const std::filesystem::path& TargetPath = m_RemoteContent.Paths[RemotePathIndex];
- std::filesystem::path TargetFilePath = (m_Path / TargetPath).make_preferred();
- if (!RemotePathIndexToLocalPathIndex[RemotePathIndex])
- {
- if (IsFileWithRetry(TargetFilePath))
- {
- SetFileReadOnlyWithRetry(TargetFilePath, false);
- }
- else
- {
- CreateDirectories(TargetFilePath.parent_path());
- }
- BasicFile OutputFile;
- OutputFile.Open(TargetFilePath, BasicFile::Mode::kTruncate);
- }
- OutLocalFolderState.Paths[RemotePathIndex] = TargetPath;
- OutLocalFolderState.RawSizes[RemotePathIndex] = m_RemoteContent.RawSizes[RemotePathIndex];
-
- OutLocalFolderState.Attributes[RemotePathIndex] =
- m_RemoteContent.Attributes.empty()
- ? GetNativeFileAttributes(TargetFilePath)
- : SetNativeFileAttributes(TargetFilePath,
- m_RemoteContent.Platform,
- m_RemoteContent.Attributes[RemotePathIndex]);
- OutLocalFolderState.ModificationTicks[RemotePathIndex] = GetModificationTickFromPath(TargetFilePath);
-
- TargetOffset++;
- TargetsComplete++;
- }
- }
- else
- {
- ZEN_TRACE_CPU("FinalizeFile");
- ZEN_ASSERT(m_RemoteLookup.RawHashToSequenceIndex.contains(RawHash));
- const uint32_t FirstRemotePathIndex = Targets[TargetOffset].RemotePathIndex;
- const std::filesystem::path& FirstTargetPath = m_RemoteContent.Paths[FirstRemotePathIndex];
- std::filesystem::path FirstTargetFilePath = (m_Path / FirstTargetPath).make_preferred();
-
- if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(FirstRemotePathIndex);
- InPlaceIt != RemotePathIndexToLocalPathIndex.end())
- {
- ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath));
- }
- else
- {
- if (IsFileWithRetry(FirstTargetFilePath))
- {
- SetFileReadOnlyWithRetry(FirstTargetFilePath, false);
- }
- else
- {
- CreateDirectories(FirstTargetFilePath.parent_path());
- }
-
- if (auto InplaceIt = SequenceHashToLocalPathIndex.find(RawHash);
- InplaceIt != SequenceHashToLocalPathIndex.end())
- {
- ZEN_TRACE_CPU("Copy");
- const uint32_t LocalPathIndex = InplaceIt->second;
- const std::filesystem::path& SourcePath = m_LocalContent.Paths[LocalPathIndex];
- std::filesystem::path SourceFilePath = (m_Path / SourcePath).make_preferred();
- ZEN_ASSERT_SLOW(IsFileWithRetry(SourceFilePath));
-
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput,
- "Copying from '{}' -> '{}'",
- SourceFilePath,
- FirstTargetFilePath);
- const uint64_t RawSize = m_LocalContent.RawSizes[LocalPathIndex];
- FastCopyFile(m_Options.AllowFileClone,
- m_Options.UseSparseFiles,
- SourceFilePath,
- FirstTargetFilePath,
- RawSize,
- m_DiskStats.WriteCount,
- m_DiskStats.WriteByteCount,
- m_DiskStats.CloneCount,
- m_DiskStats.CloneByteCount);
-
- m_RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++;
- }
- else
- {
- ZEN_TRACE_CPU("Rename");
- const std::filesystem::path CacheFilePath =
- GetFinalChunkedSequenceFileName(m_CacheFolderPath, RawHash);
- ZEN_ASSERT_SLOW(IsFileWithRetry(CacheFilePath));
-
- std::error_code Ec = RenameFileWithRetry(CacheFilePath, FirstTargetFilePath);
- if (Ec)
- {
- ZEN_OPERATION_LOG_WARN(m_LogOutput,
- "Failed to move file from '{}' to '{}', reason: ({}) {}, retrying...",
- CacheFilePath,
- FirstTargetFilePath,
- Ec.value(),
- Ec.message());
- Ec = RenameFileWithRetry(CacheFilePath, FirstTargetFilePath);
- if (Ec)
- {
- throw std::system_error(
- std::error_code(Ec.value(), std::system_category()),
- fmt::format("Failed to move file from '{}' to '{}', reason: ({}) {}",
- CacheFilePath,
- FirstTargetFilePath,
- Ec.value(),
- Ec.message()));
- }
- }
-
- m_RebuildFolderStateStats.FinalizeTreeFilesMovedCount++;
- }
- }
-
- OutLocalFolderState.Paths[FirstRemotePathIndex] = FirstTargetPath;
- OutLocalFolderState.RawSizes[FirstRemotePathIndex] = m_RemoteContent.RawSizes[FirstRemotePathIndex];
-
- OutLocalFolderState.Attributes[FirstRemotePathIndex] =
- m_RemoteContent.Attributes.empty()
- ? GetNativeFileAttributes(FirstTargetFilePath)
- : SetNativeFileAttributes(FirstTargetFilePath,
- m_RemoteContent.Platform,
- m_RemoteContent.Attributes[FirstRemotePathIndex]);
- OutLocalFolderState.ModificationTicks[FirstRemotePathIndex] =
- GetModificationTickFromPath(FirstTargetFilePath);
-
- TargetOffset++;
- TargetsComplete++;
-
- while (TargetOffset < (BaseTargetOffset + TargetCount))
- {
- const uint32_t RemotePathIndex = Targets[TargetOffset].RemotePathIndex;
- ZEN_ASSERT(Targets[TargetOffset].RawHash == RawHash);
- const std::filesystem::path& TargetPath = m_RemoteContent.Paths[RemotePathIndex];
- std::filesystem::path TargetFilePath = (m_Path / TargetPath).make_preferred();
-
- if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(RemotePathIndex);
- InPlaceIt != RemotePathIndexToLocalPathIndex.end())
- {
- ZEN_ASSERT_SLOW(IsFileWithRetry(TargetFilePath));
- }
- else
- {
- ZEN_TRACE_CPU("Copy");
- if (IsFileWithRetry(TargetFilePath))
- {
- SetFileReadOnlyWithRetry(TargetFilePath, false);
- }
- else
- {
- CreateDirectories(TargetFilePath.parent_path());
- }
-
- ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath));
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput,
- "Copying from '{}' -> '{}'",
- FirstTargetFilePath,
- TargetFilePath);
- const uint64_t RawSize = m_RemoteContent.RawSizes[RemotePathIndex];
- FastCopyFile(m_Options.AllowFileClone,
- m_Options.UseSparseFiles,
- FirstTargetFilePath,
- TargetFilePath,
- RawSize,
- m_DiskStats.WriteCount,
- m_DiskStats.WriteByteCount,
- m_DiskStats.CloneCount,
- m_DiskStats.CloneByteCount);
-
- m_RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++;
- }
-
- OutLocalFolderState.Paths[RemotePathIndex] = TargetPath;
- OutLocalFolderState.RawSizes[RemotePathIndex] = m_RemoteContent.RawSizes[RemotePathIndex];
-
- OutLocalFolderState.Attributes[RemotePathIndex] =
- m_RemoteContent.Attributes.empty()
- ? GetNativeFileAttributes(TargetFilePath)
- : SetNativeFileAttributes(TargetFilePath,
- m_RemoteContent.Platform,
- m_RemoteContent.Attributes[RemotePathIndex]);
- OutLocalFolderState.ModificationTicks[RemotePathIndex] = GetModificationTickFromPath(TargetFilePath);
-
- TargetOffset++;
- TargetsComplete++;
- }
- }
- }
- });
-
- TargetOffset += TargetCount;
- }
-
- {
- ZEN_TRACE_CPU("FinalizeTree_Wait");
-
- Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
- ZEN_UNUSED(PendingWork);
- const uint64_t WorkTotal = Targets.size() + RemoveLocalPathIndexes.size();
- const uint64_t WorkComplete = TargetsComplete.load() + DeletedCount.load();
- std::string Details = fmt::format("{}/{} files", WorkComplete, WorkTotal);
- RebuildProgressBar.UpdateState({.Task = "Rebuilding state ",
- .Details = Details,
- .TotalCount = gsl::narrow<uint64_t>(WorkTotal),
- .RemainingCount = gsl::narrow<uint64_t>(WorkTotal - WorkComplete),
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
- });
- }
-
- m_RebuildFolderStateStats.FinalizeTreeElapsedWallTimeUs = Timer.GetElapsedTimeUs();
- RebuildProgressBar.Finish();
- }
- m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount);
- }
- catch (const std::exception&)
- {
- m_AbortFlag = true;
- throw;
- }
-}
-
void
BuildsOperationUpdateFolder::ScanCacheFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedChunkHashesFound,
                                             tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedSequenceHashesFound)
{
    // Scans the operation's cache folder and classifies every file found there:
    //  - a file whose name parses as a hash matching a remote chunk hash, and whose
    //    size equals that chunk's raw size, is recorded in OutCachedChunkHashesFound;
    //  - a file matching a remote sequence raw hash, with a size equal to the raw
    //    size of the sequence's first path, is recorded in OutCachedSequenceHashesFound;
    //  - everything else is deleted (best effort).
    // When m_Options.EnableTargetFolderScavenging is false no matching is attempted,
    // so every file in the cache folder is deleted.
    // Elapsed time is accumulated into m_CacheMappingStats.CacheScanElapsedWallTimeUs.
    ZEN_TRACE_CPU("ScanCacheFolder");

    Stopwatch CacheTimer;

    DirectoryContent CacheDirContent;
    GetDirectoryContent(m_CacheFolderPath, DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, CacheDirContent);
    for (size_t Index = 0; Index < CacheDirContent.Files.size(); Index++)
    {
        if (m_Options.EnableTargetFolderScavenging)
        {
            // Cache files are named by a hex hash; a name that does not parse as a
            // hash cannot be matched and falls through to deletion below.
            IoHash FileHash;
            if (IoHash::TryParse(CacheDirContent.Files[Index].filename().string(), FileHash))
            {
                if (auto ChunkIt = m_RemoteLookup.ChunkHashToChunkIndex.find(FileHash);
                    ChunkIt != m_RemoteLookup.ChunkHashToChunkIndex.end())
                {
                    const uint32_t ChunkIndex = ChunkIt->second;
                    const uint64_t ChunkSize = m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
                    // The size check guards against truncated or partially written
                    // cache files; a mismatched file is deleted below.
                    if (ChunkSize == CacheDirContent.FileSizes[Index])
                    {
                        OutCachedChunkHashesFound.insert({FileHash, ChunkIndex});
                        m_CacheMappingStats.CacheChunkCount++;
                        m_CacheMappingStats.CacheChunkByteCount += ChunkSize;
                        continue; // Keep the file.
                    }
                }
                else if (auto SequenceIt = m_RemoteLookup.RawHashToSequenceIndex.find(FileHash);
                         SequenceIt != m_RemoteLookup.RawHashToSequenceIndex.end())
                {
                    const uint32_t SequenceIndex = SequenceIt->second;
                    const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex];
                    const uint64_t SequenceSize = m_RemoteContent.RawSizes[PathIndex];
                    if (SequenceSize == CacheDirContent.FileSizes[Index])
                    {
                        OutCachedSequenceHashesFound.insert({FileHash, SequenceIndex});
                        m_CacheMappingStats.CacheSequenceHashesCount++;
                        m_CacheMappingStats.CacheSequenceHashesByteCount += SequenceSize;

                        // Slow-build sanity check: the final cache file for this
                        // sequence must exist since we just enumerated it.
                        const std::filesystem::path CacheFilePath =
                            GetFinalChunkedSequenceFileName(m_CacheFolderPath,
                                                           m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]);
                        ZEN_ASSERT_SLOW(IsFile(CacheFilePath));

                        continue; // Keep the file.
                    }
                }
            }
        }
        // Unmatched, wrong-sized, or scavenging-disabled: best-effort delete.
        std::error_code Ec = TryRemoveFile(CacheDirContent.Files[Index]);
        if (Ec)
        {
            ZEN_OPERATION_LOG_DEBUG(m_LogOutput,
                                    "Failed removing file '{}', reason: ({}) {}",
                                    CacheDirContent.Files[Index],
                                    Ec.value(),
                                    Ec.message());
        }
    }
    m_CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs();
}
-
-void
-BuildsOperationUpdateFolder::ScanTempBlocksFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedBlocksFound)
-{
- ZEN_TRACE_CPU("ScanTempBlocksFolder");
-
- Stopwatch CacheTimer;
-
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> AllBlockSizes;
- AllBlockSizes.reserve(m_BlockDescriptions.size());
- for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++)
- {
- const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
- AllBlockSizes.insert({BlockDescription.BlockHash, BlockIndex});
- }
-
- DirectoryContent BlockDirContent;
- GetDirectoryContent(m_TempBlockFolderPath,
- DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes,
- BlockDirContent);
- OutCachedBlocksFound.reserve(BlockDirContent.Files.size());
- for (size_t Index = 0; Index < BlockDirContent.Files.size(); Index++)
- {
- if (m_Options.EnableTargetFolderScavenging)
- {
- IoHash FileHash;
- if (IoHash::TryParse(BlockDirContent.Files[Index].filename().string(), FileHash))
- {
- if (auto BlockIt = AllBlockSizes.find(FileHash); BlockIt != AllBlockSizes.end())
- {
- const uint32_t BlockIndex = BlockIt->second;
- const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
- uint64_t BlockSize = CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize;
- for (uint64_t ChunkSize : BlockDescription.ChunkCompressedLengths)
- {
- BlockSize += ChunkSize;
- }
-
- if (BlockSize == BlockDirContent.FileSizes[Index])
- {
- OutCachedBlocksFound.insert({FileHash, BlockIndex});
- m_CacheMappingStats.CacheBlockCount++;
- m_CacheMappingStats.CacheBlocksByteCount += BlockSize;
- continue;
- }
- }
- }
- }
- std::error_code Ec = TryRemoveFile(BlockDirContent.Files[Index]);
- if (Ec)
- {
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput,
- "Failed removing file '{}', reason: ({}) {}",
- BlockDirContent.Files[Index],
- Ec.value(),
- Ec.message());
- }
- }
-
- m_CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs();
-}
-
-std::vector<BuildsOperationUpdateFolder::ScavengeSource>
-BuildsOperationUpdateFolder::FindScavengeSources()
-{
- ZEN_TRACE_CPU("FindScavengeSources");
-
- const bool TargetPathExists = IsDir(m_Path);
-
- std::vector<std::filesystem::path> StatePaths = GetDownloadedStatePaths(m_Options.SystemRootDir);
-
- std::vector<ScavengeSource> Result;
- for (const std::filesystem::path& EntryPath : StatePaths)
- {
- if (IsFile(EntryPath))
- {
- bool DeleteEntry = false;
-
- try
- {
- BuildsDownloadInfo Info = ReadDownloadedInfoFile(EntryPath);
- const bool LocalPathExists = !Info.LocalPath.empty() && IsDir(Info.LocalPath);
- const bool LocalStateFileExists = IsFile(Info.StateFilePath);
- if (LocalPathExists && LocalStateFileExists)
- {
- if (TargetPathExists && std::filesystem::equivalent(Info.LocalPath, m_Path))
- {
- DeleteEntry = true;
- }
- else
- {
- Result.push_back({.StateFilePath = std::move(Info.StateFilePath), .Path = std::move(Info.LocalPath)});
- }
- }
- else
- {
- DeleteEntry = true;
- }
- }
- catch (const std::exception& Ex)
- {
- ZEN_OPERATION_LOG_WARN(m_LogOutput, "{}", Ex.what());
- DeleteEntry = true;
- }
-
- if (DeleteEntry)
- {
- std::error_code DummyEc;
- std::filesystem::remove(EntryPath, DummyEc);
- }
- }
- }
- return Result;
-}
-
std::vector<uint32_t>
BuildsOperationUpdateFolder::ScanTargetFolder(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedChunkHashesFound,
                                              const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound)
{
    // Determines which remote sequences still have to be produced. A sequence is
    // considered available when its raw hash was found in the cache folder (either
    // as a finished sequence or as a single cached chunk), or when a local file
    // with the same raw hash already exists in the target folder. Everything else
    // is returned as a missing sequence index for the download/assembly phase.
    //
    // @param CachedChunkHashesFound    Chunk hashes discovered by ScanCacheFolder.
    // @param CachedSequenceHashesFound Sequence hashes discovered by ScanCacheFolder.
    // @return Indexes into m_RemoteContent.ChunkedContent.SequenceRawHashes that
    //         could not be satisfied from the cache or the local content.
    ZEN_TRACE_CPU("ScanTargetFolder");

    Stopwatch LocalTimer;

    std::vector<uint32_t> MissingSequenceIndexes;

    for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < m_RemoteContent.ChunkedContent.SequenceRawHashes.size();
         RemoteSequenceIndex++)
    {
        const IoHash& RemoteSequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
        const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(m_RemoteLookup, RemoteSequenceIndex);
        const uint64_t RemoteRawSize = m_RemoteContent.RawSizes[RemotePathIndex];
        if (auto CacheSequenceIt = CachedSequenceHashesFound.find(RemoteSequenceRawHash);
            CacheSequenceIt != CachedSequenceHashesFound.end())
        {
            // Already assembled in the cache folder; nothing more to do for it.
            const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash);
            ZEN_ASSERT_SLOW(IsFile(CacheFilePath));
            if (m_Options.IsVerbose)
            {
                ZEN_OPERATION_LOG_INFO(m_LogOutput,
                                       "Found sequence {} at {} ({})",
                                       RemoteSequenceRawHash,
                                       CacheFilePath,
                                       NiceBytes(RemoteRawSize));
            }
        }
        else if (auto CacheChunkIt = CachedChunkHashesFound.find(RemoteSequenceRawHash); CacheChunkIt != CachedChunkHashesFound.end())
        {
            // A single cached chunk whose hash equals the sequence raw hash
            // satisfies the sequence on its own.
            const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash);
            ZEN_ASSERT_SLOW(IsFile(CacheFilePath));
            if (m_Options.IsVerbose)
            {
                ZEN_OPERATION_LOG_INFO(m_LogOutput,
                                       "Found chunk {} at {} ({})",
                                       RemoteSequenceRawHash,
                                       CacheFilePath,
                                       NiceBytes(RemoteRawSize));
            }
        }
        else if (auto It = m_LocalLookup.RawHashToSequenceIndex.find(RemoteSequenceRawHash);
                 It != m_LocalLookup.RawHashToSequenceIndex.end())
        {
            // The target folder already holds a file with this raw hash; record
            // the match in the stats rather than re-downloading it.
            const uint32_t LocalSequenceIndex = It->second;
            const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(m_LocalLookup, LocalSequenceIndex);
            const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred();
            ZEN_ASSERT_SLOW(IsFile(LocalFilePath));
            m_CacheMappingStats.LocalPathsMatchingSequencesCount++;
            m_CacheMappingStats.LocalPathsMatchingSequencesByteCount += RemoteRawSize;
            if (m_Options.IsVerbose)
            {
                ZEN_OPERATION_LOG_INFO(m_LogOutput,
                                       "Found sequence {} at {} ({})",
                                       RemoteSequenceRawHash,
                                       LocalFilePath,
                                       NiceBytes(RemoteRawSize));
            }
        }
        else
        {
            // Not available anywhere locally: must be fetched/assembled.
            MissingSequenceIndexes.push_back(RemoteSequenceIndex);
        }
    }

    m_CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs();
    return MissingSequenceIndexes;
}
-
bool
BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source,
                                                 ChunkedFolderContent& OutScavengedLocalContent,
                                                 ChunkedContentLookup& OutScavengedLookup)
{
    // Loads the saved build state of a scavenge source and reduces it to the
    // content that is actually useful for the current update: paths whose
    // sequence raw hash, or at least one of whose chunk hashes, appears in the
    // remote content. Paths whose on-disk files have changed since the state was
    // saved are removed from the result instead of being re-hashed.
    //
    // @param Source                   State-file path and folder of the candidate source.
    // @param OutScavengedLocalContent Receives the pruned chunked content (cleared on failure).
    // @param OutScavengedLookup       Receives the lookup built from the pruned content.
    // @return true when at least one usable path remains; false otherwise.
    ZEN_TRACE_CPU("FindScavengeContent");

    FolderContent LocalFolderState;
    try
    {
        BuildSaveState SavedState = ReadBuildSaveStateFile(Source.StateFilePath);
        OutScavengedLocalContent = std::move(SavedState.State.ChunkedContent);
        LocalFolderState = std::move(SavedState.FolderState);
    }
    catch (const std::exception& Ex)
    {
        // An unreadable/corrupt state file just disqualifies this source.
        ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Skipping invalid build state at '{}', reason: {}", Source.StateFilePath, Ex.what());
        return false;
    }

    tsl::robin_set<uint32_t> PathIndexesToScavenge;
    PathIndexesToScavenge.reserve(OutScavengedLocalContent.Paths.size());
    std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(OutScavengedLocalContent.ChunkedContent.ChunkCounts);

    {
        // Map each raw hash to the first path that carries it, so every sequence
        // can be resolved to a concrete file in the source folder.
        tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToPathIndex;

        RawHashToPathIndex.reserve(OutScavengedLocalContent.Paths.size());
        for (uint32_t ScavengedPathIndex = 0; ScavengedPathIndex < OutScavengedLocalContent.RawHashes.size(); ScavengedPathIndex++)
        {
            if (!RawHashToPathIndex.contains(OutScavengedLocalContent.RawHashes[ScavengedPathIndex]))
            {
                RawHashToPathIndex.insert_or_assign(OutScavengedLocalContent.RawHashes[ScavengedPathIndex], ScavengedPathIndex);
            }
        }

        for (uint32_t ScavengeSequenceIndex = 0; ScavengeSequenceIndex < OutScavengedLocalContent.ChunkedContent.SequenceRawHashes.size();
             ScavengeSequenceIndex++)
        {
            const IoHash& SequenceHash = OutScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengeSequenceIndex];
            if (auto It = RawHashToPathIndex.find(SequenceHash); It != RawHashToPathIndex.end())
            {
                uint32_t PathIndex = It->second;
                if (!PathIndexesToScavenge.contains(PathIndex))
                {
                    if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash))
                    {
                        // Whole sequence is wanted by the remote content.
                        PathIndexesToScavenge.insert(PathIndex);
                    }
                    else
                    {
                        // Otherwise keep the path if any individual chunk of this
                        // sequence is wanted by the remote content.
                        uint32_t ChunkOrderIndexStart = ChunkOrderOffsets[ScavengeSequenceIndex];
                        const uint32_t ChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex];
                        for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < ChunkCount; ChunkOrderIndex++)
                        {
                            const uint32_t ChunkIndex =
                                OutScavengedLocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndexStart + ChunkOrderIndex];
                            const IoHash& ChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ChunkIndex];
                            if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ChunkHash))
                            {
                                PathIndexesToScavenge.insert(PathIndex);
                                break;
                            }
                        }
                    }
                }
            }
            else
            {
                // A sequence hash with no backing path means the saved state is
                // internally inconsistent; skip that sequence but keep going.
                ZEN_OPERATION_LOG_WARN(m_LogOutput,
                                       "Scavenged state file at '{}' for '{}' is invalid, skipping scavenging for sequence {}",
                                       Source.StateFilePath,
                                       Source.Path,
                                       SequenceHash);
            }
        }
    }

    if (PathIndexesToScavenge.empty())
    {
        OutScavengedLocalContent = {};
        return false;
    }

    std::vector<std::filesystem::path> PathsToScavenge;
    PathsToScavenge.reserve(PathIndexesToScavenge.size());
    for (uint32_t ScavengedStatePathIndex : PathIndexesToScavenge)
    {
        PathsToScavenge.push_back(OutScavengedLocalContent.Paths[ScavengedStatePathIndex]);
    }

    // Verify the selected files still exist on disk in the source folder.
    FolderContent ValidFolderContent =
        GetValidFolderContent(m_IOWorkerPool, m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}, 0, m_AbortFlag, m_PauseFlag);

    if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent))
    {
        std::vector<std::filesystem::path> DeletedPaths;
        FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths);

        // If the files are modified since the state was saved we ignore the files since we don't
        // want to incur the cost of scanning/hashing scavenged files
        DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end());
        if (!DeletedPaths.empty())
        {
            OutScavengedLocalContent =
                DeletePathsFromChunkedContent(OutScavengedLocalContent,
                                              BuildHashLookup(OutScavengedLocalContent.ChunkedContent.SequenceRawHashes),
                                              ChunkOrderOffsets,
                                              DeletedPaths);
        }
    }

    if (OutScavengedLocalContent.Paths.empty())
    {
        OutScavengedLocalContent = {};
        return false;
    }

    OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent);

    return true;
}
-
// Walks the remote chunk list and, for every chunk that has not yet been claimed
// and that exists in the given scavenged content, records a copy operation from
// the scavenge source into InOutCopyChunkDatas. Copy operations are grouped per
// source sequence (keyed by the sequence raw hash) and split into a new group
// once a group accumulates more than 1024 target locations.
//
// @param InOutRemainingChunkCount           Decremented per claimed chunk; loop stops at 0.
// @param InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags
//                                           Per-remote-chunk "already claimed" flags; set on claim.
// @param InOutRawHashToCopyChunkDataIndex   Maps a source sequence raw hash to its
//                                           current (most recent) group in InOutCopyChunkDatas.
// @param SequenceIndexChunksLeftToWriteCounters
//                                           Used to filter out targets that are already complete.
// @param ScavengedContent / ScavengedLookup Content and lookup of the scavenge source.
// @param InOutCopyChunkDatas                Receives/extends the grouped copy operations.
// @param ScavengedContentIndex              Index identifying this source in the operation.
// @param InOutChunkMatchingRemoteCount / InOutChunkMatchingRemoteByteCount
//                                           Stats accumulated per claimed chunk.
void
BuildsOperationUpdateFolder::ScavengeSourceForChunks(uint32_t& InOutRemainingChunkCount,
                                                     std::vector<bool>& InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags,
                                                     tsl::robin_map<IoHash, size_t, IoHash::Hasher>& InOutRawHashToCopyChunkDataIndex,
                                                     const std::vector<std::atomic<uint32_t>>& SequenceIndexChunksLeftToWriteCounters,
                                                     const ChunkedFolderContent& ScavengedContent,
                                                     const ChunkedContentLookup& ScavengedLookup,
                                                     std::vector<CopyChunkData>& InOutCopyChunkDatas,
                                                     uint32_t ScavengedContentIndex,
                                                     uint64_t& InOutChunkMatchingRemoteCount,
                                                     uint64_t& InOutChunkMatchingRemoteByteCount)
{
    for (uint32_t RemoteChunkIndex = 0;
         RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size() && (InOutRemainingChunkCount > 0);
         RemoteChunkIndex++)
    {
        // Skip chunks already claimed by an earlier source.
        if (!InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex])
        {
            const IoHash& RemoteChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
            if (auto It = ScavengedLookup.ChunkHashToChunkIndex.find(RemoteChunkHash); It != ScavengedLookup.ChunkHashToChunkIndex.end())
            {
                // Only chunk locations belonging to sequences that still need
                // writes are worth copying.
                std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs =
                    GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex);

                if (!ChunkTargetPtrs.empty())
                {
                    const uint32_t ScavengedChunkIndex = It->second;
                    const uint64_t ScavengedChunkRawSize = ScavengedContent.ChunkedContent.ChunkRawSizes[ScavengedChunkIndex];
                    // The first location of the chunk inside the scavenged content
                    // determines which source sequence (file) the copy reads from.
                    const size_t ChunkSequenceLocationOffset = ScavengedLookup.ChunkSequenceLocationOffset[ScavengedChunkIndex];
                    const ChunkedContentLookup::ChunkSequenceLocation& ScavengeLocation =
                        ScavengedLookup.ChunkSequenceLocations[ChunkSequenceLocationOffset];
                    const IoHash& ScavengedSequenceRawHash =
                        ScavengedContent.ChunkedContent.SequenceRawHashes[ScavengeLocation.SequenceIndex];

                    CopyChunkData::ChunkTarget Target = {.TargetChunkLocationCount = gsl::narrow<uint32_t>(ChunkTargetPtrs.size()),
                                                         .RemoteChunkIndex = RemoteChunkIndex,
                                                         .CacheFileOffset = ScavengeLocation.Offset};
                    if (auto CopySourceIt = InOutRawHashToCopyChunkDataIndex.find(ScavengedSequenceRawHash);
                        CopySourceIt != InOutRawHashToCopyChunkDataIndex.end())
                    {
                        CopyChunkData& Data = InOutCopyChunkDatas[CopySourceIt->second];
                        if (Data.TargetChunkLocationPtrs.size() > 1024)
                        {
                            // Current group for this source sequence is full; start a
                            // fresh group and repoint the hash index at it.
                            InOutRawHashToCopyChunkDataIndex.insert_or_assign(ScavengedSequenceRawHash, InOutCopyChunkDatas.size());
                            InOutCopyChunkDatas.push_back(CopyChunkData{.ScavengeSourceIndex = ScavengedContentIndex,
                                                                        .SourceSequenceIndex = ScavengeLocation.SequenceIndex,
                                                                        .TargetChunkLocationPtrs = ChunkTargetPtrs,
                                                                        .ChunkTargets = std::vector<CopyChunkData::ChunkTarget>{Target}});
                        }
                        else
                        {
                            // Append to the existing group for this source sequence.
                            Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(),
                                                                ChunkTargetPtrs.begin(),
                                                                ChunkTargetPtrs.end());
                            Data.ChunkTargets.push_back(Target);
                        }
                    }
                    else
                    {
                        // First chunk seen for this source sequence: open a new group.
                        InOutRawHashToCopyChunkDataIndex.insert_or_assign(ScavengedSequenceRawHash, InOutCopyChunkDatas.size());
                        InOutCopyChunkDatas.push_back(CopyChunkData{.ScavengeSourceIndex = ScavengedContentIndex,
                                                                    .SourceSequenceIndex = ScavengeLocation.SequenceIndex,
                                                                    .TargetChunkLocationPtrs = ChunkTargetPtrs,
                                                                    .ChunkTargets = std::vector<CopyChunkData::ChunkTarget>{Target}});
                    }
                    InOutChunkMatchingRemoteCount++;
                    InOutChunkMatchingRemoteByteCount += ScavengedChunkRawSize;
                    InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true;
                    InOutRemainingChunkCount--;
                }
            }
        }
    }
}
-
-std::filesystem::path
-BuildsOperationUpdateFolder::FindDownloadedChunk(const IoHash& ChunkHash)
-{
- ZEN_TRACE_CPU("FindDownloadedChunk");
-
- std::filesystem::path CompressedChunkPath = m_TempDownloadFolderPath / ChunkHash.ToHexString();
- if (IsFile(CompressedChunkPath))
- {
- IoBuffer ExistingCompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath);
- if (ExistingCompressedPart)
- {
- IoHash RawHash;
- uint64_t RawSize;
- if (CompressedBuffer::ValidateCompressedHeader(ExistingCompressedPart,
- RawHash,
- RawSize,
- /*OutOptionalTotalCompressedSize*/ nullptr))
- {
- return CompressedChunkPath;
- }
- else
- {
- std::error_code DummyEc;
- RemoveFile(CompressedChunkPath, DummyEc);
- }
- }
- }
- return {};
-}
-
-std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>
-BuildsOperationUpdateFolder::GetRemainingChunkTargets(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
- uint32_t ChunkIndex)
-{
- ZEN_TRACE_CPU("GetRemainingChunkTargets");
-
- std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(m_RemoteLookup, ChunkIndex);
- std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs;
- if (!ChunkSources.empty())
- {
- ChunkTargetPtrs.reserve(ChunkSources.size());
- for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources)
- {
- if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0)
- {
- ChunkTargetPtrs.push_back(&Source);
- }
- }
- }
- return ChunkTargetPtrs;
-};
-
-uint64_t
-BuildsOperationUpdateFolder::GetChunkWriteCount(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
- uint32_t ChunkIndex)
-{
- ZEN_TRACE_CPU("GetChunkWriteCount");
-
- uint64_t WriteCount = 0;
- std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(m_RemoteLookup, ChunkIndex);
- for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources)
- {
- if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0)
- {
- WriteCount++;
- }
- }
- return WriteCount;
-};
-
-void
-BuildsOperationUpdateFolder::CheckRequiredDiskSpace(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex)
-{
- tsl::robin_set<uint32_t> ExistingRemotePaths;
-
- if (m_Options.EnableTargetFolderScavenging)
- {
- for (uint32_t LocalPathIndex = 0; LocalPathIndex < m_LocalContent.Paths.size(); LocalPathIndex++)
- {
- const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex];
- const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex];
-
- if (auto RemotePathIt = RemotePathToRemoteIndex.find(LocalPath.generic_string()); RemotePathIt != RemotePathToRemoteIndex.end())
- {
- const uint32_t RemotePathIndex = RemotePathIt->second;
- if (m_RemoteContent.RawHashes[RemotePathIndex] == RawHash)
- {
- ExistingRemotePaths.insert(RemotePathIndex);
- }
- }
- }
- }
-
- uint64_t RequiredSpace = 0;
- for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++)
- {
- if (!ExistingRemotePaths.contains(RemotePathIndex))
- {
- RequiredSpace += m_RemoteContent.RawSizes[RemotePathIndex];
- }
- }
-
- std::error_code Ec;
- DiskSpace Space = DiskSpaceInfo(m_Path, Ec);
- if (Ec)
- {
- throw std::runtime_error(fmt::format("Get free disk space for target path '{}' FAILED, reason: {}", m_Path, Ec.message()));
- }
- if (Space.Free < (RequiredSpace + 16u * 1024u * 1024u))
- {
- throw std::runtime_error(
- fmt::format("Not enough free space for target path '{}', {} of free space is needed", m_Path, RequiredSpace));
- }
-}
-
// Copies one scavenged file (whose raw content equals a remote sequence) from
// the scavenge source folder into this operation's cache folder. The copy is
// written to a temporary name first and then renamed to the final sequence file
// name so that finished cache files are always complete. Disk-write statistics
// and the written/validated chunk byte counters are updated.
//
// @param ScavengeRootPath Root folder of the scavenge source.
// @param ScavengedContent Content description of the scavenge source.
// @param ScavengeOp       Which scavenged path maps to which remote sequence.
void
BuildsOperationUpdateFolder::WriteScavengedSequenceToCache(const std::filesystem::path& ScavengeRootPath,
                                                           const ChunkedFolderContent& ScavengedContent,
                                                           const ScavengedSequenceCopyOperation& ScavengeOp)
{
    ZEN_TRACE_CPU("WriteScavengedSequenceToCache");

    const std::filesystem::path ScavengedPath = ScavengedContent.Paths[ScavengeOp.ScavengedPathIndex];
    const std::filesystem::path ScavengedFilePath = (ScavengeRootPath / ScavengedPath).make_preferred();
    // Slow-build sanity check: the source must still match the recorded size.
    ZEN_ASSERT_SLOW(FileSizeFromPath(ScavengedFilePath) == ScavengeOp.RawSize);

    const IoHash& RemoteSequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[ScavengeOp.RemoteSequenceIndex];
    const std::filesystem::path TempFilePath = GetTempChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash);

    const uint64_t RawSize = ScavengedContent.RawSizes[ScavengeOp.ScavengedPathIndex];
    FastCopyFile(m_Options.AllowFileClone,
                 m_Options.UseSparseFiles,
                 ScavengedFilePath,
                 TempFilePath,
                 RawSize,
                 m_DiskStats.WriteCount,
                 m_DiskStats.WriteByteCount,
                 m_DiskStats.CloneCount,
                 m_DiskStats.CloneByteCount);

    // Atomically publish the finished file under its final cache name.
    const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash);
    RenameFile(TempFilePath, CacheFilePath);

    m_WrittenChunkByteCount += RawSize;
    if (m_Options.ValidateCompletedSequences)
    {
        m_ValidatedChunkByteCount += RawSize;
    }
}
-
// Arranges for one remote chunk to be written to all of its remaining target
// sequence locations. Two paths exist:
//  - if a previously downloaded compressed chunk is already on disk (and we are
//    not in prime-cache-only mode), an IO-pool task decompresses/writes it via
//    the write cache, deletes the downloaded file, and completes/verifies the
//    sequences that finished as a result;
//  - otherwise a network-pool task downloads the chunk and hands the payload to
//    AsyncWriteDownloadedChunk for the same write/complete handling.
// Download/write throughput rate trackers are started/stopped around the work,
// and per-request/-write completion counters are advanced.
//
// NOTE(review): ChunkTargetPtrs point into m_RemoteLookup's location table and
// ExistsResult/WriteCache/Work are captured by reference — callers presumably
// keep them alive until Work completes; confirm against the call site.
void
BuildsOperationUpdateFolder::WriteLooseChunk(const uint32_t RemoteChunkIndex,
                                             const BlobsExistsResult& ExistsResult,
                                             std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
                                             std::atomic<uint64_t>& WritePartsComplete,
                                             std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs,
                                             BufferedWriteFileCache& WriteCache,
                                             ParallelWork& Work,
                                             uint64_t TotalRequestCount,
                                             uint64_t TotalPartWriteCount,
                                             FilteredRate& FilteredDownloadedBytesPerSecond,
                                             FilteredRate& FilteredWrittenBytesPerSecond)
{
    std::filesystem::path ExistingCompressedChunkPath;
    if (!m_Options.PrimeCacheOnly)
    {
        // A valid pre-downloaded chunk counts as a completed download request.
        const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
        ExistingCompressedChunkPath = FindDownloadedChunk(ChunkHash);
        if (!ExistingCompressedChunkPath.empty())
        {
            m_DownloadStats.RequestsCompleteCount++;
            if (m_DownloadStats.RequestsCompleteCount == TotalRequestCount)
            {
                FilteredDownloadedBytesPerSecond.Stop();
            }
        }
    }
    if (!m_AbortFlag)
    {
        if (!ExistingCompressedChunkPath.empty())
        {
            // Path 1: reuse the already-downloaded compressed chunk from disk.
            Work.ScheduleWork(
                m_IOWorkerPool,
                [this,
                 SequenceIndexChunksLeftToWriteCounters,
                 &WriteCache,
                 &Work,
                 &WritePartsComplete,
                 TotalPartWriteCount,
                 &FilteredWrittenBytesPerSecond,
                 RemoteChunkIndex,
                 ChunkTargetPtrs = std::move(ChunkTargetPtrs),
                 CompressedChunkPath = std::move(ExistingCompressedChunkPath)](std::atomic<bool>& AbortFlag) mutable {
                    if (!AbortFlag)
                    {
                        ZEN_TRACE_CPU("Async_WritePreDownloadedChunk");

                        FilteredWrittenBytesPerSecond.Start();

                        const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];

                        IoBuffer CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath);
                        if (!CompressedPart)
                        {
                            throw std::runtime_error(
                                fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath));
                        }

                        bool NeedHashVerify =
                            WriteCompressedChunkToCache(ChunkHash, ChunkTargetPtrs, WriteCache, std::move(CompressedPart));
                        WritePartsComplete++;

                        if (!AbortFlag)
                        {
                            if (WritePartsComplete == TotalPartWriteCount)
                            {
                                FilteredWrittenBytesPerSecond.Stop();
                            }

                            // The on-disk download has been consumed; delete it (best effort).
                            std::error_code Ec = TryRemoveFile(CompressedChunkPath);
                            if (Ec)
                            {
                                ZEN_OPERATION_LOG_DEBUG(m_LogOutput,
                                                        "Failed removing file '{}', reason: ({}) {}",
                                                        CompressedChunkPath,
                                                        Ec.value(),
                                                        Ec.message());
                            }

                            // Close and finish any sequences whose last chunk this was;
                            // verification is scheduled asynchronously when required.
                            std::vector<uint32_t> CompletedSequences =
                                CompleteChunkTargets(ChunkTargetPtrs, SequenceIndexChunksLeftToWriteCounters);
                            WriteCache.Close(CompletedSequences);
                            if (NeedHashVerify)
                            {
                                VerifyAndCompleteChunkSequencesAsync(CompletedSequences, Work);
                            }
                            else
                            {
                                FinalizeChunkSequences(CompletedSequences);
                            }
                        }
                    }
                });
        }
        else
        {
            // Path 2: download the chunk first, then write it via AsyncWriteDownloadedChunk.
            Work.ScheduleWork(m_NetworkPool,
                              [this,
                               &ExistsResult,
                               SequenceIndexChunksLeftToWriteCounters,
                               &WriteCache,
                               &Work,
                               &WritePartsComplete,
                               TotalPartWriteCount,
                               TotalRequestCount,
                               &FilteredDownloadedBytesPerSecond,
                               &FilteredWrittenBytesPerSecond,
                               RemoteChunkIndex,
                               ChunkTargetPtrs = std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>(
                                   std::move(ChunkTargetPtrs))](std::atomic<bool>&) mutable {
                                  if (!m_AbortFlag)
                                  {
                                      ZEN_TRACE_CPU("Async_DownloadChunk");

                                      FilteredDownloadedBytesPerSecond.Start();
                                      DownloadBuildBlob(RemoteChunkIndex,
                                                        ExistsResult,
                                                        Work,
                                                        [this,
                                                         &ExistsResult,
                                                         SequenceIndexChunksLeftToWriteCounters,
                                                         &WriteCache,
                                                         &Work,
                                                         &WritePartsComplete,
                                                         TotalPartWriteCount,
                                                         TotalRequestCount,
                                                         RemoteChunkIndex,
                                                         &FilteredDownloadedBytesPerSecond,
                                                         &FilteredWrittenBytesPerSecond,
                                                         ChunkTargetPtrs = std::move(ChunkTargetPtrs)](IoBuffer&& Payload) mutable {
                                                            if (m_DownloadStats.RequestsCompleteCount == TotalRequestCount)
                                                            {
                                                                FilteredDownloadedBytesPerSecond.Stop();
                                                            }
                                                            // A file-backed payload enables backlogged writing.
                                                            IoBufferFileReference FileRef;
                                                            bool EnableBacklog = Payload.GetFileReference(FileRef);
                                                            AsyncWriteDownloadedChunk(m_Options.ZenFolderPath,
                                                                                      RemoteChunkIndex,
                                                                                      std::move(ChunkTargetPtrs),
                                                                                      WriteCache,
                                                                                      Work,
                                                                                      std::move(Payload),
                                                                                      SequenceIndexChunksLeftToWriteCounters,
                                                                                      WritePartsComplete,
                                                                                      TotalPartWriteCount,
                                                                                      FilteredWrittenBytesPerSecond,
                                                                                      EnableBacklog);
                                                        });
                                  }
                              });
        }
    }
}
-
-// Fetches the compressed blob for the chunk at RemoteChunkIndex and hands it to
-// OnDownloaded. Resolution order: local build cache storage (only when the blob is
-// listed in ExistsResult.ExistingBlobs), then remote build storage — using the
-// multipart large-blob path when the chunk's raw size is at or above
-// Options.LargeAttachmentSize. Download stats are updated on every successful
-// fetch; a missing chunk in the small-blob path throws.
-void
-BuildsOperationUpdateFolder::DownloadBuildBlob(uint32_t RemoteChunkIndex,
-                                               const BlobsExistsResult& ExistsResult,
-                                               ParallelWork& Work,
-                                               std::function<void(IoBuffer&& Payload)>&& OnDownloaded)
-{
-    const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
-    // FilteredDownloadedBytesPerSecond.Start();
-    IoBuffer BuildBlob;
-    const bool ExistsInCache = m_Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(ChunkHash);
-    if (ExistsInCache)
-    {
-        BuildBlob = m_Storage.BuildCacheStorage->GetBuildBlob(m_BuildId, ChunkHash);
-    }
-    // Cache hit: record stats and deliver the cached payload immediately.
-    if (BuildBlob)
-    {
-        uint64_t BlobSize = BuildBlob.GetSize();
-        m_DownloadStats.DownloadedChunkCount++;
-        m_DownloadStats.DownloadedChunkByteCount += BlobSize;
-        m_DownloadStats.RequestsCompleteCount++;
-        OnDownloaded(std::move(BuildBlob));
-    }
-    else
-    {
-        // Cache miss: choose the remote fetch strategy based on the chunk's raw size.
-        if (m_RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= m_Options.LargeAttachmentSize)
-        {
-            DownloadLargeBlob(
-                *m_Storage.BuildStorage,
-                m_TempDownloadFolderPath,
-                m_BuildId,
-                ChunkHash,
-                m_Options.PreferredMultipartChunkSize,
-                Work,
-                m_NetworkPool,
-                m_DownloadStats.DownloadedChunkByteCount,
-                m_DownloadStats.MultipartAttachmentCount,
-                [this, &Work, ChunkHash, RemoteChunkIndex, OnDownloaded = std::move(OnDownloaded)](IoBuffer&& Payload) mutable {
-                    m_DownloadStats.DownloadedChunkCount++;
-                    m_DownloadStats.RequestsCompleteCount++;
-
-                    // Optionally write the downloaded payload back into the cache storage.
-                    if (Payload && m_Storage.BuildCacheStorage && m_Options.PopulateCache)
-                    {
-                        m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId,
-                                                                  ChunkHash,
-                                                                  ZenContentType::kCompressedBinary,
-                                                                  CompositeBuffer(SharedBuffer(Payload)));
-                    }
-
-                    OnDownloaded(std::move(Payload));
-                });
-        }
-        else
-        {
-            BuildBlob = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, ChunkHash);
-            if (BuildBlob && m_Storage.BuildCacheStorage && m_Options.PopulateCache)
-            {
-                m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId,
-                                                          ChunkHash,
-                                                          ZenContentType::kCompressedBinary,
-                                                          CompositeBuffer(SharedBuffer(BuildBlob)));
-            }
-            if (!BuildBlob)
-            {
-                throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash));
-            }
-            // When only priming the cache, the payload is not delivered to the caller.
-            if (!m_Options.PrimeCacheOnly)
-            {
-                if (!m_AbortFlag)
-                {
-                    uint64_t BlobSize = BuildBlob.GetSize();
-                    m_DownloadStats.DownloadedChunkCount++;
-                    m_DownloadStats.DownloadedChunkByteCount += BlobSize;
-                    m_DownloadStats.RequestsCompleteCount++;
-
-                    OnDownloaded(std::move(BuildBlob));
-                }
-            }
-        }
-    }
-}
-
-// Fetches the byte range [RangeStart, RangeStart + RangeLength) of the block that
-// backs BlockRange, preferring the build cache (when the block hash is listed in
-// ExistsResult.ExistingBlobs) over remote build storage. A whole-file-backed
-// payload is renamed into m_TempBlockFolderPath to avoid a rewrite; other payloads
-// larger than Options.MaximumInMemoryPayloadSize are spilled to disk. OnDownloaded
-// receives either an in-memory buffer or an on-disk path (the other is empty).
-// Throws if the range cannot be fetched from either storage.
-void
-BuildsOperationUpdateFolder::DownloadPartialBlock(
-    const ChunkBlockAnalyser::BlockRangeDescriptor BlockRange,
-    const BlobsExistsResult& ExistsResult,
-    std::function<void(IoBuffer&& InMemoryBuffer, const std::filesystem::path& OnDiskPath)>&& OnDownloaded)
-{
-    const uint32_t BlockIndex = BlockRange.BlockIndex;
-
-    const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
-
-    IoBuffer BlockBuffer;
-    if (m_Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash))
-    {
-        BlockBuffer =
-            m_Storage.BuildCacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength);
-    }
-    if (!BlockBuffer)
-    {
-        BlockBuffer =
-            m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength);
-    }
-    if (!BlockBuffer)
-    {
-        throw std::runtime_error(fmt::format("Block {} is missing when fetching range {} -> {}",
-                                             BlockDescription.BlockHash,
-                                             BlockRange.RangeStart,
-                                             BlockRange.RangeStart + BlockRange.RangeLength));
-    }
-    if (!m_AbortFlag)
-    {
-        uint64_t BlockSize = BlockBuffer.GetSize();
-        m_DownloadStats.DownloadedBlockCount++;
-        m_DownloadStats.DownloadedBlockByteCount += BlockSize;
-        m_DownloadStats.RequestsCompleteCount++;
-
-        std::filesystem::path BlockChunkPath;
-
-        // Check if the dowloaded block is file based and we can move it directly without rewriting it
-        {
-            IoBufferFileReference FileRef;
-            if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && (FileRef.FileChunkSize == BlockSize))
-            {
-                ZEN_TRACE_CPU("MoveTempPartialBlock");
-
-                std::error_code Ec;
-                std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec);
-                if (!Ec)
-                {
-                    BlockBuffer.SetDeleteOnClose(false);
-                    BlockBuffer = {};
-                    BlockChunkPath = m_TempBlockFolderPath /
-                                     fmt::format("{}_{:x}_{:x}", BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength);
-                    RenameFile(TempBlobPath, BlockChunkPath, Ec);
-                    if (Ec)
-                    {
-                        // Rename failed: fall back to handing out the original temp file as a buffer again.
-                        BlockChunkPath = std::filesystem::path{};
-
-                        // Re-open the temp file again
-                        BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete);
-                        BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true);
-                        BlockBuffer.SetDeleteOnClose(true);
-                    }
-                }
-            }
-        }
-
-        if (BlockChunkPath.empty() && (BlockSize > m_Options.MaximumInMemoryPayloadSize))
-        {
-            ZEN_TRACE_CPU("WriteTempPartialBlock");
-            // Could not be moved and rather large, lets store it on disk
-            BlockChunkPath = m_TempBlockFolderPath /
-                             fmt::format("{}_{:x}_{:x}", BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength);
-            TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer);
-            BlockBuffer = {};
-        }
-        if (!m_AbortFlag)
-        {
-            OnDownloaded(std::move(BlockBuffer), std::move(BlockChunkPath));
-        }
-    }
-}
-
-// Copies chunk data that already exists locally — in the update target folder
-// itself (ScavengeSourceIndex == -1) or in one of the scavenged source folders —
-// into the cached sequence files instead of downloading it. Write ops are sorted
-// by (sequence index, offset); adjacent ops with contiguous source and target
-// ranges are coalesced into single reads. When CloneQuery reports the source as
-// clonable, byte ranges are block-cloned rather than read and rewritten, with any
-// unclonable head/tail bytes copied conventionally. Returns the target sequence
-// index of every write op performed (duplicates included) so the caller can
-// update sequence completion counters.
-std::vector<uint32_t>
-BuildsOperationUpdateFolder::WriteLocalChunkToCache(CloneQueryInterface* CloneQuery,
-                                                    const CopyChunkData& CopyData,
-                                                    const std::vector<ChunkedFolderContent>& ScavengedContents,
-                                                    const std::vector<ChunkedContentLookup>& ScavengedLookups,
-                                                    const std::vector<std::filesystem::path>& ScavengedPaths,
-                                                    BufferedWriteFileCache& WriteCache)
-{
-    ZEN_TRACE_CPU("WriteLocalChunkToCache");
-
-    std::filesystem::path SourceFilePath;
-
-    // Resolve the file we read chunk bytes from: the local content or a scavenged folder.
-    if (CopyData.ScavengeSourceIndex == (uint32_t)-1)
-    {
-        const uint32_t LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex];
-        SourceFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred();
-    }
-    else
-    {
-        const ChunkedFolderContent& ScavengedContent = ScavengedContents[CopyData.ScavengeSourceIndex];
-        const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[CopyData.ScavengeSourceIndex];
-        const std::filesystem::path ScavengedPath = ScavengedPaths[CopyData.ScavengeSourceIndex];
-        const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex];
-        SourceFilePath = (ScavengedPath / ScavengedContent.Paths[ScavengedPathIndex]).make_preferred();
-    }
-    ZEN_ASSERT_SLOW(IsFile(SourceFilePath));
-    ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty());
-
-    uint64_t CacheLocalFileBytesRead = 0;
-
-    size_t TargetStart = 0;
-    const std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> AllTargets(CopyData.TargetChunkLocationPtrs);
-
-    // One pending write: a target location plus where the chunk's bytes live in the source file.
-    struct WriteOp
-    {
-        const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr;
-        uint64_t CacheFileOffset = (uint64_t)-1;
-        uint32_t ChunkIndex = (uint32_t)-1;
-    };
-
-    std::vector<WriteOp> WriteOps;
-
-    if (!m_AbortFlag)
-    {
-        ZEN_TRACE_CPU("Sort");
-        WriteOps.reserve(AllTargets.size());
-        for (const CopyChunkData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets)
-        {
-            std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> TargetRange =
-                AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount);
-            for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange)
-            {
-                WriteOps.push_back(
-                    WriteOp{.Target = Target, .CacheFileOffset = ChunkTarget.CacheFileOffset, .ChunkIndex = ChunkTarget.RemoteChunkIndex});
-            }
-            TargetStart += ChunkTarget.TargetChunkLocationCount;
-        }
-
-        // Sort by (sequence, offset) so writes within a sequence file are sequential.
-        std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) {
-            if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex)
-            {
-                return true;
-            }
-            else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex)
-            {
-                return false;
-            }
-            if (Lhs.Target->Offset < Rhs.Target->Offset)
-            {
-                return true;
-            }
-            return false;
-        });
-    }
-
-    if (!m_AbortFlag)
-    {
-        ZEN_TRACE_CPU("Write");
-
-        // NOTE(review): ChunkIndexesWritten is declared but never used below — confirm leftover.
-        tsl::robin_set<uint32_t> ChunkIndexesWritten;
-
-        BufferedOpenFile SourceFile(SourceFilePath,
-                                    m_DiskStats.OpenReadCount,
-                                    m_DiskStats.CurrentOpenFileCount,
-                                    m_DiskStats.ReadCount,
-                                    m_DiskStats.ReadByteCount);
-
-        bool CanCloneSource = CloneQuery && CloneQuery->CanClone(SourceFile.Handle());
-
-        BufferedWriteFileCache::Local LocalWriter(WriteCache);
-
-        for (size_t WriteOpIndex = 0; WriteOpIndex < WriteOps.size();)
-        {
-            if (m_AbortFlag)
-            {
-                break;
-            }
-            const WriteOp& Op = WriteOps[WriteOpIndex];
-
-            const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex;
-            const uint32_t RemotePathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex];
-            const uint64_t TargetSize = m_RemoteContent.RawSizes[RemotePathIndex];
-            const uint64_t ChunkSize = m_RemoteContent.ChunkedContent.ChunkRawSizes[Op.ChunkIndex];
-
-            // Coalesce following ops that continue the same sequence with contiguous
-            // source and target ranges, capped at one BufferedOpenFile block.
-            uint64_t ReadLength = ChunkSize;
-            size_t WriteCount = 1;
-            uint64_t OpSourceEnd = Op.CacheFileOffset + ChunkSize;
-            uint64_t OpTargetEnd = Op.Target->Offset + ChunkSize;
-            while ((WriteOpIndex + WriteCount) < WriteOps.size())
-            {
-                const WriteOp& NextOp = WriteOps[WriteOpIndex + WriteCount];
-                if (NextOp.Target->SequenceIndex != Op.Target->SequenceIndex)
-                {
-                    break;
-                }
-                if (NextOp.Target->Offset != OpTargetEnd)
-                {
-                    break;
-                }
-                if (NextOp.CacheFileOffset != OpSourceEnd)
-                {
-                    break;
-                }
-                const uint64_t NextChunkLength = m_RemoteContent.ChunkedContent.ChunkRawSizes[NextOp.ChunkIndex];
-                if (ReadLength + NextChunkLength > BufferedOpenFile::BlockSize)
-                {
-                    break;
-                }
-                ReadLength += NextChunkLength;
-                OpSourceEnd += NextChunkLength;
-                OpTargetEnd += NextChunkLength;
-                WriteCount++;
-            }
-
-            {
-                bool DidClone = false;
-
-                if (CanCloneSource)
-                {
-                    // Ask how much of the range can be block-cloned; PreBytes/PostBytes are the
-                    // unclonable head/tail that must be copied conventionally.
-                    uint64_t PreBytes = 0;
-                    uint64_t PostBytes = 0;
-                    uint64_t ClonableBytes =
-                        CloneQuery->GetClonableRange(Op.CacheFileOffset, Op.Target->Offset, ReadLength, PreBytes, PostBytes);
-                    if (ClonableBytes > 0)
-                    {
-                        // We need to open the file...
-                        BufferedWriteFileCache::Local::Writer* Writer = LocalWriter.GetWriter(RemoteSequenceIndex);
-                        if (!Writer)
-                        {
-                            Writer = LocalWriter.PutWriter(RemoteSequenceIndex, std::make_unique<BufferedWriteFileCache::Local::Writer>());
-
-                            Writer->File = std::make_unique<BasicFile>();
-
-                            const std::filesystem::path FileName =
-                                GetTempChunkedSequenceFileName(m_CacheFolderPath,
-                                                               m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]);
-                            Writer->File->Open(FileName, BasicFile::Mode::kWrite);
-                            if (m_Options.UseSparseFiles)
-                            {
-                                PrepareFileForScatteredWrite(Writer->File->Handle(), TargetSize);
-                            }
-                        }
-                        DidClone = CloneQuery->TryClone(SourceFile.Handle(),
-                                                        Writer->File->Handle(),
-                                                        Op.CacheFileOffset + PreBytes,
-                                                        Op.Target->Offset + PreBytes,
-                                                        ClonableBytes,
-                                                        TargetSize);
-                        if (DidClone)
-                        {
-                            m_DiskStats.WriteCount++;
-                            m_DiskStats.WriteByteCount += ClonableBytes;
-
-                            m_DiskStats.CloneCount++;
-                            m_DiskStats.CloneByteCount += ClonableBytes;
-
-                            m_WrittenChunkByteCount += ClonableBytes;
-
-                            if (PreBytes > 0)
-                            {
-                                CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, PreBytes);
-                                const uint64_t FileOffset = Op.Target->Offset;
-
-                                WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex);
-                            }
-                            if (PostBytes > 0)
-                            {
-                                CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset + ReadLength - PostBytes, PostBytes);
-                                const uint64_t FileOffset = Op.Target->Offset + ReadLength - PostBytes;
-
-                                WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex);
-                            }
-                        }
-                    }
-                }
-
-                // Fallback: plain read + write of the (possibly coalesced) range.
-                if (!DidClone)
-                {
-                    CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ReadLength);
-
-                    const uint64_t FileOffset = Op.Target->Offset;
-
-                    WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex);
-                }
-            }
-
-            CacheLocalFileBytesRead += ReadLength; // TODO: This should be the sum of unique chunk sizes?
-
-            WriteOpIndex += WriteCount;
-        }
-    }
-
-    if (m_Options.IsVerbose)
-    {
-        ZEN_OPERATION_LOG_INFO(m_LogOutput, "Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), SourceFilePath);
-    }
-
-    // Report one sequence index per write op (duplicates included) for completion accounting.
-    std::vector<uint32_t> Result;
-    Result.reserve(WriteOps.size());
-
-    for (const WriteOp& Op : WriteOps)
-    {
-        Result.push_back(Op.Target->SequenceIndex);
-    }
-    return Result;
-}
-
-// Decompresses a downloaded compressed chunk into its target sequence files.
-// A chunk that is the sole content of a single file goes through StreamDecompress
-// (which verifies the payload inline) and returns false — no later hash verify is
-// needed. Otherwise the chunk is stream-decompressed into every target location
-// via the write cache and true is returned, signalling the caller to verify the
-// completed sequences afterwards. Throws on header/hash/decompress failures.
-bool
-BuildsOperationUpdateFolder::WriteCompressedChunkToCache(
-    const IoHash& ChunkHash,
-    const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs,
-    BufferedWriteFileCache& WriteCache,
-    IoBuffer&& CompressedPart)
-{
-    ZEN_TRACE_CPU("WriteCompressedChunkToCache");
-
-    auto ChunkHashToChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash);
-    ZEN_ASSERT(ChunkHashToChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end());
-    if (IsSingleFileChunk(m_RemoteContent, ChunkTargetPtrs))
-    {
-        const std::uint32_t SequenceIndex = ChunkTargetPtrs.front()->SequenceIndex;
-        const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex];
-        StreamDecompress(SequenceRawHash, CompositeBuffer(std::move(CompressedPart)));
-        return false;
-    }
-    else
-    {
-        IoHash RawHash;
-        uint64_t RawSize;
-        CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompositeBuffer(std::move(CompressedPart)), RawHash, RawSize);
-        if (!Compressed)
-        {
-            throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", ChunkHash));
-        }
-        // NOTE(review): the message below likely intends "does not match".
-        if (RawHash != ChunkHash)
-        {
-            throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, ChunkHash));
-        }
-
-        BufferedWriteFileCache::Local LocalWriter(WriteCache);
-
-        // NOTE(review): Hash is never appended to in this path — presumably leftover; confirm.
-        IoHashStream Hash;
-        bool CouldDecompress = Compressed.DecompressToStream(
-            0,
-            (uint64_t)-1,
-            [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
-                ZEN_UNUSED(SourceOffset);
-                ZEN_TRACE_CPU("Async_StreamDecompress_Write");
-                m_DiskStats.ReadByteCount += SourceSize;
-                if (!m_AbortFlag)
-                {
-                    // Fan the decompressed range out to every target location of this chunk.
-                    for (const ChunkedContentLookup::ChunkSequenceLocation* TargetPtr : ChunkTargetPtrs)
-                    {
-                        const auto& Target = *TargetPtr;
-                        const uint64_t FileOffset = Target.Offset + Offset;
-                        const uint32_t SequenceIndex = Target.SequenceIndex;
-                        const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex];
-
-                        WriteSequenceChunkToCache(LocalWriter, RangeBuffer, SequenceIndex, FileOffset, PathIndex);
-                    }
-
-                    return true;
-                }
-                return false;
-            });
-
-        if (m_AbortFlag)
-        {
-            return false;
-        }
-
-        if (!CouldDecompress)
-        {
-            throw std::runtime_error(fmt::format("Failed to decompress large chunk {}", ChunkHash));
-        }
-
-        return true;
-    }
-}
-
-// Decompresses a whole-sequence blob into a temporary file next to the final
-// cached sequence file and then moves it into place. Verifies the compressed
-// header's raw hash against SequenceRawHash up front, and — when
-// Options.ValidateCompletedSequences is set — also hashes the decompressed
-// payload and compares it. Throws on any header, hash, decompress, or file error.
-void
-BuildsOperationUpdateFolder::StreamDecompress(const IoHash& SequenceRawHash, CompositeBuffer&& CompressedPart)
-{
-    ZEN_TRACE_CPU("StreamDecompress");
-    const std::filesystem::path TempChunkSequenceFileName = GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash);
-    TemporaryFile DecompressedTemp;
-    std::error_code Ec;
-    DecompressedTemp.CreateTemporary(TempChunkSequenceFileName.parent_path(), Ec);
-    if (Ec)
-    {
-        throw std::runtime_error(fmt::format("Failed creating temporary file for decompressing large blob {}, reason: ({}) {}",
-                                             SequenceRawHash,
-                                             Ec.value(),
-                                             Ec.message()));
-    }
-    IoHash RawHash;
-    uint64_t RawSize;
-    CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompressedPart, RawHash, RawSize);
-    if (!Compressed)
-    {
-        throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", SequenceRawHash));
-    }
-    // NOTE(review): the message below likely intends "does not match".
-    if (RawHash != SequenceRawHash)
-    {
-        throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, SequenceRawHash));
-    }
-    PrepareFileForScatteredWrite(DecompressedTemp.Handle(), RawSize);
-
-    IoHashStream Hash;
-    bool CouldDecompress =
-        Compressed.DecompressToStream(0,
-                                      (uint64_t)-1,
-                                      [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
-                                          ZEN_UNUSED(SourceOffset);
-                                          ZEN_TRACE_CPU("StreamDecompress_Write");
-                                          m_DiskStats.ReadCount++;
-                                          m_DiskStats.ReadByteCount += SourceSize;
-                                          if (!m_AbortFlag)
-                                          {
-                                              for (const SharedBuffer& Segment : RangeBuffer.GetSegments())
-                                              {
-                                                  if (m_Options.ValidateCompletedSequences)
-                                                  {
-                                                      Hash.Append(Segment.GetView());
-                                                      m_ValidatedChunkByteCount += Segment.GetSize();
-                                                  }
-                                                  DecompressedTemp.Write(Segment, Offset);
-                                                  Offset += Segment.GetSize();
-                                                  m_DiskStats.WriteByteCount += Segment.GetSize();
-                                                  m_DiskStats.WriteCount++;
-                                                  m_WrittenChunkByteCount += Segment.GetSize();
-                                              }
-                                              return true;
-                                          }
-                                          return false;
-                                      });
-
-    if (m_AbortFlag)
-    {
-        return;
-    }
-
-    if (!CouldDecompress)
-    {
-        throw std::runtime_error(fmt::format("Failed to decompress large blob {}", SequenceRawHash));
-    }
-    if (m_Options.ValidateCompletedSequences)
-    {
-        const IoHash VerifyHash = Hash.GetHash();
-        if (VerifyHash != SequenceRawHash)
-        {
-            throw std::runtime_error(
-                fmt::format("Decompressed blob payload hash {} does not match expected hash {}", VerifyHash, SequenceRawHash));
-        }
-    }
-    DecompressedTemp.MoveTemporaryIntoPlace(TempChunkSequenceFileName, Ec);
-    if (Ec)
-    {
-        throw std::runtime_error(fmt::format("Failed moving temporary file for decompressing large blob {}, reason: ({}) {}",
-                                             SequenceRawHash,
-                                             Ec.value(),
-                                             Ec.message()));
-    }
-    // WriteChunkStats.ChunkCountWritten++;
-}
-
-// Writes one raw chunk buffer into the cached file for SequenceIndex at
-// FileOffset. A chunk that covers the whole sequence is written through its own
-// short-lived file handle; otherwise a per-sequence writer is fetched from (or
-// installed into) LocalWriter, optionally with a small buffered writer for
-// sub-buffer-size chunks. Disk write stats are updated in all cases.
-void
-BuildsOperationUpdateFolder::WriteSequenceChunkToCache(BufferedWriteFileCache::Local& LocalWriter,
-                                                       const CompositeBuffer& Chunk,
-                                                       const uint32_t SequenceIndex,
-                                                       const uint64_t FileOffset,
-                                                       const uint32_t PathIndex)
-{
-    ZEN_TRACE_CPU("WriteSequenceChunkToCache");
-
-    const uint64_t SequenceSize = m_RemoteContent.RawSizes[PathIndex];
-
-    // Opens (and optionally pre-sizes for sparse/scattered writes) the temp sequence file.
-    auto OpenFile = [&](BasicFile& File) {
-        const std::filesystem::path FileName =
-            GetTempChunkedSequenceFileName(m_CacheFolderPath, m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]);
-        File.Open(FileName, BasicFile::Mode::kWrite);
-        if (m_Options.UseSparseFiles)
-        {
-            PrepareFileForScatteredWrite(File.Handle(), SequenceSize);
-        }
-    };
-
-    const uint64_t ChunkSize = Chunk.GetSize();
-    ZEN_ASSERT(FileOffset + ChunkSize <= SequenceSize);
-    if (ChunkSize == SequenceSize)
-    {
-        // Chunk is the entire sequence: write once with a transient handle, no cached writer.
-        BasicFile SingleChunkFile;
-        OpenFile(SingleChunkFile);
-
-        m_DiskStats.CurrentOpenFileCount++;
-        auto _ = MakeGuard([this]() { m_DiskStats.CurrentOpenFileCount--; });
-        SingleChunkFile.Write(Chunk, FileOffset);
-    }
-    else
-    {
-        // NOTE(review): 1025 looks like a typo for 1024 (i.e. 256 KiB) — confirm intent.
-        const uint64_t MaxWriterBufferSize = 256u * 1025u;
-
-        BufferedWriteFileCache::Local::Writer* Writer = LocalWriter.GetWriter(SequenceIndex);
-        if (Writer)
-        {
-            if ((!Writer->Writer) && (ChunkSize < MaxWriterBufferSize))
-            {
-                Writer->Writer = std::make_unique<BasicFileWriter>(*Writer->File, Min(SequenceSize, MaxWriterBufferSize));
-            }
-            Writer->Write(Chunk, FileOffset);
-        }
-        else
-        {
-            Writer = LocalWriter.PutWriter(SequenceIndex, std::make_unique<BufferedWriteFileCache::Local::Writer>());
-
-            Writer->File = std::make_unique<BasicFile>();
-            OpenFile(*Writer->File);
-            if (ChunkSize < MaxWriterBufferSize)
-            {
-                Writer->Writer = std::make_unique<BasicFileWriter>(*Writer->File, Min(SequenceSize, MaxWriterBufferSize));
-            }
-            Writer->Write(Chunk, FileOffset);
-        }
-    }
-    m_DiskStats.WriteCount++;
-    m_DiskStats.WriteByteCount += ChunkSize;
-    m_WrittenChunkByteCount += ChunkSize;
-}
-
-// Scans chunks [FirstIncludedBlockChunkIndex, LastIncludedBlockChunkIndex] of the
-// (possibly partial) block mapped by BlockView. For each chunk that the remote
-// lookup still needs and whose needs-copy flag is won via compare_exchange, the
-// compressed chunk is validated (header parse, content hash, raw size), then
-// decompressed, and one write op per remaining target location is appended to
-// OutOps. Ops are sorted by (sequence index, offset) so later writes are
-// sequential. Returns true; validation failures throw instead.
-bool
-BuildsOperationUpdateFolder::GetBlockWriteOps(const IoHash& BlockRawHash,
-                                              std::span<const IoHash> ChunkRawHashes,
-                                              std::span<const uint32_t> ChunkCompressedLengths,
-                                              std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
-                                              std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
-                                              const MemoryView BlockView,
-                                              uint32_t FirstIncludedBlockChunkIndex,
-                                              uint32_t LastIncludedBlockChunkIndex,
-                                              BlockWriteOps& OutOps)
-{
-    ZEN_TRACE_CPU("GetBlockWriteOps");
-
-    uint32_t OffsetInBlock = 0;
-    for (uint32_t ChunkBlockIndex = FirstIncludedBlockChunkIndex; ChunkBlockIndex <= LastIncludedBlockChunkIndex; ChunkBlockIndex++)
-    {
-        const uint32_t ChunkCompressedSize = ChunkCompressedLengths[ChunkBlockIndex];
-        const IoHash& ChunkHash = ChunkRawHashes[ChunkBlockIndex];
-        if (auto It = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != m_RemoteLookup.ChunkHashToChunkIndex.end())
-        {
-            const uint32_t ChunkIndex = It->second;
-            std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs =
-                GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, ChunkIndex);
-
-            if (!ChunkTargetPtrs.empty())
-            {
-                // Only the worker that flips the needs-copy flag decompresses this chunk.
-                bool NeedsWrite = true;
-                if (RemoteChunkIndexNeedsCopyFromSourceFlags[ChunkIndex].compare_exchange_strong(NeedsWrite, false))
-                {
-                    MemoryView ChunkMemoryView = BlockView.Mid(OffsetInBlock, ChunkCompressedSize);
-                    IoHash VerifyChunkHash;
-                    uint64_t VerifyChunkSize;
-                    CompressedBuffer CompressedChunk =
-                        CompressedBuffer::FromCompressed(SharedBuffer::MakeView(ChunkMemoryView), VerifyChunkHash, VerifyChunkSize);
-                    if (!CompressedChunk)
-                    {
-                        throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} is not a valid compressed buffer",
-                                                             ChunkHash,
-                                                             OffsetInBlock,
-                                                             ChunkCompressedSize,
-                                                             BlockRawHash));
-                    }
-                    if (VerifyChunkHash != ChunkHash)
-                    {
-                        throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} has a mismatching content hash {}",
-                                                             ChunkHash,
-                                                             OffsetInBlock,
-                                                             ChunkCompressedSize,
-                                                             BlockRawHash,
-                                                             VerifyChunkHash));
-                    }
-                    if (VerifyChunkSize != m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex])
-                    {
-                        throw std::runtime_error(
-                            fmt::format("Chunk {} at {}, size {} in block {} has a mismatching raw size {}, expected {}",
-                                        ChunkHash,
-                                        OffsetInBlock,
-                                        ChunkCompressedSize,
-                                        BlockRawHash,
-                                        VerifyChunkSize,
-                                        m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]));
-                    }
-
-                    OodleCompressor ChunkCompressor;
-                    OodleCompressionLevel ChunkCompressionLevel;
-                    uint64_t ChunkBlockSize;
-
-                    bool GetCompressParametersSuccess =
-                        CompressedChunk.TryGetCompressParameters(ChunkCompressor, ChunkCompressionLevel, ChunkBlockSize);
-                    ZEN_ASSERT(GetCompressParametersSuccess);
-
-                    // Uncompressed ("None") chunks can be wrapped in place; others need a real decompress.
-                    IoBuffer Decompressed;
-                    if (ChunkCompressionLevel == OodleCompressionLevel::None)
-                    {
-                        MemoryView ChunkDecompressedMemoryView = ChunkMemoryView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder());
-                        Decompressed =
-                            IoBuffer(IoBuffer::Wrap, ChunkDecompressedMemoryView.GetData(), ChunkDecompressedMemoryView.GetSize());
-                    }
-                    else
-                    {
-                        Decompressed = CompressedChunk.Decompress().AsIoBuffer();
-                    }
-
-                    if (Decompressed.GetSize() != m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex])
-                    {
-                        throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} decompressed to size {}, expected {}",
-                                                             ChunkHash,
-                                                             OffsetInBlock,
-                                                             ChunkCompressedSize,
-                                                             BlockRawHash,
-                                                             Decompressed.GetSize(),
-                                                             m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]));
-                    }
-
-                    ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed));
-                    for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs)
-                    {
-                        OutOps.WriteOps.push_back(
-                            BlockWriteOps::WriteOpData{.Target = Target, .ChunkBufferIndex = OutOps.ChunkBuffers.size()});
-                    }
-                    OutOps.ChunkBuffers.emplace_back(std::move(Decompressed));
-                }
-            }
-        }
-
-        OffsetInBlock += ChunkCompressedSize;
-    }
-    {
-        ZEN_TRACE_CPU("Sort");
-        std::sort(OutOps.WriteOps.begin(),
-                  OutOps.WriteOps.end(),
-                  [](const BlockWriteOps::WriteOpData& Lhs, const BlockWriteOps::WriteOpData& Rhs) {
-                      if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex)
-                      {
-                          return true;
-                      }
-                      if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex)
-                      {
-                          return false;
-                      }
-                      return Lhs.Target->Offset < Rhs.Target->Offset;
-                  });
-    }
-    return true;
-}
-
-// Applies prepared block write ops to the write cache and then, with all local
-// file handles released, decrements the per-sequence chunks-left counters;
-// sequences that reach zero are closed in the cache and queued for asynchronous
-// verification and completion.
-void
-BuildsOperationUpdateFolder::WriteBlockChunkOpsToCache(std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
-                                                       const BlockWriteOps& Ops,
-                                                       BufferedWriteFileCache& WriteCache,
-                                                       ParallelWork& Work)
-{
-    ZEN_TRACE_CPU("WriteBlockChunkOpsToCache");
-
-    {
-        // Scope keeps LocalWriter (and its open files) alive only for the write phase.
-        BufferedWriteFileCache::Local LocalWriter(WriteCache);
-        for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps)
-        {
-            if (Work.IsAborted())
-            {
-                break;
-            }
-            const CompositeBuffer& Chunk = Ops.ChunkBuffers[WriteOp.ChunkBufferIndex];
-            const uint32_t SequenceIndex = WriteOp.Target->SequenceIndex;
-            ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() <=
-                       m_RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]);
-            ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() > 0);
-            const uint64_t FileOffset = WriteOp.Target->Offset;
-            const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex];
-
-            WriteSequenceChunkToCache(LocalWriter, Chunk, SequenceIndex, FileOffset, PathIndex);
-        }
-    }
-    if (!Work.IsAborted())
-    {
-        // Write tracking, updating this must be done without any files open (BufferedWriteFileCache::Local)
-        std::vector<uint32_t> CompletedChunkSequences;
-        for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps)
-        {
-            const uint32_t RemoteSequenceIndex = WriteOp.Target->SequenceIndex;
-            if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters))
-            {
-                CompletedChunkSequences.push_back(RemoteSequenceIndex);
-            }
-        }
-        WriteCache.Close(CompletedChunkSequences);
-        VerifyAndCompleteChunkSequencesAsync(CompletedChunkSequences, Work);
-    }
-}
-
-// Writes every still-needed chunk contained in a fully downloaded block to the
-// cache. Blocks without embedded header info (HeaderSize == 0 or no per-chunk
-// compressed lengths) take the legacy path, which re-reads the chunk length
-// table from the block payload itself before building the write ops. Returns
-// the result of GetBlockWriteOps for the chosen path.
-bool
-BuildsOperationUpdateFolder::WriteChunksBlockToCache(const ChunkBlockDescription& BlockDescription,
-                                                     std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
-                                                     ParallelWork& Work,
-                                                     CompositeBuffer&& BlockBuffer,
-                                                     std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
-                                                     BufferedWriteFileCache& WriteCache)
-{
-    ZEN_TRACE_CPU("WriteChunksBlockToCache");
-
-    IoBuffer BlockMemoryBuffer = MakeBufferMemoryBased(BlockBuffer);
-    const MemoryView BlockView = BlockMemoryBuffer.GetView();
-
-    BlockWriteOps Ops;
-    if ((BlockDescription.HeaderSize == 0) || BlockDescription.ChunkCompressedLengths.empty())
-    {
-        ZEN_TRACE_CPU("WriteChunksBlockToCache_Legacy");
-
-        // Legacy blocks carry the chunk length table inside the payload.
-        uint64_t HeaderSize;
-        const std::vector<uint32_t> ChunkCompressedLengths =
-            ReadChunkBlockHeader(BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder()), HeaderSize);
-
-        if (GetBlockWriteOps(BlockDescription.BlockHash,
-                             BlockDescription.ChunkRawHashes,
-                             ChunkCompressedLengths,
-                             SequenceIndexChunksLeftToWriteCounters,
-                             RemoteChunkIndexNeedsCopyFromSourceFlags,
-                             BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + HeaderSize),
-                             0,
-                             gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1),
-                             Ops))
-        {
-            WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work);
-            return true;
-        }
-        return false;
-    }
-
-    if (GetBlockWriteOps(BlockDescription.BlockHash,
-                         BlockDescription.ChunkRawHashes,
-                         BlockDescription.ChunkCompressedLengths,
-                         SequenceIndexChunksLeftToWriteCounters,
-                         RemoteChunkIndexNeedsCopyFromSourceFlags,
-                         BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize),
-                         0,
-                         gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1),
-                         Ops))
-    {
-        WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work);
-        return true;
-    }
-    return false;
-}
-
-// Same as WriteChunksBlockToCache, but for a partially downloaded block:
-// PartialBlockBuffer holds only the chunks in the inclusive index range
-// [FirstIncludedBlockChunkIndex, LastIncludedBlockChunkIndex], and chunk lengths
-// always come from the block description (no legacy in-payload header path).
-bool
-BuildsOperationUpdateFolder::WritePartialBlockChunksToCache(const ChunkBlockDescription& BlockDescription,
-                                                            std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
-                                                            ParallelWork& Work,
-                                                            CompositeBuffer&& PartialBlockBuffer,
-                                                            uint32_t FirstIncludedBlockChunkIndex,
-                                                            uint32_t LastIncludedBlockChunkIndex,
-                                                            std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
-                                                            BufferedWriteFileCache& WriteCache)
-{
-    ZEN_TRACE_CPU("WritePartialBlockChunksToCache");
-
-    IoBuffer BlockMemoryBuffer = MakeBufferMemoryBased(PartialBlockBuffer);
-    const MemoryView BlockView = BlockMemoryBuffer.GetView();
-
-    BlockWriteOps Ops;
-    if (GetBlockWriteOps(BlockDescription.BlockHash,
-                         BlockDescription.ChunkRawHashes,
-                         BlockDescription.ChunkCompressedLengths,
-                         SequenceIndexChunksLeftToWriteCounters,
-                         RemoteChunkIndexNeedsCopyFromSourceFlags,
-                         BlockView,
-                         FirstIncludedBlockChunkIndex,
-                         LastIncludedBlockChunkIndex,
-                         Ops))
-    {
-        WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work);
-        return true;
-    }
-    else
-    {
-        return false;
-    }
-}
-
-// Stages a downloaded compressed chunk and schedules an IO-pool job that writes
-// it into all of its target sequence files. A whole-file-backed payload is
-// renamed into the temp download folder to avoid a rewrite; in-memory payloads
-// over Options.MaximumInMemoryPayloadSize are spilled to disk first. After the
-// write, completed sequences are either queued for async verification (when the
-// decompress path could not verify inline) or finalized directly.
-void
-BuildsOperationUpdateFolder::AsyncWriteDownloadedChunk(const std::filesystem::path& ZenFolderPath,
-                                                       uint32_t RemoteChunkIndex,
-                                                       std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs,
-                                                       BufferedWriteFileCache& WriteCache,
-                                                       ParallelWork& Work,
-                                                       IoBuffer&& Payload,
-                                                       std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
-                                                       std::atomic<uint64_t>& WritePartsComplete,
-                                                       const uint64_t TotalPartWriteCount,
-                                                       FilteredRate& FilteredWrittenBytesPerSecond,
-                                                       bool EnableBacklog)
-{
-    ZEN_TRACE_CPU("AsyncWriteDownloadedChunk");
-
-    const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
-
-    const uint64_t Size = Payload.GetSize();
-
-    std::filesystem::path CompressedChunkPath;
-
-    // Check if the dowloaded chunk is file based and we can move it directly without rewriting it
-    {
-        IoBufferFileReference FileRef;
-        if (Payload.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && (FileRef.FileChunkSize == Size))
-        {
-            ZEN_TRACE_CPU("MoveTempChunk");
-            std::error_code Ec;
-            std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec);
-            if (!Ec)
-            {
-                Payload.SetDeleteOnClose(false);
-                Payload = {};
-                CompressedChunkPath = m_TempDownloadFolderPath / ChunkHash.ToHexString();
-                RenameFile(TempBlobPath, CompressedChunkPath, Ec);
-                if (Ec)
-                {
-                    // Rename failed: fall back to the original temp file as an in-memory reference.
-                    CompressedChunkPath = std::filesystem::path{};
-
-                    // Re-open the temp file again
-                    BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete);
-                    Payload = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, Size, true);
-                    Payload.SetDeleteOnClose(true);
-                }
-            }
-        }
-    }
-
-    if (CompressedChunkPath.empty() && (Size > m_Options.MaximumInMemoryPayloadSize))
-    {
-        ZEN_TRACE_CPU("WriteTempChunk");
-        // Could not be moved and rather large, lets store it on disk
-        CompressedChunkPath = m_TempDownloadFolderPath / ChunkHash.ToHexString();
-        TemporaryFile::SafeWriteFile(CompressedChunkPath, Payload);
-        Payload = {};
-    }
-
-    // Exactly one of CompressedChunkPath / CompressedPart is set when the job runs.
-    Work.ScheduleWork(
-        m_IOWorkerPool,
-        [&ZenFolderPath,
-         this,
-         SequenceIndexChunksLeftToWriteCounters,
-         &Work,
-         CompressedChunkPath,
-         RemoteChunkIndex,
-         TotalPartWriteCount,
-         &WriteCache,
-         &WritePartsComplete,
-         &FilteredWrittenBytesPerSecond,
-         ChunkTargetPtrs = std::move(ChunkTargetPtrs),
-         CompressedPart = IoBuffer(std::move(Payload))](std::atomic<bool>&) mutable {
-            if (!m_AbortFlag)
-            {
-                ZEN_TRACE_CPU("Async_WriteChunk");
-
-                FilteredWrittenBytesPerSecond.Start();
-
-                const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
-                if (CompressedChunkPath.empty())
-                {
-                    ZEN_ASSERT(CompressedPart);
-                }
-                else
-                {
-                    ZEN_ASSERT(!CompressedPart);
-                    CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath);
-                    if (!CompressedPart)
-                    {
-                        throw std::runtime_error(
-                            fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath));
-                    }
-                }
-
-                bool NeedHashVerify = WriteCompressedChunkToCache(ChunkHash, ChunkTargetPtrs, WriteCache, std::move(CompressedPart));
-                if (!m_AbortFlag)
-                {
-                    WritePartsComplete++;
-                    if (WritePartsComplete == TotalPartWriteCount)
-                    {
-                        FilteredWrittenBytesPerSecond.Stop();
-                    }
-
-                    // The staged on-disk copy is no longer needed; failure to delete is only logged.
-                    if (!CompressedChunkPath.empty())
-                    {
-                        std::error_code Ec = TryRemoveFile(CompressedChunkPath);
-                        if (Ec)
-                        {
-                            ZEN_OPERATION_LOG_DEBUG(m_LogOutput,
-                                                    "Failed removing file '{}', reason: ({}) {}",
-                                                    CompressedChunkPath,
-                                                    Ec.value(),
-                                                    Ec.message());
-                        }
-                    }
-
-                    std::vector<uint32_t> CompletedSequences =
-                        CompleteChunkTargets(ChunkTargetPtrs, SequenceIndexChunksLeftToWriteCounters);
-                    WriteCache.Close(CompletedSequences);
-                    if (NeedHashVerify)
-                    {
-                        VerifyAndCompleteChunkSequencesAsync(CompletedSequences, Work);
-                    }
-                    else
-                    {
-                        FinalizeChunkSequences(CompletedSequences);
-                    }
-                }
-            }
-        },
-        EnableBacklog ? WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog);
-}
-
// Verifies (optionally) and finalizes a batch of fully written chunk sequences.
//
// When Options.ValidateCompletedSequences is set, every sequence except the
// first is scheduled onto the IO worker pool via Work, while the first sequence
// is verified and finalized inline on the calling thread (so this call does a
// share of the work itself instead of only queueing). When validation is
// disabled, all sequences are finalized synchronously without hashing.
//
// @param RemoteSequenceIndexes Indexes into the remote content's sequence tables;
//                              an empty span is a no-op.
// @param Work                  Parallel work tracker used to schedule async tasks.
void
BuildsOperationUpdateFolder::VerifyAndCompleteChunkSequencesAsync(std::span<const uint32_t> RemoteSequenceIndexes, ParallelWork& Work)
{
    if (RemoteSequenceIndexes.empty())
    {
        return;
    }
    ZEN_TRACE_CPU("VerifyAndCompleteChunkSequence");
    if (m_Options.ValidateCompletedSequences)
    {
        // Offset starts at 1: index 0 is handled inline below.
        for (uint32_t RemoteSequenceIndexOffset = 1; RemoteSequenceIndexOffset < RemoteSequenceIndexes.size(); RemoteSequenceIndexOffset++)
        {
            const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[RemoteSequenceIndexOffset];
            Work.ScheduleWork(m_IOWorkerPool, [this, RemoteSequenceIndex](std::atomic<bool>&) {
                if (!m_AbortFlag)
                {
                    ZEN_TRACE_CPU("Async_VerifyAndFinalizeSequence");

                    VerifySequence(RemoteSequenceIndex);
                    // Re-check the abort flag: verification can take a while and an
                    // abort may have been requested in the meantime.
                    if (!m_AbortFlag)
                    {
                        const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
                        FinalizeChunkSequence(SequenceRawHash);
                    }
                }
            });
        }
        // First sequence is processed on the calling thread.
        const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[0];

        VerifySequence(RemoteSequenceIndex);
        const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
        FinalizeChunkSequence(SequenceRawHash);
    }
    else
    {
        // No validation requested: just promote each completed sequence into place.
        for (uint32_t RemoteSequenceIndexOffset = 0; RemoteSequenceIndexOffset < RemoteSequenceIndexes.size(); RemoteSequenceIndexOffset++)
        {
            const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[RemoteSequenceIndexOffset];
            const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
            FinalizeChunkSequence(SequenceRawHash);
        }
    }
}
-
-bool
-BuildsOperationUpdateFolder::CompleteSequenceChunk(uint32_t RemoteSequenceIndex,
- std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters)
-{
- uint32_t PreviousValue = SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1);
- ZEN_ASSERT(PreviousValue >= 1);
- ZEN_ASSERT(PreviousValue != (uint32_t)-1);
- return PreviousValue == 1;
-}
-
-std::vector<uint32_t>
-BuildsOperationUpdateFolder::CompleteChunkTargets(const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs,
- std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters)
-{
- ZEN_TRACE_CPU("CompleteChunkTargets");
-
- std::vector<uint32_t> CompletedSequenceIndexes;
- for (const ChunkedContentLookup::ChunkSequenceLocation* Location : ChunkTargetPtrs)
- {
- const uint32_t RemoteSequenceIndex = Location->SequenceIndex;
- if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters))
- {
- CompletedSequenceIndexes.push_back(RemoteSequenceIndex);
- }
- }
- return CompletedSequenceIndexes;
-}
-
-void
-BuildsOperationUpdateFolder::FinalizeChunkSequence(const IoHash& SequenceRawHash)
-{
- ZEN_TRACE_CPU("FinalizeChunkSequence");
-
- ZEN_ASSERT_SLOW(!IsFile(GetFinalChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash)));
- std::error_code Ec;
- RenameFile(GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash),
- GetFinalChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash),
- Ec);
- if (Ec)
- {
- throw std::system_error(Ec);
- }
-}
-
-void
-BuildsOperationUpdateFolder::FinalizeChunkSequences(std::span<const uint32_t> RemoteSequenceIndexes)
-{
- ZEN_TRACE_CPU("FinalizeChunkSequences");
-
- for (uint32_t SequenceIndex : RemoteSequenceIndexes)
- {
- FinalizeChunkSequence(m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]);
- }
-}
-
// Validates a written (still temporary) chunk sequence file against the
// expected raw size and raw hash from the remote content description.
// Throws std::runtime_error on any mismatch. Must only be called when
// Options.ValidateCompletedSequences is enabled.
//
// @param RemoteSequenceIndex Index into the remote sequence tables.
void
BuildsOperationUpdateFolder::VerifySequence(uint32_t RemoteSequenceIndex)
{
    ZEN_TRACE_CPU("VerifySequence");

    ZEN_ASSERT(m_Options.ValidateCompletedSequences);

    const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
    {
        ZEN_TRACE_CPU("HashSequence");
        // The expected raw size comes from the first path that maps to this sequence.
        const std::uint32_t RemotePathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex];
        const uint64_t ExpectedSize = m_RemoteContent.RawSizes[RemotePathIndex];
        IoBuffer VerifyBuffer = IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash));
        const uint64_t VerifySize = VerifyBuffer.GetSize();
        // Cheap size check first; only hash when the size matches.
        if (VerifySize != ExpectedSize)
        {
            throw std::runtime_error(fmt::format("Written chunk sequence {} size {} does not match expected size {}",
                                                 SequenceRawHash,
                                                 VerifySize,
                                                 ExpectedSize));
        }

        // Hash the file content; the second argument accumulates the number of
        // bytes validated into m_ValidatedChunkByteCount for statistics.
        const IoHash VerifyChunkHash = IoHash::HashBuffer(std::move(VerifyBuffer), &m_ValidatedChunkByteCount);
        if (VerifyChunkHash != SequenceRawHash)
        {
            throw std::runtime_error(
                fmt::format("Written chunk sequence {} hash does not match expected hash {}", VerifyChunkHash, SequenceRawHash));
        }
    }
}
-
-////////////////////// BuildsOperationUploadFolder
-
// Constructs the upload-folder operation. All reference parameters (log output,
// storage, abort/pause flags, worker pools) are stored by reference and must
// outlive this object.
//
// @param BuildId     Id of the build to create or append parts to.
// @param Path        Root folder whose content will be uploaded.
// @param CreateBuild When true, Execute() creates the build (PutBuild) and
//                    finalizes it after all parts are uploaded.
// @param MetaData    Build metadata used when creating the build.
// @param Options     Upload tuning parameters (exclusions, chunking, caching, ...).
BuildsOperationUploadFolder::BuildsOperationUploadFolder(OperationLogOutput& OperationLogOutput,
                                                         StorageInstance& Storage,
                                                         std::atomic<bool>& AbortFlag,
                                                         std::atomic<bool>& PauseFlag,
                                                         WorkerThreadPool& IOWorkerPool,
                                                         WorkerThreadPool& NetworkPool,
                                                         const Oid& BuildId,
                                                         const std::filesystem::path& Path,
                                                         bool CreateBuild,
                                                         const CbObject& MetaData,
                                                         const Options& Options)
: m_LogOutput(OperationLogOutput)
, m_Storage(Storage)
, m_AbortFlag(AbortFlag)
, m_PauseFlag(PauseFlag)
, m_IOWorkerPool(IOWorkerPool)
, m_NetworkPool(NetworkPool)
, m_BuildId(BuildId)
, m_Path(Path)
, m_CreateBuild(CreateBuild)
, m_MetaData(MetaData)
, m_Options(Options)
{
    // Pre-hash the non-compressable extensions (lower-cased DJB2) so the
    // per-chunk compressability check during upload is a cheap set lookup.
    m_NonCompressableExtensionHashes.reserve(Options.NonCompressableExtensions.size());
    for (const std::string& Extension : Options.NonCompressableExtensions)
    {
        m_NonCompressableExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
    }
}
-
-BuildsOperationUploadFolder::PrepareBuildResult
-BuildsOperationUploadFolder::PrepareBuild()
-{
- ZEN_TRACE_CPU("PrepareBuild");
-
- PrepareBuildResult Result;
- Result.PreferredMultipartChunkSize = m_Options.PreferredMultipartChunkSize;
- Stopwatch Timer;
- if (m_CreateBuild)
- {
- ZEN_TRACE_CPU("CreateBuild");
-
- Stopwatch PutBuildTimer;
- CbObject PutBuildResult = m_Storage.BuildStorage->PutBuild(m_BuildId, m_MetaData);
- Result.PrepareBuildTimeMs = PutBuildTimer.GetElapsedTimeMs();
- if (auto ChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(); ChunkSize != 0)
- {
- Result.PreferredMultipartChunkSize = ChunkSize;
- }
- Result.PayloadSize = m_MetaData.GetSize();
- }
- else
- {
- ZEN_TRACE_CPU("PutBuild");
- Stopwatch GetBuildTimer;
- CbObject Build = m_Storage.BuildStorage->GetBuild(m_BuildId);
- Result.PrepareBuildTimeMs = GetBuildTimer.GetElapsedTimeMs();
- Result.PayloadSize = Build.GetSize();
- if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0)
- {
- Result.PreferredMultipartChunkSize = ChunkSize;
- }
- else if (m_Options.AllowMultiparts)
- {
- ZEN_OPERATION_LOG_WARN(m_LogOutput,
- "PreferredMultipartChunkSize is unknown. Defaulting to '{}'",
- NiceBytes(Result.PreferredMultipartChunkSize));
- }
- }
-
- if (!m_Options.IgnoreExistingBlocks)
- {
- ZEN_TRACE_CPU("FindBlocks");
- Stopwatch KnownBlocksTimer;
- CbObject BlockDescriptionList = m_Storage.BuildStorage->FindBlocks(m_BuildId, m_Options.FindBlockMaxCount);
- if (BlockDescriptionList)
- {
- Result.KnownBlocks = ParseChunkBlockDescriptionList(BlockDescriptionList);
- }
- Result.FindBlocksTimeMs = KnownBlocksTimer.GetElapsedTimeMs();
- }
- Result.ElapsedTimeMs = Timer.GetElapsedTimeMs();
- return Result;
-}
-
// Scans m_Path and produces a single UploadPart describing its content.
//
// If an exclude manifest (Options.ZenExcludeManifestName) exists in the root,
// the files it lists are excluded from the scan, as are files/folders rejected
// by IsAcceptedFile/IsAcceptedFolder. Progress is logged periodically.
//
// @return A one-element vector holding the scanned part (content, scan stats
//         and total raw size).
std::vector<BuildsOperationUploadFolder::UploadPart>
BuildsOperationUploadFolder::ReadFolder()
{
    std::vector<UploadPart> UploadParts;
    std::filesystem::path ExcludeManifestPath = m_Path / m_Options.ZenExcludeManifestName;
    tsl::robin_set<std::string> ExcludeAssetPaths;
    if (IsFile(ExcludeManifestPath))
    {
        // Parse the exclude manifest and collect its first part's file list as
        // generic (forward-slash) strings for exact-match exclusion below.
        std::filesystem::path AbsoluteExcludeManifestPath =
            MakeSafeAbsolutePath(ExcludeManifestPath.is_absolute() ? ExcludeManifestPath : m_Path / ExcludeManifestPath);
        BuildManifest Manifest = ParseBuildManifest(AbsoluteExcludeManifestPath);
        const std::vector<std::filesystem::path>& AssetPaths = Manifest.Parts.front().Files;
        ExcludeAssetPaths.reserve(AssetPaths.size());
        for (const std::filesystem::path& AssetPath : AssetPaths)
        {
            ExcludeAssetPaths.insert(AssetPath.generic_string());
        }
    }

    // Folder upload always produces exactly one part.
    UploadParts.resize(1);

    UploadPart& Part = UploadParts.front();
    GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats;

    Part.Content = GetFolderContent(
        Part.LocalFolderScanStats,
        m_Path,
        // Folder filter.
        [this](const std::string_view& RelativePath) { return IsAcceptedFolder(RelativePath); },
        // File filter: option-based rejection first, then exclude-manifest lookup.
        [this, &ExcludeAssetPaths](const std::string_view& RelativePath, uint64_t Size, uint32_t Attributes) -> bool {
            ZEN_UNUSED(Size, Attributes);
            if (!IsAcceptedFile(RelativePath))
            {
                return false;
            }
            if (ExcludeAssetPaths.contains(std::filesystem::path(RelativePath).generic_string()))
            {
                return false;
            }
            return true;
        },
        m_IOWorkerPool,
        m_LogOutput.GetProgressUpdateDelayMS(),
        // Periodic progress callback while scanning.
        [&](bool, std::ptrdiff_t) {
            ZEN_OPERATION_LOG_INFO(m_LogOutput, "Found {} files in '{}'...", LocalFolderScanStats.AcceptedFileCount.load(), m_Path);
        },
        m_AbortFlag);
    Part.TotalRawSize = std::accumulate(Part.Content.RawSizes.begin(), Part.Content.RawSizes.end(), std::uint64_t(0));

    return UploadParts;
}
-
// Builds the list of upload parts from a build manifest file.
//
// Each manifest part becomes one UploadPart whose content is resolved against
// m_Path. If the manifest path itself is relative (i.e. lives inside the build
// folder), it is appended to every part's file list so the manifest is uploaded
// along with the content. Throws std::runtime_error when the manifest is empty
// or references files that cannot be found on disk.
//
// @param ManifestPath Absolute path, or path relative to m_Path.
std::vector<BuildsOperationUploadFolder::UploadPart>
BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& ManifestPath)
{
    std::vector<UploadPart> UploadParts;
    Stopwatch ManifestParseTimer;
    std::filesystem::path AbsoluteManifestPath = MakeSafeAbsolutePath(ManifestPath.is_absolute() ? ManifestPath : m_Path / ManifestPath);
    BuildManifest Manifest = ParseBuildManifest(AbsoluteManifestPath);
    if (Manifest.Parts.empty())
    {
        throw std::runtime_error(fmt::format("Manifest file at '{}' is invalid", ManifestPath));
    }

    UploadParts.resize(Manifest.Parts.size());
    for (size_t PartIndex = 0; PartIndex < Manifest.Parts.size(); PartIndex++)
    {
        BuildManifest::Part& PartManifest = Manifest.Parts[PartIndex];
        if (ManifestPath.is_relative())
        {
            // Include the manifest file itself as part of the uploaded content.
            PartManifest.Files.push_back(ManifestPath);
        }

        UploadPart& Part = UploadParts[PartIndex];
        FolderContent& Content = Part.Content;

        GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats;

        // Resolve and validate the listed files against the folder on disk.
        const std::vector<std::filesystem::path>& AssetPaths = PartManifest.Files;
        Content = GetValidFolderContent(
            m_IOWorkerPool,
            LocalFolderScanStats,
            m_Path,
            AssetPaths,
            [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); },
            1000,
            m_AbortFlag,
            m_PauseFlag);

        // A size mismatch means some listed files were not found; report every
        // missing path in the error message.
        if (Content.Paths.size() != AssetPaths.size())
        {
            const tsl::robin_set<std::filesystem::path> FoundPaths(Content.Paths.begin(), Content.Paths.end());
            ExtendableStringBuilder<1024> SB;
            for (const std::filesystem::path& AssetPath : AssetPaths)
            {
                if (!FoundPaths.contains(AssetPath))
                {
                    SB << "\n    " << AssetPath.generic_string();
                }
            }
            throw std::runtime_error(
                fmt::format("Manifest file at '{}' references files that does not exist{}", ManifestPath, SB.ToView()));
        }

        Part.PartId = PartManifest.PartId;
        Part.PartName = PartManifest.PartName;
        Part.TotalRawSize = std::accumulate(Part.Content.RawSizes.begin(), Part.Content.RawSizes.end(), std::uint64_t(0));
    }

    return UploadParts;
}
-
// Runs the full upload operation and returns (PartId, PartName) for every
// uploaded part.
//
// Flow: read parts (folder scan or manifest) -> fill in missing part ids/names
// -> prepare the build asynchronously on the network pool -> upload each part
// -> finalize the build (when this operation created it). The temp directory is
// recreated up front and cleaned up on exit via a scope guard. Any exception
// sets the abort flag before rethrowing so in-flight async work stops.
//
// @param BuildPartId   Part id to use for a single-part upload without one in
//                      the manifest; Oid::Zero means generate a new id.
// @param BuildPartName Fallback part name for a single-part upload.
// @param ManifestPath  Empty to scan m_Path; otherwise a build manifest.
// @return (PartId, PartName) pairs, or an empty vector when aborted.
std::vector<std::pair<Oid, std::string>>
BuildsOperationUploadFolder::Execute(const Oid& BuildPartId,
                                     const std::string_view BuildPartName,
                                     const std::filesystem::path& ManifestPath,
                                     ChunkingController& ChunkController,
                                     ChunkingCache& ChunkCache)
{
    ZEN_TRACE_CPU("BuildsOperationUploadFolder::Execute");
    try
    {
        Stopwatch ReadPartsTimer;
        std::vector<UploadPart> UploadParts = ManifestPath.empty() ? ReadFolder() : ReadManifestParts(ManifestPath);

        // Fill in part ids and names that the manifest (or folder scan) did not
        // provide. Only a single-part upload may rely on the fallbacks.
        for (UploadPart& Part : UploadParts)
        {
            if (Part.PartId == Oid::Zero)
            {
                if (UploadParts.size() != 1)
                {
                    throw std::runtime_error(fmt::format("Multi part upload manifest '{}' must contains build part id", ManifestPath));
                }

                if (BuildPartId == Oid::Zero)
                {
                    Part.PartId = Oid::NewOid();
                }
                else
                {
                    Part.PartId = BuildPartId;
                }
            }
            if (Part.PartName.empty())
            {
                if (UploadParts.size() != 1)
                {
                    throw std::runtime_error(fmt::format("Multi part upload manifest '{}' must contains build part name", ManifestPath));
                }
                if (BuildPartName.empty())
                {
                    throw std::runtime_error("Build part name must be set");
                }
                Part.PartName = std::string(BuildPartName);
            }
        }

        if (!m_Options.IsQuiet)
        {
            ZEN_OPERATION_LOG_INFO(m_LogOutput,
                                   "Reading {} parts took {}",
                                   UploadParts.size(),
                                   NiceTimeSpanMs(ReadPartsTimer.GetElapsedTimeMs()));
        }

        // Progress step layout: prepare, then PartTaskSteps::StepCount steps per
        // part, then finalize, then cleanup.
        const uint32_t PartsUploadStepCount = gsl::narrow<uint32_t>(uint32_t(PartTaskSteps::StepCount) * UploadParts.size());

        const uint32_t PrepareBuildStep = 0;
        const uint32_t UploadPartsStep = 1;
        const uint32_t FinalizeBuildStep = UploadPartsStep + PartsUploadStepCount;
        const uint32_t CleanupStep = FinalizeBuildStep + 1;
        const uint32_t StepCount = CleanupStep + 1;

        // Guarantee the progress indicator reaches 100% on every exit path.
        auto EndProgress = MakeGuard([&]() { m_LogOutput.SetLogOperationProgress(StepCount, StepCount); });

        Stopwatch ProcessTimer;

        // Start from a fresh temp directory, and remove it again on exit.
        CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir);
        CreateDirectories(m_Options.TempDir);
        auto _ = MakeGuard([&]() { CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir); });

        m_LogOutput.SetLogOperationProgress(PrepareBuildStep, StepCount);

        // Kick off build preparation on the network pool; the result future is
        // consumed later (outside this function's visible scope).
        m_PrepBuildResultFuture = m_NetworkPool.EnqueueTask(std::packaged_task<PrepareBuildResult()>{[this] { return PrepareBuild(); }},
                                                            WorkerThreadPool::EMode::EnableBacklog);

        for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++)
        {
            const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount));

            const UploadPart& Part = UploadParts[PartIndex];
            UploadBuildPart(ChunkController, ChunkCache, PartIndex, Part, PartStepOffset, StepCount);
            if (m_AbortFlag)
            {
                return {};
            }
        }

        m_LogOutput.SetLogOperationProgress(FinalizeBuildStep, StepCount);

        // Only the operation that created the build finalizes it.
        if (m_CreateBuild && !m_AbortFlag)
        {
            Stopwatch FinalizeBuildTimer;
            m_Storage.BuildStorage->FinalizeBuild(m_BuildId);
            if (!m_Options.IsQuiet)
            {
                ZEN_OPERATION_LOG_INFO(m_LogOutput, "FinalizeBuild took {}", NiceTimeSpanMs(FinalizeBuildTimer.GetElapsedTimeMs()));
            }
        }

        m_LogOutput.SetLogOperationProgress(CleanupStep, StepCount);

        std::vector<std::pair<Oid, std::string>> Result;
        Result.reserve(UploadParts.size());
        for (UploadPart& Part : UploadParts)
        {
            Result.push_back(std::make_pair(Part.PartId, Part.PartName));
        }
        return Result;
    }
    catch (const std::exception&)
    {
        // Stop any in-flight async work before propagating the failure.
        m_AbortFlag = true;
        throw;
    }
}
-
-bool
-BuildsOperationUploadFolder::IsAcceptedFolder(const std::string_view& RelativePath) const
-{
- for (const std::string& ExcludeFolder : m_Options.ExcludeFolders)
- {
- if (RelativePath.starts_with(ExcludeFolder))
- {
- if (RelativePath.length() == ExcludeFolder.length())
- {
- return false;
- }
- else if (RelativePath[ExcludeFolder.length()] == '/')
- {
- return false;
- }
- }
- }
- return true;
-}
-
-bool
-BuildsOperationUploadFolder::IsAcceptedFile(const std::string_view& RelativePath) const
-{
- if (RelativePath == m_Options.ZenExcludeManifestName)
- {
- return false;
- }
- for (const std::string& ExcludeExtension : m_Options.ExcludeExtensions)
- {
- if (RelativePath.ends_with(ExcludeExtension))
- {
- return false;
- }
- }
- return true;
-}
-
// Packs the given chunk indexes into blocks, each bounded by MaxBlockSize and
// MaxChunksPerBlock.
//
// ChunkIndexes is first sorted by the chunk's first occurrence in the content
// (sequence index, then offset within the sequence) so that chunks that are
// read together end up in the same block. When a block would overflow, the
// code scans backwards within ~1/16 of MaxBlockSize looking for a boundary
// between source sequences and, if found, splits the block there so a block
// tends not to straddle two source files unnecessarily.
//
// @param ChunkIndexes Modified in place (sorted); the set of chunks to pack.
// @param OutBlocks    Receives one vector of chunk indexes per produced block.
void
BuildsOperationUploadFolder::ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content,
                                                     const ChunkedContentLookup& Lookup,
                                                     std::vector<uint32_t>& ChunkIndexes,
                                                     std::vector<std::vector<uint32_t>>& OutBlocks)
{
    ZEN_TRACE_CPU("ArrangeChunksIntoBlocks");
    // Order chunks by where they first appear in the content: sequence index,
    // then byte offset inside that sequence.
    std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&Content, &Lookup](uint32_t Lhs, uint32_t Rhs) {
        const ChunkedContentLookup::ChunkSequenceLocation& LhsLocation = GetChunkSequenceLocations(Lookup, Lhs)[0];
        const ChunkedContentLookup::ChunkSequenceLocation& RhsLocation = GetChunkSequenceLocations(Lookup, Rhs)[0];
        if (LhsLocation.SequenceIndex < RhsLocation.SequenceIndex)
        {
            return true;
        }
        else if (LhsLocation.SequenceIndex > RhsLocation.SequenceIndex)
        {
            return false;
        }
        return LhsLocation.Offset < RhsLocation.Offset;
    });

    // A split point is acceptable while the block would stay above this
    // threshold (MaxBlockSize minus 1/16th of it).
    uint64_t MaxBlockSizeLowThreshold = m_Options.BlockParameters.MaxBlockSize - (m_Options.BlockParameters.MaxBlockSize / 16);

    uint64_t BlockSize = 0;

    uint32_t ChunkIndexStart = 0;
    for (uint32_t ChunkIndexOffset = 0; ChunkIndexOffset < ChunkIndexes.size();)
    {
        const uint32_t ChunkIndex = ChunkIndexes[ChunkIndexOffset];
        const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];

        if (((BlockSize + ChunkSize) > m_Options.BlockParameters.MaxBlockSize) ||
            (ChunkIndexOffset - ChunkIndexStart) > m_Options.BlockParameters.MaxChunksPerBlock)
        {
            // Within the span of MaxBlockSizeLowThreshold and MaxBlockSize, see if there is a break
            // between source paths for chunks. Break the block at the last such break if any.
            ZEN_ASSERT(ChunkIndexOffset > ChunkIndexStart);

            const uint32_t ChunkSequenceIndex = Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[ChunkIndex]].SequenceIndex;

            uint64_t ScanBlockSize = BlockSize;

            // Walk backwards from the chunk before the overflowing one, while
            // the remaining block size stays above the low threshold. Keep at
            // least three chunks in the block (> ChunkIndexStart + 2).
            uint32_t ScanChunkIndexOffset = ChunkIndexOffset - 1;
            while (ScanChunkIndexOffset > (ChunkIndexStart + 2))
            {
                const uint32_t TestChunkIndex = ChunkIndexes[ScanChunkIndexOffset];
                const uint64_t TestChunkSize = Content.ChunkedContent.ChunkRawSizes[TestChunkIndex];
                if ((ScanBlockSize - TestChunkSize) < MaxBlockSizeLowThreshold)
                {
                    break;
                }

                // A sequence change marks a boundary between source files; split there.
                const uint32_t TestSequenceIndex =
                    Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[TestChunkIndex]].SequenceIndex;
                if (ChunkSequenceIndex != TestSequenceIndex)
                {
                    ChunkIndexOffset = ScanChunkIndexOffset + 1;
                    break;
                }

                ScanBlockSize -= TestChunkSize;
                ScanChunkIndexOffset--;
            }

            // Emit the block [ChunkIndexStart, ChunkIndexOffset) and start a new one.
            std::vector<uint32_t> ChunksInBlock;
            ChunksInBlock.reserve(ChunkIndexOffset - ChunkIndexStart);
            for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexOffset; AddIndexOffset++)
            {
                const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset];
                ChunksInBlock.push_back(AddChunkIndex);
            }
            OutBlocks.emplace_back(std::move(ChunksInBlock));
            BlockSize = 0;
            ChunkIndexStart = ChunkIndexOffset;
        }
        else
        {
            ChunkIndexOffset++;
            BlockSize += ChunkSize;
        }
    }
    // Emit the trailing partial block, if any.
    if (ChunkIndexStart < ChunkIndexes.size())
    {
        std::vector<uint32_t> ChunksInBlock;
        ChunksInBlock.reserve(ChunkIndexes.size() - ChunkIndexStart);
        for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexes.size(); AddIndexOffset++)
        {
            const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset];
            ChunksInBlock.push_back(AddChunkIndex);
        }
        OutBlocks.emplace_back(std::move(ChunksInBlock));
    }
}
-
// Generates each new block on the IO pool and, as blocks become ready, uploads
// them on the network pool, overlapping generation and upload.
//
// Per block: GenerateBlock produces the compressed payload and description;
// sizes, metadata, headers, and the hash->index map in OutBlocks are filled in.
// Upload of a block is skipped (only its header is kept) when the pending
// upload queue exceeds 16, or when generation has already completed by the
// time the upload task runs. Progress and filtered throughput rates are
// reported via a progress bar until all scheduled work drains.
//
// NOTE(review): blocks whose upload is skipped are not re-queued here and
// their MetaDataHasBeenUploaded stays false — presumably a later phase uploads
// them using the stored BlockHeaders/BlockDescriptions; confirm against the
// callers of GeneratedBlocks.
void
BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& Content,
                                                 const ChunkedContentLookup& Lookup,
                                                 const std::vector<std::vector<uint32_t>>& NewBlockChunks,
                                                 GeneratedBlocks& OutBlocks,
                                                 GenerateBlocksStatistics& GenerateBlocksStats,
                                                 UploadStatistics& UploadStats)
{
    ZEN_TRACE_CPU("GenerateBuildBlocks");
    const std::size_t NewBlockCount = NewBlockChunks.size();
    if (NewBlockCount > 0)
    {
        std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Generate Blocks"));
        OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr);

        // Pre-size all per-block output arrays so worker tasks can write to
        // their own BlockIndex slot without synchronization.
        OutBlocks.BlockDescriptions.resize(NewBlockCount);
        OutBlocks.BlockSizes.resize(NewBlockCount);
        OutBlocks.BlockMetaDatas.resize(NewBlockCount);
        OutBlocks.BlockHeaders.resize(NewBlockCount);
        OutBlocks.MetaDataHasBeenUploaded.resize(NewBlockCount, 0);
        OutBlocks.BlockHashToBlockIndex.reserve(NewBlockCount);

        // Protects BlockHashToBlockIndex, the only shared map mutated by workers.
        RwLock Lock;

        WorkerThreadPool& GenerateBlobsPool = m_IOWorkerPool;
        WorkerThreadPool& UploadBlocksPool = m_NetworkPool;

        FilteredRate FilteredGeneratedBytesPerSecond;
        FilteredRate FilteredUploadedBytesPerSecond;

        ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);

        // Back-pressure: counts blocks generated but not yet uploaded.
        std::atomic<uint64_t> QueuedPendingBlocksForUpload = 0;

        for (size_t BlockIndex = 0; BlockIndex < NewBlockCount; BlockIndex++)
        {
            if (Work.IsAborted())
            {
                break;
            }
            const std::vector<uint32_t>& ChunksInBlock = NewBlockChunks[BlockIndex];
            Work.ScheduleWork(
                GenerateBlobsPool,
                [this,
                 &Content,
                 &Lookup,
                 &Work,
                 &UploadBlocksPool,
                 NewBlockCount,
                 ChunksInBlock,
                 &Lock,
                 &OutBlocks,
                 &GenerateBlocksStats,
                 &UploadStats,
                 &FilteredGeneratedBytesPerSecond,
                 &QueuedPendingBlocksForUpload,
                 &FilteredUploadedBytesPerSecond,
                 BlockIndex](std::atomic<bool>&) {
                    if (!m_AbortFlag)
                    {
                        ZEN_TRACE_CPU("GenerateBuildBlocks_Generate");

                        FilteredGeneratedBytesPerSecond.Start();

                        Stopwatch GenerateTimer;
                        CompressedBuffer CompressedBlock =
                            GenerateBlock(Content, Lookup, ChunksInBlock, OutBlocks.BlockDescriptions[BlockIndex]);
                        if (m_Options.IsVerbose)
                        {
                            ZEN_OPERATION_LOG_INFO(m_LogOutput,
                                                   "Generated block {} ({}) containing {} chunks in {}",
                                                   OutBlocks.BlockDescriptions[BlockIndex].BlockHash,
                                                   NiceBytes(CompressedBlock.GetCompressedSize()),
                                                   OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size(),
                                                   NiceTimeSpanMs(GenerateTimer.GetElapsedTimeMs()));
                        }

                        OutBlocks.BlockSizes[BlockIndex] = CompressedBlock.GetCompressedSize();
                        {
                            CbObjectWriter Writer;
                            Writer.AddString("createdBy", "zen");
                            OutBlocks.BlockMetaDatas[BlockIndex] = Writer.Save();
                        }
                        GenerateBlocksStats.GeneratedBlockByteCount += OutBlocks.BlockSizes[BlockIndex];
                        GenerateBlocksStats.GeneratedBlockCount++;

                        Lock.WithExclusiveLock([&]() {
                            OutBlocks.BlockHashToBlockIndex.insert_or_assign(OutBlocks.BlockDescriptions[BlockIndex].BlockHash, BlockIndex);
                        });

                        // Keep the block header (first two segments of the
                        // compressed payload) regardless of upload outcome.
                        {
                            std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments();
                            ZEN_ASSERT(Segments.size() >= 2);
                            OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]);
                        }

                        if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount)
                        {
                            FilteredGeneratedBytesPerSecond.Stop();
                        }

                        if (QueuedPendingBlocksForUpload.load() > 16)
                        {
                            // Too many uploads pending: do not queue this block.
                            // NOTE(review): this re-stores the header already stored
                            // above — looks like duplicated code; confirm intent.
                            std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments();
                            ZEN_ASSERT(Segments.size() >= 2);
                            OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]);
                        }
                        else
                        {
                            if (!m_AbortFlag)
                            {
                                QueuedPendingBlocksForUpload++;

                                Work.ScheduleWork(
                                    UploadBlocksPool,
                                    [this,
                                     NewBlockCount,
                                     &GenerateBlocksStats,
                                     &UploadStats,
                                     &FilteredUploadedBytesPerSecond,
                                     &QueuedPendingBlocksForUpload,
                                     &OutBlocks,
                                     BlockIndex,
                                     Payload = std::move(CompressedBlock)](std::atomic<bool>&) mutable {
                                        // Decrement the pending count on all exit paths.
                                        auto _ = MakeGuard([&QueuedPendingBlocksForUpload] { QueuedPendingBlocksForUpload--; });
                                        if (!m_AbortFlag)
                                        {
                                            if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount)
                                            {
                                                // Generation finished before this upload ran:
                                                // keep only the header instead of uploading.
                                                ZEN_TRACE_CPU("GenerateBuildBlocks_Save");

                                                FilteredUploadedBytesPerSecond.Stop();
                                                std::span<const SharedBuffer> Segments = Payload.GetCompressed().GetSegments();
                                                ZEN_ASSERT(Segments.size() >= 2);
                                                OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]);
                                            }
                                            else
                                            {
                                                ZEN_TRACE_CPU("GenerateBuildBlocks_Upload");

                                                FilteredUploadedBytesPerSecond.Start();

                                                const CbObject BlockMetaData =
                                                    BuildChunkBlockDescription(OutBlocks.BlockDescriptions[BlockIndex],
                                                                               OutBlocks.BlockMetaDatas[BlockIndex]);

                                                const IoHash& BlockHash = OutBlocks.BlockDescriptions[BlockIndex].BlockHash;
                                                const uint64_t CompressedBlockSize = Payload.GetCompressedSize();

                                                // Optionally populate the cache storage first (non-destructive copy).
                                                if (m_Storage.BuildCacheStorage && m_Options.PopulateCache)
                                                {
                                                    m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId,
                                                                                              BlockHash,
                                                                                              ZenContentType::kCompressedBinary,
                                                                                              Payload.GetCompressed());
                                                }

                                                // Primary upload; Payload is moved from here.
                                                m_Storage.BuildStorage->PutBuildBlob(m_BuildId,
                                                                                     BlockHash,
                                                                                     ZenContentType::kCompressedBinary,
                                                                                     std::move(Payload).GetCompressed());
                                                UploadStats.BlocksBytes += CompressedBlockSize;

                                                if (m_Options.IsVerbose)
                                                {
                                                    ZEN_OPERATION_LOG_INFO(m_LogOutput,
                                                                           "Uploaded block {} ({}) containing {} chunks",
                                                                           BlockHash,
                                                                           NiceBytes(CompressedBlockSize),
                                                                           OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size());
                                                }

                                                if (m_Storage.BuildCacheStorage && m_Options.PopulateCache)
                                                {
                                                    m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId,
                                                                                                  std::vector<IoHash>({BlockHash}),
                                                                                                  std::vector<CbObject>({BlockMetaData}));
                                                }

                                                bool MetadataSucceeded =
                                                    m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData);
                                                if (MetadataSucceeded)
                                                {
                                                    if (m_Options.IsVerbose)
                                                    {
                                                        ZEN_OPERATION_LOG_INFO(m_LogOutput,
                                                                               "Uploaded block {} metadata ({})",
                                                                               BlockHash,
                                                                               NiceBytes(BlockMetaData.GetSize()));
                                                    }

                                                    OutBlocks.MetaDataHasBeenUploaded[BlockIndex] = true;
                                                    UploadStats.BlocksBytes += BlockMetaData.GetSize();
                                                }

                                                UploadStats.BlockCount++;
                                                if (UploadStats.BlockCount == NewBlockCount)
                                                {
                                                    FilteredUploadedBytesPerSecond.Stop();
                                                }
                                            }
                                        }
                                    });
                            }
                        }
                    }
                });
        }

        // Pump progress updates until all generate + upload tasks have drained.
        Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
            ZEN_UNUSED(PendingWork);

            FilteredGeneratedBytesPerSecond.Update(GenerateBlocksStats.GeneratedBlockByteCount.load());
            FilteredUploadedBytesPerSecond.Update(UploadStats.BlocksBytes.load());

            std::string Details = fmt::format("Generated {}/{} ({}, {}B/s). Uploaded {}/{} ({}, {}bits/s)",
                                              GenerateBlocksStats.GeneratedBlockCount.load(),
                                              NewBlockCount,
                                              NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()),
                                              NiceNum(FilteredGeneratedBytesPerSecond.GetCurrent()),
                                              UploadStats.BlockCount.load(),
                                              NewBlockCount,
                                              NiceBytes(UploadStats.BlocksBytes.load()),
                                              NiceNum(FilteredUploadedBytesPerSecond.GetCurrent() * 8));

            Progress.UpdateState({.Task = "Generating blocks",
                                  .Details = Details,
                                  .TotalCount = gsl::narrow<uint64_t>(NewBlockCount),
                                  .RemainingCount = gsl::narrow<uint64_t>(NewBlockCount - GenerateBlocksStats.GeneratedBlockCount.load()),
                                  .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
                                 false);
        });

        // Unless aborted, every queued upload must have completed by now.
        ZEN_ASSERT(m_AbortFlag || QueuedPendingBlocksForUpload.load() == 0);

        Progress.Finish();

        GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTimeUS();
        UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS();
    }
}
-
// Remaps a local chunk order to "absolute" chunk indexes.
//
// The absolute index space enumerates loose chunks first, then every chunk of
// every block description in order. Each local chunk index is mapped to its
// absolute position, and the local order is rewritten in terms of absolute
// indexes. When DoExtraContentValidation is set, a shadow hash list is built
// and every mapping is cross-checked by hash.
//
// @return The local order translated to absolute chunk indexes (same length as
//         LocalChunkOrder).
std::vector<uint32_t>
BuildsOperationUploadFolder::CalculateAbsoluteChunkOrders(
    const std::span<const IoHash> LocalChunkHashes,
    const std::span<const uint32_t> LocalChunkOrder,
    const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToLocalChunkIndex,
    const std::span<const uint32_t>& LooseChunkIndexes,
    const std::span<const ChunkBlockDescription>& BlockDescriptions)
{
    ZEN_TRACE_CPU("CalculateAbsoluteChunkOrders");

    // Shadow list of hashes in absolute order, only built for validation.
    std::vector<IoHash> TmpAbsoluteChunkHashes;
    if (m_Options.DoExtraContentValidation)
    {
        TmpAbsoluteChunkHashes.reserve(LocalChunkHashes.size());
    }
    std::vector<uint32_t> LocalChunkIndexToAbsoluteChunkIndex;
    LocalChunkIndexToAbsoluteChunkIndex.resize(LocalChunkHashes.size(), (uint32_t)-1);
    std::uint32_t AbsoluteChunkCount = 0;
    // Loose chunks occupy the first absolute indexes.
    for (uint32_t ChunkIndex : LooseChunkIndexes)
    {
        LocalChunkIndexToAbsoluteChunkIndex[ChunkIndex] = AbsoluteChunkCount;
        if (m_Options.DoExtraContentValidation)
        {
            TmpAbsoluteChunkHashes.push_back(LocalChunkHashes[ChunkIndex]);
        }
        AbsoluteChunkCount++;
    }
    // Block chunks follow; every block chunk consumes an absolute index even if
    // it has no corresponding local chunk (hash not in the lookup map).
    for (const ChunkBlockDescription& Block : BlockDescriptions)
    {
        for (const IoHash& ChunkHash : Block.ChunkRawHashes)
        {
            if (auto It = ChunkHashToLocalChunkIndex.find(ChunkHash); It != ChunkHashToLocalChunkIndex.end())
            {
                const uint32_t LocalChunkIndex = It->second;
                ZEN_ASSERT_SLOW(LocalChunkHashes[LocalChunkIndex] == ChunkHash);
                LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex] = AbsoluteChunkCount;
            }
            if (m_Options.DoExtraContentValidation)
            {
                TmpAbsoluteChunkHashes.push_back(ChunkHash);
            }
            AbsoluteChunkCount++;
        }
    }
    // Translate the local order into absolute indexes.
    std::vector<uint32_t> AbsoluteChunkOrder;
    AbsoluteChunkOrder.reserve(LocalChunkHashes.size());
    for (const uint32_t LocalChunkIndex : LocalChunkOrder)
    {
        const uint32_t AbsoluteChunkIndex = LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex];
        if (m_Options.DoExtraContentValidation)
        {
            ZEN_ASSERT(LocalChunkHashes[LocalChunkIndex] == TmpAbsoluteChunkHashes[AbsoluteChunkIndex]);
        }
        AbsoluteChunkOrder.push_back(AbsoluteChunkIndex);
    }
    // Full cross-check of the finished mapping (validation builds only).
    if (m_Options.DoExtraContentValidation)
    {
        uint32_t OrderIndex = 0;
        while (OrderIndex < LocalChunkOrder.size())
        {
            const uint32_t LocalChunkIndex = LocalChunkOrder[OrderIndex];
            const IoHash& LocalChunkHash = LocalChunkHashes[LocalChunkIndex];
            const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrder[OrderIndex];
            const IoHash& AbsoluteChunkHash = TmpAbsoluteChunkHashes[AbsoluteChunkIndex];
            ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash);
            OrderIndex++;
        }
    }
    return AbsoluteChunkOrder;
}
-
// Reads a chunk's raw bytes from the source folder via the open-file cache.
//
// The chunk hash is resolved to a chunk index and its first sequence location
// (sequence + offset) is used to read ChunkRawSizes[ChunkIndex] bytes. Throws
// std::runtime_error when the read fails. In slow-assert builds the returned
// bytes are re-hashed and checked against the requested hash.
CompositeBuffer
BuildsOperationUploadFolder::FetchChunk(const ChunkedFolderContent& Content,
                                        const ChunkedContentLookup& Lookup,
                                        const IoHash& ChunkHash,
                                        ReadFileCache& OpenFileCache)
{
    ZEN_TRACE_CPU("FetchChunk");
    auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash);
    ZEN_ASSERT(It != Lookup.ChunkHashToChunkIndex.end());
    uint32_t ChunkIndex = It->second;
    std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex);
    ZEN_ASSERT(!ChunkLocations.empty());
    // The first location is used; a chunk may occur in several places but its
    // bytes are identical wherever it occurs.
    CompositeBuffer Chunk =
        OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, ChunkLocations[0].Offset, Content.ChunkedContent.ChunkRawSizes[ChunkIndex]);
    if (!Chunk)
    {
        throw std::runtime_error(fmt::format("Unable to read chunk at {}, size {} from '{}'",
                                             ChunkLocations[0].Offset,
                                             Content.ChunkedContent.ChunkRawSizes[ChunkIndex],
                                             Content.Paths[Lookup.SequenceIndexFirstPathIndex[ChunkLocations[0].SequenceIndex]]));
    }
    ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == ChunkHash);
    return Chunk;
};
-
// Produces a compressed block from the given chunks and fills in its description.
//
// Each chunk is paired with a fetch function that reads it through a local
// ReadFileCache (up to 4 files held open) and compresses it with Oodle Mermaid:
// VeryFast for chunks at/above MinimumSizeForCompressInBlock whose source
// extension is not in the non-compressable set, None (store) otherwise. The
// fetch lambdas capture OpenFileCache by reference, so they must only run
// inside GenerateChunkBlock, before this function returns.
//
// @param ChunksInBlock       Chunk indexes to include, in block order.
// @param OutBlockDescription Receives the resulting block description.
CompressedBuffer
BuildsOperationUploadFolder::GenerateBlock(const ChunkedFolderContent& Content,
                                           const ChunkedContentLookup& Lookup,
                                           const std::vector<uint32_t>& ChunksInBlock,
                                           ChunkBlockDescription& OutBlockDescription)
{
    ZEN_TRACE_CPU("GenerateBlock");
    ReadFileCache OpenFileCache(m_DiskStats.OpenReadCount,
                                m_DiskStats.CurrentOpenFileCount,
                                m_DiskStats.ReadCount,
                                m_DiskStats.ReadByteCount,
                                m_Path,
                                Content,
                                Lookup,
                                4);

    std::vector<std::pair<IoHash, FetchChunkFunc>> BlockContent;
    BlockContent.reserve(ChunksInBlock.size());
    for (uint32_t ChunkIndex : ChunksInBlock)
    {
        BlockContent.emplace_back(std::make_pair(
            Content.ChunkedContent.ChunkHashes[ChunkIndex],
            [this, &Content, &Lookup, &OpenFileCache, ChunkIndex](const IoHash& ChunkHash) -> std::pair<uint64_t, CompressedBuffer> {
                CompositeBuffer Chunk = FetchChunk(Content, Lookup, ChunkHash, OpenFileCache);
                ZEN_ASSERT(Chunk);
                uint64_t RawSize = Chunk.GetSize();

                // Only compress chunks that are large enough and whose source
                // file extension is not flagged as non-compressable.
                const bool ShouldCompressChunk = RawSize >= m_Options.MinimumSizeForCompressInBlock &&
                                                 IsChunkCompressable(m_NonCompressableExtensionHashes, Content, Lookup, ChunkIndex);

                const OodleCompressionLevel CompressionLevel =
                    ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None;
                return {RawSize, CompressedBuffer::Compress(Chunk, OodleCompressor::Mermaid, CompressionLevel)};
            }));
    }

    return GenerateChunkBlock(std::move(BlockContent), OutBlockDescription);
};
-
-CompressedBuffer
-BuildsOperationUploadFolder::RebuildBlock(const ChunkedFolderContent& Content,
- const ChunkedContentLookup& Lookup,
- CompositeBuffer&& HeaderBuffer,
- const std::vector<uint32_t>& ChunksInBlock)
-{
- ZEN_TRACE_CPU("RebuildBlock");
- ReadFileCache OpenFileCache(m_DiskStats.OpenReadCount,
- m_DiskStats.CurrentOpenFileCount,
- m_DiskStats.ReadCount,
- m_DiskStats.ReadByteCount,
- m_Path,
- Content,
- Lookup,
- 4);
-
- std::vector<SharedBuffer> ResultBuffers;
- ResultBuffers.reserve(HeaderBuffer.GetSegments().size() + ChunksInBlock.size());
- ResultBuffers.insert(ResultBuffers.end(), HeaderBuffer.GetSegments().begin(), HeaderBuffer.GetSegments().end());
- for (uint32_t ChunkIndex : ChunksInBlock)
- {
- std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex);
- ZEN_ASSERT(!ChunkLocations.empty());
- CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex,
- ChunkLocations[0].Offset,
- Content.ChunkedContent.ChunkRawSizes[ChunkIndex]);
- ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == Content.ChunkedContent.ChunkHashes[ChunkIndex]);
-
- const uint64_t RawSize = Chunk.GetSize();
- const bool ShouldCompressChunk = RawSize >= m_Options.MinimumSizeForCompressInBlock &&
- IsChunkCompressable(m_NonCompressableExtensionHashes, Content, Lookup, ChunkIndex);
-
- const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None;
-
- CompositeBuffer CompressedChunk =
- CompressedBuffer::Compress(std::move(Chunk), OodleCompressor::Mermaid, CompressionLevel).GetCompressed();
- ResultBuffers.insert(ResultBuffers.end(), CompressedChunk.GetSegments().begin(), CompressedChunk.GetSegments().end());
- }
- return CompressedBuffer::FromCompressedNoValidate(CompositeBuffer(std::move(ResultBuffers)));
-};
-
-void
-BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController,
- ChunkingCache& ChunkCache,
- uint32_t PartIndex,
- const UploadPart& Part,
- uint32_t PartStepOffset,
- uint32_t StepCount)
-{
- Stopwatch UploadTimer;
-
- ChunkingStatistics ChunkingStats;
- FindBlocksStatistics FindBlocksStats;
- ReuseBlocksStatistics ReuseBlocksStats;
- UploadStatistics UploadStats;
- GenerateBlocksStatistics GenerateBlocksStats;
-
- LooseChunksStatistics LooseChunksStats;
- ChunkedFolderContent LocalContent;
-
- m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::ChunkPartContent, StepCount);
-
- Stopwatch ScanTimer;
- {
- std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Scan Folder"));
- OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr);
-
- FilteredRate FilteredBytesHashed;
- FilteredBytesHashed.Start();
- LocalContent = ChunkFolderContent(
- ChunkingStats,
- m_IOWorkerPool,
- m_Path,
- Part.Content,
- ChunkController,
- ChunkCache,
- m_LogOutput.GetProgressUpdateDelayMS(),
- [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) {
- FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load());
- std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found",
- ChunkingStats.FilesProcessed.load(),
- Part.Content.Paths.size(),
- NiceBytes(ChunkingStats.BytesHashed.load()),
- NiceBytes(Part.TotalRawSize),
- NiceNum(FilteredBytesHashed.GetCurrent()),
- ChunkingStats.UniqueChunksFound.load(),
- NiceBytes(ChunkingStats.UniqueBytesFound.load()));
- Progress.UpdateState({.Task = "Scanning files ",
- .Details = Details,
- .TotalCount = Part.TotalRawSize,
- .RemainingCount = Part.TotalRawSize - ChunkingStats.BytesHashed.load(),
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
- },
- m_AbortFlag,
- m_PauseFlag);
- FilteredBytesHashed.Stop();
- Progress.Finish();
- if (m_AbortFlag)
- {
- return;
- }
- }
-
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec",
- Part.Content.Paths.size(),
- NiceBytes(Part.TotalRawSize),
- ChunkingStats.UniqueChunksFound.load(),
- NiceBytes(ChunkingStats.UniqueBytesFound.load()),
- m_Path,
- NiceTimeSpanMs(ScanTimer.GetElapsedTimeMs()),
- NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed)));
- }
-
- const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent);
-
- std::vector<size_t> ReuseBlockIndexes;
- std::vector<uint32_t> NewBlockChunkIndexes;
-
- if (PartIndex == 0)
- {
- const PrepareBuildResult PrepBuildResult = m_PrepBuildResultFuture.get();
-
- m_FindBlocksStats.FindBlockTimeMS = PrepBuildResult.ElapsedTimeMs;
- m_FindBlocksStats.FoundBlockCount = PrepBuildResult.KnownBlocks.size();
-
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Build prepare took {}. {} took {}, payload size {}{}",
- NiceTimeSpanMs(PrepBuildResult.ElapsedTimeMs),
- m_CreateBuild ? "PutBuild" : "GetBuild",
- NiceTimeSpanMs(PrepBuildResult.PrepareBuildTimeMs),
- NiceBytes(PrepBuildResult.PayloadSize),
- m_Options.IgnoreExistingBlocks ? ""
- : fmt::format(". Found {} blocks in {}",
- PrepBuildResult.KnownBlocks.size(),
- NiceTimeSpanMs(PrepBuildResult.FindBlocksTimeMs)));
- }
-
- m_PreferredMultipartChunkSize = PrepBuildResult.PreferredMultipartChunkSize;
-
- m_LargeAttachmentSize = m_Options.AllowMultiparts ? m_PreferredMultipartChunkSize * 4u : (std::uint64_t)-1;
-
- m_KnownBlocks = std::move(PrepBuildResult.KnownBlocks);
- }
-
- ZEN_ASSERT(m_PreferredMultipartChunkSize != 0);
- ZEN_ASSERT(m_LargeAttachmentSize != 0);
-
- m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::CalculateDelta, StepCount);
-
- Stopwatch BlockArrangeTimer;
-
- std::vector<std::uint32_t> LooseChunkIndexes;
- {
- bool EnableBlocks = true;
- std::vector<std::uint32_t> BlockChunkIndexes;
- for (uint32_t ChunkIndex = 0; ChunkIndex < LocalContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++)
- {
- const uint64_t ChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
- if (!EnableBlocks || ChunkRawSize == 0 || ChunkRawSize > m_Options.BlockParameters.MaxChunkEmbedSize)
- {
- LooseChunkIndexes.push_back(ChunkIndex);
- LooseChunksStats.ChunkByteCount += ChunkRawSize;
- }
- else
- {
- BlockChunkIndexes.push_back(ChunkIndex);
- FindBlocksStats.PotentialChunkByteCount += ChunkRawSize;
- }
- }
- FindBlocksStats.PotentialChunkCount += BlockChunkIndexes.size();
- LooseChunksStats.ChunkCount = LooseChunkIndexes.size();
-
- if (m_Options.IgnoreExistingBlocks)
- {
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput, "Ignoring any existing blocks in store");
- }
- NewBlockChunkIndexes = std::move(BlockChunkIndexes);
- }
- else
- {
- ReuseBlockIndexes = FindReuseBlocks(m_LogOutput,
- m_Options.BlockReuseMinPercentLimit,
- m_Options.IsVerbose,
- ReuseBlocksStats,
- m_KnownBlocks,
- LocalContent.ChunkedContent.ChunkHashes,
- BlockChunkIndexes,
- NewBlockChunkIndexes);
- FindBlocksStats.AcceptedBlockCount += ReuseBlockIndexes.size();
-
- for (const ChunkBlockDescription& Description : m_KnownBlocks)
- {
- for (uint32_t ChunkRawLength : Description.ChunkRawLengths)
- {
- FindBlocksStats.FoundBlockByteCount += ChunkRawLength;
- }
- FindBlocksStats.FoundBlockChunkCount += Description.ChunkRawHashes.size();
- }
- }
- }
-
- std::vector<std::vector<uint32_t>> NewBlockChunks;
- ArrangeChunksIntoBlocks(LocalContent, LocalLookup, NewBlockChunkIndexes, NewBlockChunks);
-
- FindBlocksStats.NewBlocksCount += NewBlockChunks.size();
- for (uint32_t ChunkIndex : NewBlockChunkIndexes)
- {
- FindBlocksStats.NewBlocksChunkByteCount += LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
- }
- FindBlocksStats.NewBlocksChunkCount += NewBlockChunkIndexes.size();
-
- const double AcceptedByteCountPercent = FindBlocksStats.PotentialChunkByteCount > 0
- ? (100.0 * ReuseBlocksStats.AcceptedRawByteCount / FindBlocksStats.PotentialChunkByteCount)
- : 0.0;
-
- const double AcceptedReduntantByteCountPercent =
- ReuseBlocksStats.AcceptedByteCount > 0 ? (100.0 * ReuseBlocksStats.AcceptedReduntantByteCount) /
- (ReuseBlocksStats.AcceptedByteCount + ReuseBlocksStats.AcceptedReduntantByteCount)
- : 0.0;
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Found {} chunks in {} ({}) blocks eligible for reuse in {}\n"
- " Reusing {} ({}) matching chunks in {} blocks ({:.1f}%)\n"
- " Accepting {} ({}) redundant chunks ({:.1f}%)\n"
- " Rejected {} ({}) chunks in {} blocks\n"
- " Arranged {} ({}) chunks in {} new blocks\n"
- " Keeping {} ({}) chunks as loose chunks\n"
- " Discovery completed in {}",
- FindBlocksStats.FoundBlockChunkCount,
- FindBlocksStats.FoundBlockCount,
- NiceBytes(FindBlocksStats.FoundBlockByteCount),
- NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS),
-
- ReuseBlocksStats.AcceptedChunkCount,
- NiceBytes(ReuseBlocksStats.AcceptedRawByteCount),
- FindBlocksStats.AcceptedBlockCount,
- AcceptedByteCountPercent,
-
- ReuseBlocksStats.AcceptedReduntantChunkCount,
- NiceBytes(ReuseBlocksStats.AcceptedReduntantByteCount),
- AcceptedReduntantByteCountPercent,
-
- ReuseBlocksStats.RejectedChunkCount,
- NiceBytes(ReuseBlocksStats.RejectedByteCount),
- ReuseBlocksStats.RejectedBlockCount,
-
- FindBlocksStats.NewBlocksChunkCount,
- NiceBytes(FindBlocksStats.NewBlocksChunkByteCount),
- FindBlocksStats.NewBlocksCount,
-
- LooseChunksStats.ChunkCount,
- NiceBytes(LooseChunksStats.ChunkByteCount),
-
- NiceTimeSpanMs(BlockArrangeTimer.GetElapsedTimeMs()));
- }
-
- m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::GenerateBlocks, StepCount);
- GeneratedBlocks NewBlocks;
-
- if (!NewBlockChunks.empty())
- {
- Stopwatch GenerateBuildBlocksTimer;
- auto __ = MakeGuard([&]() {
- uint64_t BlockGenerateTimeUs = GenerateBuildBlocksTimer.GetElapsedTimeUs();
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(
- m_LogOutput,
- "Generated {} ({}) and uploaded {} ({}) blocks in {}. Generate speed: {}B/sec. Transfer speed {}bits/sec.",
- GenerateBlocksStats.GeneratedBlockCount.load(),
- NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount),
- UploadStats.BlockCount.load(),
- NiceBytes(UploadStats.BlocksBytes.load()),
- NiceTimeSpanMs(BlockGenerateTimeUs / 1000),
- NiceNum(GetBytesPerSecond(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS,
- GenerateBlocksStats.GeneratedBlockByteCount)),
- NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.BlocksBytes * 8)));
- }
- });
- GenerateBuildBlocks(LocalContent, LocalLookup, NewBlockChunks, NewBlocks, GenerateBlocksStats, UploadStats);
- }
-
- m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::BuildPartManifest, StepCount);
-
- CbObject PartManifest;
- {
- CbObjectWriter PartManifestWriter;
- Stopwatch ManifestGenerationTimer;
- auto __ = MakeGuard([&]() {
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Generated build part manifest in {} ({})",
- NiceTimeSpanMs(ManifestGenerationTimer.GetElapsedTimeMs()),
- NiceBytes(PartManifestWriter.GetSaveSize()));
- }
- });
-
- PartManifestWriter.BeginObject("chunker"sv);
- {
- PartManifestWriter.AddString("name"sv, ChunkController.GetName());
- PartManifestWriter.AddObject("parameters"sv, ChunkController.GetParameters());
- }
- PartManifestWriter.EndObject(); // chunker
-
- std::vector<IoHash> AllChunkBlockHashes;
- std::vector<ChunkBlockDescription> AllChunkBlockDescriptions;
- AllChunkBlockHashes.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size());
- AllChunkBlockDescriptions.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size());
- for (size_t ReuseBlockIndex : ReuseBlockIndexes)
- {
- AllChunkBlockDescriptions.push_back(m_KnownBlocks[ReuseBlockIndex]);
- AllChunkBlockHashes.push_back(m_KnownBlocks[ReuseBlockIndex].BlockHash);
- }
- AllChunkBlockDescriptions.insert(AllChunkBlockDescriptions.end(),
- NewBlocks.BlockDescriptions.begin(),
- NewBlocks.BlockDescriptions.end());
- for (const ChunkBlockDescription& BlockDescription : NewBlocks.BlockDescriptions)
- {
- AllChunkBlockHashes.push_back(BlockDescription.BlockHash);
- }
-
- std::vector<IoHash> AbsoluteChunkHashes;
- if (m_Options.DoExtraContentValidation)
- {
- tsl::robin_map<IoHash, size_t, IoHash::Hasher> ChunkHashToAbsoluteChunkIndex;
- AbsoluteChunkHashes.reserve(LocalContent.ChunkedContent.ChunkHashes.size());
- for (uint32_t ChunkIndex : LooseChunkIndexes)
- {
- ChunkHashToAbsoluteChunkIndex.insert({LocalContent.ChunkedContent.ChunkHashes[ChunkIndex], AbsoluteChunkHashes.size()});
- AbsoluteChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]);
- }
- for (const ChunkBlockDescription& Block : AllChunkBlockDescriptions)
- {
- for (const IoHash& ChunkHash : Block.ChunkRawHashes)
- {
- ChunkHashToAbsoluteChunkIndex.insert({ChunkHash, AbsoluteChunkHashes.size()});
- AbsoluteChunkHashes.push_back(ChunkHash);
- }
- }
- for (const IoHash& ChunkHash : LocalContent.ChunkedContent.ChunkHashes)
- {
- ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(ChunkHash)] == ChunkHash);
- ZEN_ASSERT(LocalContent.ChunkedContent.ChunkHashes[LocalLookup.ChunkHashToChunkIndex.at(ChunkHash)] == ChunkHash);
- }
- for (const uint32_t ChunkIndex : LocalContent.ChunkedContent.ChunkOrders)
- {
- ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex])] ==
- LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]);
- ZEN_ASSERT(LocalLookup.ChunkHashToChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]) == ChunkIndex);
- }
- }
- std::vector<uint32_t> AbsoluteChunkOrders = CalculateAbsoluteChunkOrders(LocalContent.ChunkedContent.ChunkHashes,
- LocalContent.ChunkedContent.ChunkOrders,
- LocalLookup.ChunkHashToChunkIndex,
- LooseChunkIndexes,
- AllChunkBlockDescriptions);
-
- if (m_Options.DoExtraContentValidation)
- {
- for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); ChunkOrderIndex++)
- {
- uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndex];
- uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[ChunkOrderIndex];
- const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex];
- const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex];
- ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash);
- }
- }
-
- WriteBuildContentToCompactBinary(PartManifestWriter,
- LocalContent.Platform,
- LocalContent.Paths,
- LocalContent.RawHashes,
- LocalContent.RawSizes,
- LocalContent.Attributes,
- LocalContent.ChunkedContent.SequenceRawHashes,
- LocalContent.ChunkedContent.ChunkCounts,
- LocalContent.ChunkedContent.ChunkHashes,
- LocalContent.ChunkedContent.ChunkRawSizes,
- AbsoluteChunkOrders,
- LooseChunkIndexes,
- AllChunkBlockHashes);
-
- if (m_Options.DoExtraContentValidation)
- {
- ChunkedFolderContent VerifyFolderContent;
-
- std::vector<uint32_t> OutAbsoluteChunkOrders;
- std::vector<IoHash> OutLooseChunkHashes;
- std::vector<uint64_t> OutLooseChunkRawSizes;
- std::vector<IoHash> OutBlockRawHashes;
- ReadBuildContentFromCompactBinary(PartManifestWriter.Save(),
- VerifyFolderContent.Platform,
- VerifyFolderContent.Paths,
- VerifyFolderContent.RawHashes,
- VerifyFolderContent.RawSizes,
- VerifyFolderContent.Attributes,
- VerifyFolderContent.ChunkedContent.SequenceRawHashes,
- VerifyFolderContent.ChunkedContent.ChunkCounts,
- OutAbsoluteChunkOrders,
- OutLooseChunkHashes,
- OutLooseChunkRawSizes,
- OutBlockRawHashes);
- ZEN_ASSERT(OutBlockRawHashes == AllChunkBlockHashes);
-
- for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++)
- {
- uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex];
- const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex];
-
- uint32_t VerifyChunkIndex = OutAbsoluteChunkOrders[OrderIndex];
- const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex];
-
- ZEN_ASSERT(LocalChunkHash == VerifyChunkHash);
- }
-
- CalculateLocalChunkOrders(OutAbsoluteChunkOrders,
- OutLooseChunkHashes,
- OutLooseChunkRawSizes,
- AllChunkBlockDescriptions,
- VerifyFolderContent.ChunkedContent.ChunkHashes,
- VerifyFolderContent.ChunkedContent.ChunkRawSizes,
- VerifyFolderContent.ChunkedContent.ChunkOrders,
- m_Options.DoExtraContentValidation);
-
- ZEN_ASSERT(LocalContent.Paths == VerifyFolderContent.Paths);
- ZEN_ASSERT(LocalContent.RawHashes == VerifyFolderContent.RawHashes);
- ZEN_ASSERT(LocalContent.RawSizes == VerifyFolderContent.RawSizes);
- ZEN_ASSERT(LocalContent.Attributes == VerifyFolderContent.Attributes);
- ZEN_ASSERT(LocalContent.ChunkedContent.SequenceRawHashes == VerifyFolderContent.ChunkedContent.SequenceRawHashes);
- ZEN_ASSERT(LocalContent.ChunkedContent.ChunkCounts == VerifyFolderContent.ChunkedContent.ChunkCounts);
-
- for (uint32_t OrderIndex = 0; OrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); OrderIndex++)
- {
- uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex];
- const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex];
- uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex];
-
- uint32_t VerifyChunkIndex = VerifyFolderContent.ChunkedContent.ChunkOrders[OrderIndex];
- const IoHash VerifyChunkHash = VerifyFolderContent.ChunkedContent.ChunkHashes[VerifyChunkIndex];
- uint64_t VerifyChunkRawSize = VerifyFolderContent.ChunkedContent.ChunkRawSizes[VerifyChunkIndex];
-
- ZEN_ASSERT(LocalChunkHash == VerifyChunkHash);
- ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize);
- }
- }
- PartManifest = PartManifestWriter.Save();
- }
-
- m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::UploadBuildPart, StepCount);
-
- Stopwatch PutBuildPartResultTimer;
- std::pair<IoHash, std::vector<IoHash>> PutBuildPartResult =
- m_Storage.BuildStorage->PutBuildPart(m_BuildId, Part.PartId, Part.PartName, PartManifest);
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "PutBuildPart took {}, payload size {}. {} attachments are needed.",
- NiceTimeSpanMs(PutBuildPartResultTimer.GetElapsedTimeMs()),
- NiceBytes(PartManifest.GetSize()),
- PutBuildPartResult.second.size());
- }
- IoHash PartHash = PutBuildPartResult.first;
-
- auto UploadAttachments =
- [this, &LooseChunksStats, &UploadStats, &LocalContent, &LocalLookup, &NewBlockChunks, &NewBlocks, &LooseChunkIndexes](
- std::span<IoHash> RawHashes,
- std::vector<IoHash>& OutUnknownChunks) {
- if (!m_AbortFlag)
- {
- UploadStatistics TempUploadStats;
- LooseChunksStatistics TempLooseChunksStats;
-
- Stopwatch TempUploadTimer;
- auto __ = MakeGuard([&]() {
- if (!m_Options.IsQuiet)
- {
- uint64_t TempChunkUploadTimeUs = TempUploadTimer.GetElapsedTimeUs();
- ZEN_OPERATION_LOG_INFO(
- m_LogOutput,
- "Uploaded {} ({}) blocks. "
- "Compressed {} ({} {}B/s) and uploaded {} ({}) chunks. "
- "Transferred {} ({}bits/s) in {}",
- TempUploadStats.BlockCount.load(),
- NiceBytes(TempUploadStats.BlocksBytes),
-
- TempLooseChunksStats.CompressedChunkCount.load(),
- NiceBytes(TempLooseChunksStats.CompressedChunkBytes.load()),
- NiceNum(GetBytesPerSecond(TempLooseChunksStats.CompressChunksElapsedWallTimeUS,
- TempLooseChunksStats.ChunkByteCount)),
- TempUploadStats.ChunkCount.load(),
- NiceBytes(TempUploadStats.ChunksBytes),
-
- NiceBytes(TempUploadStats.BlocksBytes + TempUploadStats.ChunksBytes),
- NiceNum(GetBytesPerSecond(TempUploadStats.ElapsedWallTimeUS, TempUploadStats.ChunksBytes * 8)),
- NiceTimeSpanMs(TempChunkUploadTimeUs / 1000));
- }
- });
- UploadPartBlobs(LocalContent,
- LocalLookup,
- RawHashes,
- NewBlockChunks,
- NewBlocks,
- LooseChunkIndexes,
- m_LargeAttachmentSize,
- TempUploadStats,
- TempLooseChunksStats,
- OutUnknownChunks);
- UploadStats += TempUploadStats;
- LooseChunksStats += TempLooseChunksStats;
- }
- };
-
- m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::UploadAttachments, StepCount);
-
- std::vector<IoHash> UnknownChunks;
- if (m_Options.IgnoreExistingBlocks)
- {
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "PutBuildPart uploading all attachments, needs are: {}",
- FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv));
- }
-
- std::vector<IoHash> ForceUploadChunkHashes;
- ForceUploadChunkHashes.reserve(LooseChunkIndexes.size());
-
- for (uint32_t ChunkIndex : LooseChunkIndexes)
- {
- ForceUploadChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]);
- }
-
- for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockHeaders.size(); BlockIndex++)
- {
- if (NewBlocks.BlockHeaders[BlockIndex])
- {
- // Block was not uploaded during generation
- ForceUploadChunkHashes.push_back(NewBlocks.BlockDescriptions[BlockIndex].BlockHash);
- }
- }
- UploadAttachments(ForceUploadChunkHashes, UnknownChunks);
- }
- else if (!PutBuildPartResult.second.empty())
- {
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "PutBuildPart needs attachments: {}",
- FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv));
- }
- UploadAttachments(PutBuildPartResult.second, UnknownChunks);
- }
-
- auto BuildUnkownChunksResponse = [](const std::vector<IoHash>& UnknownChunks, bool WillRetry) {
- return fmt::format(
- "The following build blobs was reported as needed for upload but was reported as existing at the start of the "
- "operation.{}{}",
- WillRetry ? " Treating this as a transient inconsistency issue and will attempt to retry finalization."sv : ""sv,
- FormatArray<IoHash>(UnknownChunks, "\n "sv));
- };
-
- if (!UnknownChunks.empty())
- {
- ZEN_OPERATION_LOG_WARN(m_LogOutput, "{}", BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ true));
- }
-
- uint32_t FinalizeBuildPartRetryCount = 5;
- while (!m_AbortFlag && (FinalizeBuildPartRetryCount--) > 0)
- {
- Stopwatch FinalizeBuildPartTimer;
- std::vector<IoHash> Needs = m_Storage.BuildStorage->FinalizeBuildPart(m_BuildId, Part.PartId, PartHash);
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "FinalizeBuildPart took {}. {} attachments are missing.",
- NiceTimeSpanMs(FinalizeBuildPartTimer.GetElapsedTimeMs()),
- Needs.size());
- }
- if (Needs.empty())
- {
- break;
- }
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput, "FinalizeBuildPart needs attachments: {}", FormatArray<IoHash>(Needs, "\n "sv));
- }
-
- std::vector<IoHash> RetryUnknownChunks;
- UploadAttachments(Needs, RetryUnknownChunks);
- if (RetryUnknownChunks == UnknownChunks)
- {
- if (FinalizeBuildPartRetryCount > 0)
- {
- // Back off a bit
- Sleep(1000);
- }
- }
- else
- {
- UnknownChunks = RetryUnknownChunks;
- ZEN_OPERATION_LOG_WARN(m_LogOutput,
- "{}",
- BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ FinalizeBuildPartRetryCount != 0));
- }
- }
-
- if (!UnknownChunks.empty())
- {
- throw std::runtime_error(BuildUnkownChunksResponse(UnknownChunks, /*WillRetry*/ false));
- }
-
- if (!NewBlocks.BlockDescriptions.empty() && !m_AbortFlag)
- {
- uint64_t UploadBlockMetadataCount = 0;
- Stopwatch UploadBlockMetadataTimer;
-
- uint32_t FailedMetadataUploadCount = 1;
- int32_t MetadataUploadRetryCount = 3;
- while ((MetadataUploadRetryCount-- > 0) && (FailedMetadataUploadCount > 0))
- {
- FailedMetadataUploadCount = 0;
- for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockDescriptions.size(); BlockIndex++)
- {
- if (m_AbortFlag)
- {
- break;
- }
- const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash;
- if (!NewBlocks.MetaDataHasBeenUploaded[BlockIndex])
- {
- const CbObject BlockMetaData =
- BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]);
- if (m_Storage.BuildCacheStorage && m_Options.PopulateCache)
- {
- m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId,
- std::vector<IoHash>({BlockHash}),
- std::vector<CbObject>({BlockMetaData}));
- }
- bool MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData);
- if (MetadataSucceeded)
- {
- UploadStats.BlocksBytes += BlockMetaData.GetSize();
- NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true;
- UploadBlockMetadataCount++;
- }
- else
- {
- FailedMetadataUploadCount++;
- }
- }
- }
- }
- if (UploadBlockMetadataCount > 0)
- {
- uint64_t ElapsedUS = UploadBlockMetadataTimer.GetElapsedTimeUs();
- UploadStats.ElapsedWallTimeUS += ElapsedUS;
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Uploaded metadata for {} blocks in {}",
- UploadBlockMetadataCount,
- NiceTimeSpanMs(ElapsedUS / 1000));
- }
- }
-
- // The newly generated blocks are now known blocks so the next part upload can use those blocks as well
- m_KnownBlocks.insert(m_KnownBlocks.end(), NewBlocks.BlockDescriptions.begin(), NewBlocks.BlockDescriptions.end());
- }
-
- m_LogOutput.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::PutBuildPartStats, StepCount);
-
- m_Storage.BuildStorage->PutBuildPartStats(
- m_BuildId,
- Part.PartId,
- {{"totalSize", double(Part.LocalFolderScanStats.FoundFileByteCount.load())},
- {"reusedRatio", AcceptedByteCountPercent / 100.0},
- {"reusedBlockCount", double(FindBlocksStats.AcceptedBlockCount)},
- {"reusedBlockByteCount", double(ReuseBlocksStats.AcceptedRawByteCount)},
- {"newBlockCount", double(FindBlocksStats.NewBlocksCount)},
- {"newBlockByteCount", double(FindBlocksStats.NewBlocksChunkByteCount)},
- {"uploadedCount", double(UploadStats.BlockCount.load() + UploadStats.ChunkCount.load())},
- {"uploadedByteCount", double(UploadStats.BlocksBytes.load() + UploadStats.ChunksBytes.load())},
- {"uploadedBytesPerSec",
- double(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.ChunksBytes + UploadStats.BlocksBytes))},
- {"elapsedTimeSec", double(UploadTimer.GetElapsedTimeMs() / 1000.0)}});
-
- m_LocalFolderScanStats += Part.LocalFolderScanStats;
- m_ChunkingStats += ChunkingStats;
- m_FindBlocksStats += FindBlocksStats;
- m_ReuseBlocksStats += ReuseBlocksStats;
- m_UploadStats += UploadStats;
- m_GenerateBlocksStats += GenerateBlocksStats;
- m_LooseChunksStats += LooseChunksStats;
-}
-
-void
-BuildsOperationUploadFolder::UploadPartBlobs(const ChunkedFolderContent& Content,
- const ChunkedContentLookup& Lookup,
- std::span<IoHash> RawHashes,
- const std::vector<std::vector<uint32_t>>& NewBlockChunks,
- GeneratedBlocks& NewBlocks,
- std::span<const uint32_t> LooseChunkIndexes,
- const std::uint64_t LargeAttachmentSize,
- UploadStatistics& TempUploadStats,
- LooseChunksStatistics& TempLooseChunksStats,
- std::vector<IoHash>& OutUnknownChunks)
-{
- ZEN_TRACE_CPU("UploadPartBlobs");
- {
- std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Upload Blobs"));
- OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr);
-
- WorkerThreadPool& ReadChunkPool = m_IOWorkerPool;
- WorkerThreadPool& UploadChunkPool = m_NetworkPool;
-
- FilteredRate FilteredGenerateBlockBytesPerSecond;
- FilteredRate FilteredCompressedBytesPerSecond;
- FilteredRate FilteredUploadedBytesPerSecond;
-
- ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
-
- std::atomic<size_t> UploadedBlockSize = 0;
- std::atomic<size_t> UploadedBlockCount = 0;
- std::atomic<size_t> UploadedRawChunkSize = 0;
- std::atomic<size_t> UploadedCompressedChunkSize = 0;
- std::atomic<uint32_t> UploadedChunkCount = 0;
-
- tsl::robin_map<uint32_t, uint32_t> ChunkIndexToLooseChunkOrderIndex;
- ChunkIndexToLooseChunkOrderIndex.reserve(LooseChunkIndexes.size());
- for (uint32_t OrderIndex = 0; OrderIndex < LooseChunkIndexes.size(); OrderIndex++)
- {
- ChunkIndexToLooseChunkOrderIndex.insert_or_assign(LooseChunkIndexes[OrderIndex], OrderIndex);
- }
-
- std::vector<size_t> BlockIndexes;
- std::vector<uint32_t> LooseChunkOrderIndexes;
-
- uint64_t TotalLooseChunksSize = 0;
- uint64_t TotalBlocksSize = 0;
- for (const IoHash& RawHash : RawHashes)
- {
- if (auto It = NewBlocks.BlockHashToBlockIndex.find(RawHash); It != NewBlocks.BlockHashToBlockIndex.end())
- {
- BlockIndexes.push_back(It->second);
- TotalBlocksSize += NewBlocks.BlockSizes[It->second];
- }
- else if (auto ChunkIndexIt = Lookup.ChunkHashToChunkIndex.find(RawHash); ChunkIndexIt != Lookup.ChunkHashToChunkIndex.end())
- {
- const uint32_t ChunkIndex = ChunkIndexIt->second;
- if (auto LooseOrderIndexIt = ChunkIndexToLooseChunkOrderIndex.find(ChunkIndex);
- LooseOrderIndexIt != ChunkIndexToLooseChunkOrderIndex.end())
- {
- LooseChunkOrderIndexes.push_back(LooseOrderIndexIt->second);
- TotalLooseChunksSize += Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
- }
- }
- else
- {
- OutUnknownChunks.push_back(RawHash);
- }
- }
- if (BlockIndexes.empty() && LooseChunkOrderIndexes.empty())
- {
- return;
- }
-
- uint64_t TotalRawSize = TotalLooseChunksSize + TotalBlocksSize;
-
- const size_t UploadBlockCount = BlockIndexes.size();
- const uint32_t UploadChunkCount = gsl::narrow<uint32_t>(LooseChunkOrderIndexes.size());
-
- auto AsyncUploadBlock = [this,
- &Work,
- &NewBlocks,
- UploadBlockCount,
- &UploadedBlockCount,
- UploadChunkCount,
- &UploadedChunkCount,
- &UploadedBlockSize,
- &TempUploadStats,
- &FilteredUploadedBytesPerSecond,
- &UploadChunkPool](const size_t BlockIndex,
- const IoHash BlockHash,
- CompositeBuffer&& Payload,
- std::atomic<uint64_t>& QueuedPendingInMemoryBlocksForUpload) {
- bool IsInMemoryBlock = true;
- if (QueuedPendingInMemoryBlocksForUpload.load() > 16)
- {
- ZEN_TRACE_CPU("AsyncUploadBlock_WriteTempBlock");
- std::filesystem::path TempFilePath = m_Options.TempDir / (BlockHash.ToHexString());
- Payload = CompositeBuffer(WriteToTempFile(std::move(Payload), TempFilePath));
- IsInMemoryBlock = false;
- }
- else
- {
- QueuedPendingInMemoryBlocksForUpload++;
- }
-
- Work.ScheduleWork(
- UploadChunkPool,
- [this,
- &QueuedPendingInMemoryBlocksForUpload,
- &NewBlocks,
- UploadBlockCount,
- &UploadedBlockCount,
- UploadChunkCount,
- &UploadedChunkCount,
- &UploadedBlockSize,
- &TempUploadStats,
- &FilteredUploadedBytesPerSecond,
- IsInMemoryBlock,
- BlockIndex,
- BlockHash,
- Payload = CompositeBuffer(std::move(Payload))](std::atomic<bool>&) mutable {
- auto _ = MakeGuard([IsInMemoryBlock, &QueuedPendingInMemoryBlocksForUpload] {
- if (IsInMemoryBlock)
- {
- QueuedPendingInMemoryBlocksForUpload--;
- }
- });
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("AsyncUploadBlock");
-
- const uint64_t PayloadSize = Payload.GetSize();
-
- FilteredUploadedBytesPerSecond.Start();
- const CbObject BlockMetaData =
- BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]);
-
- if (m_Storage.BuildCacheStorage && m_Options.PopulateCache)
- {
- m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload);
- }
- m_Storage.BuildStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload);
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Uploaded block {} ({}) containing {} chunks",
- BlockHash,
- NiceBytes(PayloadSize),
- NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size());
- }
- UploadedBlockSize += PayloadSize;
- TempUploadStats.BlocksBytes += PayloadSize;
-
- if (m_Storage.BuildCacheStorage && m_Options.PopulateCache)
- {
- m_Storage.BuildCacheStorage->PutBlobMetadatas(m_BuildId,
- std::vector<IoHash>({BlockHash}),
- std::vector<CbObject>({BlockMetaData}));
- }
- bool MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData);
- if (MetadataSucceeded)
- {
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Uploaded block {} metadata ({})",
- BlockHash,
- NiceBytes(BlockMetaData.GetSize()));
- }
-
- NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true;
- TempUploadStats.BlocksBytes += BlockMetaData.GetSize();
- }
-
- TempUploadStats.BlockCount++;
-
- UploadedBlockCount++;
- if (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount)
- {
- FilteredUploadedBytesPerSecond.Stop();
- }
- }
- });
- };
-
- auto AsyncUploadLooseChunk = [this,
- LargeAttachmentSize,
- &Work,
- &UploadChunkPool,
- &FilteredUploadedBytesPerSecond,
- &UploadedBlockCount,
- &UploadedChunkCount,
- UploadBlockCount,
- UploadChunkCount,
- &UploadedCompressedChunkSize,
- &UploadedRawChunkSize,
- &TempUploadStats](const IoHash& RawHash, uint64_t RawSize, CompositeBuffer&& Payload) {
- Work.ScheduleWork(
- UploadChunkPool,
- [this,
- &Work,
- LargeAttachmentSize,
- &FilteredUploadedBytesPerSecond,
- &UploadChunkPool,
- &UploadedBlockCount,
- &UploadedChunkCount,
- UploadBlockCount,
- UploadChunkCount,
- &UploadedCompressedChunkSize,
- &UploadedRawChunkSize,
- &TempUploadStats,
- RawHash,
- RawSize,
- Payload = CompositeBuffer(std::move(Payload))](std::atomic<bool>&) mutable {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("AsyncUploadLooseChunk");
-
- const uint64_t PayloadSize = Payload.GetSize();
-
- if (m_Storage.BuildCacheStorage && m_Options.PopulateCache)
- {
- m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload);
- }
-
- if (PayloadSize >= LargeAttachmentSize)
- {
- ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart");
- TempUploadStats.MultipartAttachmentCount++;
- std::vector<std::function<void()>> MultipartWork = m_Storage.BuildStorage->PutLargeBuildBlob(
- m_BuildId,
- RawHash,
- ZenContentType::kCompressedBinary,
- PayloadSize,
- [Payload = std::move(Payload), &FilteredUploadedBytesPerSecond](uint64_t Offset,
- uint64_t Size) mutable -> IoBuffer {
- FilteredUploadedBytesPerSecond.Start();
-
- IoBuffer PartPayload = Payload.Mid(Offset, Size).Flatten().AsIoBuffer();
- PartPayload.SetContentType(ZenContentType::kBinary);
- return PartPayload;
- },
- [RawSize,
- &TempUploadStats,
- &UploadedCompressedChunkSize,
- &UploadChunkPool,
- &UploadedBlockCount,
- UploadBlockCount,
- &UploadedChunkCount,
- UploadChunkCount,
- &FilteredUploadedBytesPerSecond,
- &UploadedRawChunkSize](uint64_t SentBytes, bool IsComplete) {
- TempUploadStats.ChunksBytes += SentBytes;
- UploadedCompressedChunkSize += SentBytes;
- if (IsComplete)
- {
- TempUploadStats.ChunkCount++;
- UploadedChunkCount++;
- if (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount)
- {
- FilteredUploadedBytesPerSecond.Stop();
- }
- UploadedRawChunkSize += RawSize;
- }
- });
- for (auto& WorkPart : MultipartWork)
- {
- Work.ScheduleWork(UploadChunkPool, [Work = std::move(WorkPart)](std::atomic<bool>& AbortFlag) {
- ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart_Work");
- if (!AbortFlag)
- {
- Work();
- }
- });
- }
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput, "Uploaded multipart chunk {} ({})", RawHash, NiceBytes(PayloadSize));
- }
- }
- else
- {
- ZEN_TRACE_CPU("AsyncUploadLooseChunk_Singlepart");
- m_Storage.BuildStorage->PutBuildBlob(m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload);
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput, "Uploaded chunk {} ({})", RawHash, NiceBytes(PayloadSize));
- }
- TempUploadStats.ChunksBytes += Payload.GetSize();
- TempUploadStats.ChunkCount++;
- UploadedCompressedChunkSize += Payload.GetSize();
- UploadedRawChunkSize += RawSize;
- UploadedChunkCount++;
- if (UploadedChunkCount == UploadChunkCount)
- {
- FilteredUploadedBytesPerSecond.Stop();
- }
- }
- }
- });
- };
-
- std::vector<size_t> GenerateBlockIndexes;
-
- std::atomic<uint64_t> GeneratedBlockCount = 0;
- std::atomic<uint64_t> GeneratedBlockByteCount = 0;
-
- std::atomic<uint64_t> QueuedPendingInMemoryBlocksForUpload = 0;
-
- // Start generation of any non-prebuilt blocks and schedule upload
- for (const size_t BlockIndex : BlockIndexes)
- {
- const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash;
- if (!m_AbortFlag)
- {
- Work.ScheduleWork(
- ReadChunkPool,
- [this,
- BlockHash = IoHash(BlockHash),
- BlockIndex,
- &FilteredGenerateBlockBytesPerSecond,
- &Content,
- &Lookup,
- &NewBlocks,
- &NewBlockChunks,
- &GenerateBlockIndexes,
- &GeneratedBlockCount,
- &GeneratedBlockByteCount,
- &AsyncUploadBlock,
- &QueuedPendingInMemoryBlocksForUpload](std::atomic<bool>&) {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("UploadPartBlobs_GenerateBlock");
-
- FilteredGenerateBlockBytesPerSecond.Start();
-
- Stopwatch GenerateTimer;
- CompositeBuffer Payload;
- if (NewBlocks.BlockHeaders[BlockIndex])
- {
- Payload =
- RebuildBlock(Content, Lookup, std::move(NewBlocks.BlockHeaders[BlockIndex]), NewBlockChunks[BlockIndex])
- .GetCompressed();
- }
- else
- {
- ChunkBlockDescription BlockDescription;
- CompressedBuffer CompressedBlock =
- GenerateBlock(Content, Lookup, NewBlockChunks[BlockIndex], BlockDescription);
- if (!CompressedBlock)
- {
- throw std::runtime_error(fmt::format("Failed generating block {}", BlockHash));
- }
- ZEN_ASSERT(BlockDescription.BlockHash == BlockHash);
- Payload = std::move(CompressedBlock).GetCompressed();
- }
-
- GeneratedBlockByteCount += NewBlocks.BlockSizes[BlockIndex];
- GeneratedBlockCount++;
- if (GeneratedBlockCount == GenerateBlockIndexes.size())
- {
- FilteredGenerateBlockBytesPerSecond.Stop();
- }
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "{} block {} ({}) containing {} chunks in {}",
- NewBlocks.BlockHeaders[BlockIndex] ? "Regenerated" : "Generated",
- NewBlocks.BlockDescriptions[BlockIndex].BlockHash,
- NiceBytes(NewBlocks.BlockSizes[BlockIndex]),
- NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size(),
- NiceTimeSpanMs(GenerateTimer.GetElapsedTimeMs()));
- }
- if (!m_AbortFlag)
- {
- AsyncUploadBlock(BlockIndex, BlockHash, std::move(Payload), QueuedPendingInMemoryBlocksForUpload);
- }
- }
- });
- }
- }
-
- // Start compression of any non-precompressed loose chunks and schedule upload
- for (const uint32_t LooseChunkOrderIndex : LooseChunkOrderIndexes)
- {
- const uint32_t ChunkIndex = LooseChunkIndexes[LooseChunkOrderIndex];
- Work.ScheduleWork(
- ReadChunkPool,
- [this,
- &Content,
- &Lookup,
- &TempLooseChunksStats,
- &LooseChunkOrderIndexes,
- &FilteredCompressedBytesPerSecond,
- &TempUploadStats,
- &AsyncUploadLooseChunk,
- ChunkIndex](std::atomic<bool>&) {
- if (!m_AbortFlag)
- {
- ZEN_TRACE_CPU("UploadPartBlobs_CompressChunk");
-
- FilteredCompressedBytesPerSecond.Start();
- Stopwatch CompressTimer;
- CompositeBuffer Payload = CompressChunk(Content, Lookup, ChunkIndex, TempLooseChunksStats);
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Compressed chunk {} ({} -> {}) in {}",
- Content.ChunkedContent.ChunkHashes[ChunkIndex],
- NiceBytes(Content.ChunkedContent.ChunkRawSizes[ChunkIndex]),
- NiceBytes(Payload.GetSize()),
- NiceTimeSpanMs(CompressTimer.GetElapsedTimeMs()));
- }
- const uint64_t ChunkRawSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
- TempUploadStats.ReadFromDiskBytes += ChunkRawSize;
- if (TempLooseChunksStats.CompressedChunkCount == LooseChunkOrderIndexes.size())
- {
- FilteredCompressedBytesPerSecond.Stop();
- }
- if (!m_AbortFlag)
- {
- AsyncUploadLooseChunk(Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkRawSize, std::move(Payload));
- }
- }
- });
- }
-
- Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
- ZEN_UNUSED(PendingWork);
- FilteredCompressedBytesPerSecond.Update(TempLooseChunksStats.CompressedChunkRawBytes.load());
- FilteredGenerateBlockBytesPerSecond.Update(GeneratedBlockByteCount.load());
- FilteredUploadedBytesPerSecond.Update(UploadedCompressedChunkSize.load() + UploadedBlockSize.load());
- uint64_t UploadedRawSize = UploadedRawChunkSize.load() + UploadedBlockSize.load();
- uint64_t UploadedCompressedSize = UploadedCompressedChunkSize.load() + UploadedBlockSize.load();
-
- std::string Details = fmt::format(
- "Compressed {}/{} ({}/{}{}) chunks. "
- "Uploaded {}/{} ({}/{}) blobs "
- "({}{})",
- TempLooseChunksStats.CompressedChunkCount.load(),
- LooseChunkOrderIndexes.size(),
- NiceBytes(TempLooseChunksStats.CompressedChunkRawBytes),
- NiceBytes(TotalLooseChunksSize),
- (TempLooseChunksStats.CompressedChunkCount == LooseChunkOrderIndexes.size())
- ? ""
- : fmt::format(" {}B/s", NiceNum(FilteredCompressedBytesPerSecond.GetCurrent())),
-
- UploadedBlockCount.load() + UploadedChunkCount.load(),
- UploadBlockCount + UploadChunkCount,
- NiceBytes(UploadedRawSize),
- NiceBytes(TotalRawSize),
-
- NiceBytes(UploadedCompressedSize),
- (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount)
- ? ""
- : fmt::format(" {}bits/s", NiceNum(FilteredUploadedBytesPerSecond.GetCurrent())));
-
- Progress.UpdateState({.Task = "Uploading blobs ",
- .Details = Details,
- .TotalCount = gsl::narrow<uint64_t>(TotalRawSize),
- .RemainingCount = gsl::narrow<uint64_t>(TotalRawSize - UploadedRawSize),
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
- });
-
- ZEN_ASSERT(m_AbortFlag || QueuedPendingInMemoryBlocksForUpload.load() == 0);
-
- Progress.Finish();
-
- TempUploadStats.ElapsedWallTimeUS += FilteredUploadedBytesPerSecond.GetElapsedTimeUS();
- TempLooseChunksStats.CompressChunksElapsedWallTimeUS += FilteredCompressedBytesPerSecond.GetElapsedTimeUS();
- }
-}
-
// Compresses a single loose chunk and returns the compressed payload.
//
// The raw chunk bytes are read from the source file that contains the chunk's
// first occurrence. If the chunk is considered compressible, the code first
// tries to stream-compress directly into a temporary file (so very large
// chunks never need to be fully resident in memory); on success the returned
// buffer is backed by that delete-on-close temp file. If streaming is not
// possible (or the chunk is not compressible), the chunk is compressed in one
// shot in memory, and — for compressible chunks only — spilled to a temp file
// afterwards.
//
// TempLooseChunksStats counters are updated incrementally during streaming and
// rolled back if the streaming attempt fails, so totals stay consistent with
// whichever path produced the final payload.
//
// Throws std::runtime_error on read, temp-file-creation, or compression
// failure.
CompositeBuffer
BuildsOperationUploadFolder::CompressChunk(const ChunkedFolderContent& Content,
                                           const ChunkedContentLookup& Lookup,
                                           uint32_t ChunkIndex,
                                           LooseChunksStatistics& TempLooseChunksStats)
{
    ZEN_TRACE_CPU("CompressChunk");
    ZEN_ASSERT(!m_Options.TempDir.empty());
    const IoHash&  ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex];
    const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];

    // Read the raw chunk from the first file location that contains it.
    const ChunkedContentLookup::ChunkSequenceLocation& Source = GetChunkSequenceLocations(Lookup, ChunkIndex)[0];
    const std::uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[Source.SequenceIndex];
    IoBuffer RawSource = IoBufferBuilder::MakeFromFile((m_Path / Content.Paths[PathIndex]).make_preferred(), Source.Offset, ChunkSize);
    if (!RawSource)
    {
        throw std::runtime_error(fmt::format("Failed fetching chunk {}", ChunkHash));
    }
    if (RawSource.GetSize() != ChunkSize)
    {
        throw std::runtime_error(fmt::format("Fetched chunk {} has invalid size", ChunkHash));
    }

    // Chunks with known-incompressible extensions are stored with
    // compression level "None" (still wrapped in the compressed format).
    const bool ShouldCompressChunk = IsChunkCompressable(m_NonCompressableExtensionHashes, Content, Lookup, ChunkIndex);
    const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None;

    if (ShouldCompressChunk)
    {
        // Preferred path: stream-compress straight into a temp file named by
        // the chunk hash.
        std::filesystem::path TempFilePath = m_Options.TempDir / ChunkHash.ToHexString();

        BasicFile      CompressedFile;
        std::error_code Ec;
        CompressedFile.Open(TempFilePath, BasicFile::Mode::kTruncateDelete, Ec);
        if (Ec)
        {
            throw std::runtime_error(fmt::format("Failed creating temporary file for compressing blob {}, reason: ({}) {}",
                                                 ChunkHash,
                                                 Ec.value(),
                                                 Ec.message()));
        }

        // Track how much was attributed to the stats during streaming so it
        // can be subtracted again if streaming fails part-way through.
        uint64_t StreamRawBytes = 0;
        uint64_t StreamCompressedBytes = 0;

        bool CouldCompress = CompressedBuffer::CompressToStream(
            CompositeBuffer(SharedBuffer(RawSource)),
            [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
                ZEN_UNUSED(SourceOffset);
                TempLooseChunksStats.CompressedChunkRawBytes += SourceSize;
                CompressedFile.Write(RangeBuffer, Offset);
                TempLooseChunksStats.CompressedChunkBytes += RangeBuffer.GetSize();
                StreamRawBytes += SourceSize;
                StreamCompressedBytes += RangeBuffer.GetSize();
            },
            OodleCompressor::Mermaid,
            CompressionLevel);
        if (CouldCompress)
        {
            // Hand the file handle over to an IoBuffer that owns it and
            // deletes the temp file when the buffer is released.
            uint64_t CompressedSize = CompressedFile.FileSize();
            void*    FileHandle = CompressedFile.Detach();
            IoBuffer TempPayload = IoBuffer(IoBuffer::File,
                                            FileHandle,
                                            0,
                                            CompressedSize,
                                            /*IsWholeFile*/ true);
            ZEN_ASSERT(TempPayload);
            TempPayload.SetDeleteOnClose(true);
            IoHash   RawHash;
            uint64_t RawSize;
            // Re-parse the header to validate that the streamed output decodes
            // to the expected raw hash/size.
            CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(TempPayload), RawHash, RawSize);
            ZEN_ASSERT(Compressed);
            ZEN_ASSERT(RawHash == ChunkHash);
            ZEN_ASSERT(RawSize == ChunkSize);

            TempLooseChunksStats.CompressedChunkCount++;

            return Compressed.GetCompressed();
        }
        else
        {
            // Streaming failed: undo the stats attributed above; the in-memory
            // fallback below will account for the chunk instead.
            TempLooseChunksStats.CompressedChunkRawBytes -= StreamRawBytes;
            TempLooseChunksStats.CompressedChunkBytes -= StreamCompressedBytes;
        }
        CompressedFile.Close();
        RemoveFile(TempFilePath, Ec);
        ZEN_UNUSED(Ec);
    }

    // Fallback path: compress the whole chunk in memory.
    CompressedBuffer CompressedBlob =
        CompressedBuffer::Compress(SharedBuffer(std::move(RawSource)), OodleCompressor::Mermaid, CompressionLevel);
    if (!CompressedBlob)
    {
        throw std::runtime_error(fmt::format("Failed to compress large blob {}", ChunkHash));
    }
    ZEN_ASSERT_SLOW(CompressedBlob.DecodeRawHash() == ChunkHash);
    ZEN_ASSERT_SLOW(CompressedBlob.DecodeRawSize() == ChunkSize);

    TempLooseChunksStats.CompressedChunkRawBytes += ChunkSize;
    TempLooseChunksStats.CompressedChunkBytes += CompressedBlob.GetCompressedSize();

    // If we use none-compression, the compressed blob references the data and has 64 kb in memory so we don't need to write it to disk
    if (ShouldCompressChunk)
    {
        std::filesystem::path TempFilePath = m_Options.TempDir / (ChunkHash.ToHexString());
        IoBuffer TempPayload = WriteToTempFile(std::move(CompressedBlob).GetCompressed(), TempFilePath);
        CompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload));
    }

    TempLooseChunksStats.CompressedChunkCount++;
    return std::move(CompressedBlob).GetCompressed();
}
-
// Constructs a validate-build-part operation. All reference parameters (log
// output, storage, abort/pause flags, worker pools) are borrowed and must
// outlive this object. The part to validate is identified either by
// BuildPartId or — if BuildPartName is non-empty — resolved by name during
// Execute().
BuildsOperationValidateBuildPart::BuildsOperationValidateBuildPart(OperationLogOutput& OperationLogOutput,
                                                                   BuildStorageBase& Storage,
                                                                   std::atomic<bool>& AbortFlag,
                                                                   std::atomic<bool>& PauseFlag,
                                                                   WorkerThreadPool& IOWorkerPool,
                                                                   WorkerThreadPool& NetworkPool,
                                                                   const Oid& BuildId,
                                                                   const Oid& BuildPartId,
                                                                   const std::string_view BuildPartName,
                                                                   const Options& Options)

: m_LogOutput(OperationLogOutput)
, m_Storage(Storage)
, m_AbortFlag(AbortFlag)
, m_PauseFlag(PauseFlag)
, m_IOWorkerPool(IOWorkerPool)
, m_NetworkPool(NetworkPool)
, m_BuildId(BuildId)
, m_BuildPartId(BuildPartId)
, m_BuildPartName(BuildPartName)
, m_Options(Options)
{
}
-
// Validates every blob referenced by one build part.
//
// Pipeline: fetch the build and build-part descriptors, collect the loose
// chunk and block attachment hashes, then for each attachment schedule a
// download on the network pool followed by a verification job on the IO pool.
// Chunk attachments go through DownloadLargeBlob (multipart-capable); block
// attachments are fetched in a single request. Progress and throughput are
// reported via the operation log while ParallelWork drains.
//
// Throws std::runtime_error on missing parts/blobs/metadata; any exception
// raises m_AbortFlag before rethrowing so in-flight scheduled work bails out.
void
BuildsOperationValidateBuildPart::Execute()
{
    ZEN_TRACE_CPU("ValidateBuildPart");
    try
    {
        // Coarse progress steps surfaced through SetLogOperationProgress.
        enum class TaskSteps : uint32_t
        {
            FetchBuild,
            FetchBuildPart,
            ValidateBlobs,
            Cleanup,
            StepCount
        };

        // Marks the operation progress as complete on scope exit, even when
        // unwinding due to an exception.
        auto EndProgress =
            MakeGuard([&]() { m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); });

        Stopwatch Timer;
        // Emits the final summary line on scope exit unless running quiet.
        auto _ = MakeGuard([&]() {
            if (!m_Options.IsQuiet)
            {
                ZEN_OPERATION_LOG_INFO(m_LogOutput,
                                       "Validated build part {}/{} ('{}') in {}",
                                       m_BuildId,
                                       m_BuildPartId,
                                       m_BuildPartName,
                                       NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
            }
        });

        m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::FetchBuild, (uint32_t)TaskSteps::StepCount);

        CbObject Build = m_Storage.GetBuild(m_BuildId);
        // When invoked with a part name, resolve it to an id via the build's
        // "parts" object.
        if (!m_BuildPartName.empty())
        {
            m_BuildPartId = Build["parts"sv].AsObjectView()[m_BuildPartName].AsObjectId();
            if (m_BuildPartId == Oid::Zero)
            {
                throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", m_BuildId, m_BuildPartName));
            }
        }
        m_ValidateStats.BuildBlobSize = Build.GetSize();
        // Default multipart download chunk size (32 MiB); a non-zero
        // "chunkSize" field on the build overrides it.
        uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;
        if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0)
        {
            PreferredMultipartChunkSize = ChunkSize;
        }

        m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::FetchBuildPart, (uint32_t)TaskSteps::StepCount);

        CbObject BuildPart = m_Storage.GetBuildPart(m_BuildId, m_BuildPartId);
        m_ValidateStats.BuildPartSize = BuildPart.GetSize();
        if (!m_Options.IsQuiet)
        {
            ZEN_OPERATION_LOG_INFO(m_LogOutput,
                                   "Validating build part {}/{} ({})",
                                   m_BuildId,
                                   m_BuildPartId,
                                   NiceBytes(BuildPart.GetSize()));
        }
        // Gather loose chunk attachment hashes from the part descriptor.
        std::vector<IoHash> ChunkAttachments;
        if (const CbObjectView ChunkAttachmentsView = BuildPart["chunkAttachments"sv].AsObjectView())
        {
            for (CbFieldView LooseFileView : ChunkAttachmentsView["rawHashes"sv])
            {
                ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment());
            }
        }
        m_ValidateStats.ChunkAttachmentCount = ChunkAttachments.size();
        // Gather block attachment hashes from the part descriptor.
        std::vector<IoHash> BlockAttachments;
        if (const CbObjectView BlockAttachmentsView = BuildPart["blockAttachments"sv].AsObjectView())
        {
            {
                for (CbFieldView BlocksView : BlockAttachmentsView["rawHashes"sv])
                {
                    BlockAttachments.push_back(BlocksView.AsBinaryAttachment());
                }
            }
        }
        m_ValidateStats.BlockAttachmentCount = BlockAttachments.size();

        // Each block must have metadata in storage; a size mismatch means
        // some metadata blobs are missing.
        std::vector<ChunkBlockDescription> VerifyBlockDescriptions =
            ParseChunkBlockDescriptionList(m_Storage.GetBlockMetadatas(m_BuildId, BlockAttachments));
        if (VerifyBlockDescriptions.size() != BlockAttachments.size())
        {
            throw std::runtime_error(fmt::format("Uploaded blocks metadata could not all be found, {} blocks metadata is missing",
                                                 BlockAttachments.size() - VerifyBlockDescriptions.size()));
        }

        ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);

        // Scratch folder for multipart downloads; recreated fresh and removed
        // again on scope exit.
        const std::filesystem::path TempFolder = ".zen-tmp";

        CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, TempFolder);
        CreateDirectories(TempFolder);
        auto __ = MakeGuard([this, TempFolder]() { CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, TempFolder); });

        m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::ValidateBlobs, (uint32_t)TaskSteps::StepCount);

        std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Validate Blobs"));
        OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr);

        uint64_t AttachmentsToVerifyCount = ChunkAttachments.size() + BlockAttachments.size();
        FilteredRate FilteredDownloadedBytesPerSecond;
        FilteredRate FilteredVerifiedBytesPerSecond;

        std::atomic<uint64_t> MultipartAttachmentCount = 0;

        // Schedule download + verification for every loose chunk attachment.
        // NOTE: the worker lambdas capture locals by reference; Work.Wait()
        // below keeps this frame alive until all scheduled work has drained.
        for (const IoHash& ChunkAttachment : ChunkAttachments)
        {
            Work.ScheduleWork(
                m_NetworkPool,
                [this,
                 &Work,
                 AttachmentsToVerifyCount,
                 &TempFolder,
                 PreferredMultipartChunkSize,
                 &FilteredDownloadedBytesPerSecond,
                 &FilteredVerifiedBytesPerSecond,
                 &ChunkAttachments,
                 ChunkAttachment = IoHash(ChunkAttachment)](std::atomic<bool>&) {
                    if (!m_AbortFlag)
                    {
                        ZEN_TRACE_CPU("ValidateBuildPart_GetChunk");

                        FilteredDownloadedBytesPerSecond.Start();
                        DownloadLargeBlob(
                            m_Storage,
                            TempFolder,
                            m_BuildId,
                            ChunkAttachment,
                            PreferredMultipartChunkSize,
                            Work,
                            m_NetworkPool,
                            m_DownloadStats.DownloadedChunkByteCount,
                            m_DownloadStats.MultipartAttachmentCount,
                            [this,
                             &Work,
                             AttachmentsToVerifyCount,
                             &FilteredDownloadedBytesPerSecond,
                             &FilteredVerifiedBytesPerSecond,
                             ChunkHash = IoHash(ChunkAttachment)](IoBuffer&& Payload) {
                                m_DownloadStats.DownloadedChunkCount++;
                                Payload.SetContentType(ZenContentType::kCompressedBinary);
                                if (!m_AbortFlag)
                                {
                                    // Verification runs on the IO pool so the
                                    // network pool stays free for downloads.
                                    Work.ScheduleWork(
                                        m_IOWorkerPool,
                                        [this,
                                         AttachmentsToVerifyCount,
                                         &FilteredDownloadedBytesPerSecond,
                                         &FilteredVerifiedBytesPerSecond,
                                         Payload = IoBuffer(std::move(Payload)),
                                         ChunkHash](std::atomic<bool>&) mutable {
                                            if (!m_AbortFlag)
                                            {
                                                ZEN_TRACE_CPU("ValidateBuildPart_Validate");

                                                // Stop the download rate meter
                                                // once everything has arrived.
                                                if (m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount ==
                                                    AttachmentsToVerifyCount)
                                                {
                                                    FilteredDownloadedBytesPerSecond.Stop();
                                                }

                                                FilteredVerifiedBytesPerSecond.Start();

                                                uint64_t CompressedSize;
                                                uint64_t DecompressedSize;
                                                ValidateBlob(m_AbortFlag, std::move(Payload), ChunkHash, CompressedSize, DecompressedSize);
                                                m_ValidateStats.VerifiedAttachmentCount++;
                                                m_ValidateStats.VerifiedByteCount += DecompressedSize;
                                                if (m_ValidateStats.VerifiedAttachmentCount.load() == AttachmentsToVerifyCount)
                                                {
                                                    FilteredVerifiedBytesPerSecond.Stop();
                                                }
                                            }
                                        });
                                }
                            });
                    }
                });
        }

        // Schedule download + verification for every block attachment
        // (single-request fetch, then parsed/validated as a chunk block).
        for (const IoHash& BlockAttachment : BlockAttachments)
        {
            Work.ScheduleWork(
                m_NetworkPool,
                [this,
                 &Work,
                 AttachmentsToVerifyCount,
                 &FilteredDownloadedBytesPerSecond,
                 &FilteredVerifiedBytesPerSecond,
                 BlockAttachment = IoHash(BlockAttachment)](std::atomic<bool>&) {
                    if (!m_AbortFlag)
                    {
                        ZEN_TRACE_CPU("ValidateBuildPart_GetBlock");

                        FilteredDownloadedBytesPerSecond.Start();
                        IoBuffer Payload = m_Storage.GetBuildBlob(m_BuildId, BlockAttachment);
                        m_DownloadStats.DownloadedBlockCount++;
                        m_DownloadStats.DownloadedBlockByteCount += Payload.GetSize();
                        if (m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount == AttachmentsToVerifyCount)
                        {
                            FilteredDownloadedBytesPerSecond.Stop();
                        }
                        if (!Payload)
                        {
                            throw std::runtime_error(fmt::format("Block attachment {} could not be found", BlockAttachment));
                        }
                        if (!m_AbortFlag)
                        {
                            Work.ScheduleWork(
                                m_IOWorkerPool,
                                [this,
                                 &FilteredVerifiedBytesPerSecond,
                                 AttachmentsToVerifyCount,
                                 Payload = std::move(Payload),
                                 BlockAttachment](std::atomic<bool>&) mutable {
                                    if (!m_AbortFlag)
                                    {
                                        ZEN_TRACE_CPU("ValidateBuildPart_ValidateBlock");

                                        FilteredVerifiedBytesPerSecond.Start();

                                        uint64_t CompressedSize;
                                        uint64_t DecompressedSize;
                                        ValidateChunkBlock(std::move(Payload), BlockAttachment, CompressedSize, DecompressedSize);
                                        m_ValidateStats.VerifiedAttachmentCount++;
                                        m_ValidateStats.VerifiedByteCount += DecompressedSize;
                                        if (m_ValidateStats.VerifiedAttachmentCount.load() == AttachmentsToVerifyCount)
                                        {
                                            FilteredVerifiedBytesPerSecond.Stop();
                                        }
                                    }
                                });
                        }
                    }
                });
        }

        // Drain all scheduled work, refreshing the progress bar periodically.
        Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
            ZEN_UNUSED(PendingWork);

            const uint64_t DownloadedAttachmentCount = m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount;
            const uint64_t DownloadedByteCount = m_DownloadStats.DownloadedChunkByteCount + m_DownloadStats.DownloadedBlockByteCount;

            FilteredDownloadedBytesPerSecond.Update(DownloadedByteCount);
            FilteredVerifiedBytesPerSecond.Update(m_ValidateStats.VerifiedByteCount);

            std::string Details = fmt::format("Downloaded {}/{} ({}, {}bits/s). Verified {}/{} ({}, {}B/s)",
                                              DownloadedAttachmentCount,
                                              AttachmentsToVerifyCount,
                                              NiceBytes(DownloadedByteCount),
                                              NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8),
                                              m_ValidateStats.VerifiedAttachmentCount.load(),
                                              AttachmentsToVerifyCount,
                                              NiceBytes(m_ValidateStats.VerifiedByteCount.load()),
                                              NiceNum(FilteredVerifiedBytesPerSecond.GetCurrent()));

            // Each attachment counts twice: once downloaded, once verified.
            Progress.UpdateState(
                {.Task = "Validating blobs ",
                 .Details = Details,
                 .TotalCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2),
                 .RemainingCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2 -
                                                         (DownloadedAttachmentCount + m_ValidateStats.VerifiedAttachmentCount.load())),
                 .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
                false);
        });

        Progress.Finish();
        m_ValidateStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs();

        m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount);
    }
    catch (const std::exception&)
    {
        // Signal all in-flight work to stop before propagating the failure.
        m_AbortFlag = true;
        throw;
    }
}
-
// Constructs a prime-cache operation and ensures the temporary download
// directory (<zen folder>/tmp) exists. All reference parameters are borrowed
// and must outlive this object; BuildPartIds are copied into the member
// vector.
BuildsOperationPrimeCache::BuildsOperationPrimeCache(OperationLogOutput& OperationLogOutput,
                                                     StorageInstance& Storage,
                                                     std::atomic<bool>& AbortFlag,
                                                     std::atomic<bool>& PauseFlag,
                                                     WorkerThreadPool& NetworkPool,
                                                     const Oid& BuildId,
                                                     std::span<const Oid> BuildPartIds,
                                                     const Options& Options,
                                                     BuildStorageCache::Statistics& StorageCacheStats)
: m_LogOutput(OperationLogOutput)
, m_Storage(Storage)
, m_AbortFlag(AbortFlag)
, m_PauseFlag(PauseFlag)
, m_NetworkPool(NetworkPool)
, m_BuildId(BuildId)
, m_BuildPartIds(BuildPartIds.begin(), BuildPartIds.end())
, m_Options(Options)
, m_StorageCacheStats(StorageCacheStats)
{
    m_TempPath = m_Options.ZenFolderPath / "tmp";
    CreateDirectories(m_TempPath);
}
-
-void
-BuildsOperationPrimeCache::Execute()
-{
- ZEN_TRACE_CPU("BuildsOperationPrimeCache::Execute");
-
- Stopwatch PrimeTimer;
-
- tsl::robin_map<IoHash, uint64_t, IoHash::Hasher> LooseChunkRawSizes;
-
- tsl::robin_set<IoHash, IoHash::Hasher> BuildBlobs;
-
- for (const Oid& BuildPartId : m_BuildPartIds)
- {
- CbObject BuildPart = m_Storage.BuildStorage->GetBuildPart(m_BuildId, BuildPartId);
-
- CbObjectView BlockAttachmentsView = BuildPart["blockAttachments"sv].AsObjectView();
- std::vector<IoHash> BlockAttachments = compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, BlockAttachmentsView);
-
- CbObjectView ChunkAttachmentsView = BuildPart["chunkAttachments"sv].AsObjectView();
- std::vector<IoHash> ChunkAttachments = compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, ChunkAttachmentsView);
- std::vector<uint64_t> ChunkRawSizes = compactbinary_helpers::ReadArray<uint64_t>("chunkRawSizes"sv, ChunkAttachmentsView);
- if (ChunkAttachments.size() != ChunkRawSizes.size())
- {
- throw std::runtime_error(fmt::format("Mismatch of loose chunk raw size array, expected {}, found {}",
- ChunkAttachments.size(),
- ChunkRawSizes.size()));
- }
-
- BuildBlobs.reserve(ChunkAttachments.size() + BlockAttachments.size());
- BuildBlobs.insert(BlockAttachments.begin(), BlockAttachments.end());
- BuildBlobs.insert(ChunkAttachments.begin(), ChunkAttachments.end());
-
- for (size_t ChunkAttachmentIndex = 0; ChunkAttachmentIndex < ChunkAttachments.size(); ChunkAttachmentIndex++)
- {
- LooseChunkRawSizes.insert_or_assign(ChunkAttachments[ChunkAttachmentIndex], ChunkRawSizes[ChunkAttachmentIndex]);
- }
- }
-
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput, "Found {} referenced blobs", BuildBlobs.size());
- }
-
- if (BuildBlobs.empty())
- {
- return;
- }
-
- std::vector<IoHash> BlobsToDownload;
- BlobsToDownload.reserve(BuildBlobs.size());
-
- if (m_Storage.BuildCacheStorage && !BuildBlobs.empty() && !m_Options.ForceUpload)
- {
- ZEN_TRACE_CPU("BlobCacheExistCheck");
- Stopwatch Timer;
-
- const std::vector<IoHash> BlobHashes(BuildBlobs.begin(), BuildBlobs.end());
- const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult =
- m_Storage.BuildCacheStorage->BlobsExists(m_BuildId, BlobHashes);
-
- if (CacheExistsResult.size() == BlobHashes.size())
- {
- for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++)
- {
- if (!CacheExistsResult[BlobIndex].HasBody)
- {
- BlobsToDownload.push_back(BlobHashes[BlobIndex]);
- }
- }
- size_t FoundCount = BuildBlobs.size() - BlobsToDownload.size();
-
- if (FoundCount > 0 && !m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Remote cache : Found {} out of {} needed blobs in {}",
- FoundCount,
- BuildBlobs.size(),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
- }
- }
- }
- else
- {
- BlobsToDownload.insert(BlobsToDownload.end(), BuildBlobs.begin(), BuildBlobs.end());
- }
-
- if (BlobsToDownload.empty())
- {
- return;
- }
-
- std::atomic<uint64_t> MultipartAttachmentCount;
- std::atomic<size_t> CompletedDownloadCount;
- FilteredRate FilteredDownloadedBytesPerSecond;
-
- {
- std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Downloading"));
- OperationLogOutput::ProgressBar& Progress(*ProgressBarPtr);
-
- ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
-
- const size_t BlobCount = BlobsToDownload.size();
-
- for (size_t BlobIndex = 0; BlobIndex < BlobCount; BlobIndex++)
- {
- Work.ScheduleWork(
- m_NetworkPool,
- [this,
- &Work,
- &BlobsToDownload,
- BlobCount,
- &LooseChunkRawSizes,
- &CompletedDownloadCount,
- &FilteredDownloadedBytesPerSecond,
- &MultipartAttachmentCount,
- BlobIndex](std::atomic<bool>&) {
- if (!m_AbortFlag)
- {
- const IoHash& BlobHash = BlobsToDownload[BlobIndex];
-
- bool IsLargeBlob = false;
-
- if (auto It = LooseChunkRawSizes.find(BlobHash); It != LooseChunkRawSizes.end())
- {
- IsLargeBlob = It->second >= m_Options.LargeAttachmentSize;
- }
-
- FilteredDownloadedBytesPerSecond.Start();
-
- if (IsLargeBlob)
- {
- DownloadLargeBlob(*m_Storage.BuildStorage,
- m_TempPath,
- m_BuildId,
- BlobHash,
- m_Options.PreferredMultipartChunkSize,
- Work,
- m_NetworkPool,
- m_DownloadStats.DownloadedChunkByteCount,
- MultipartAttachmentCount,
- [this, BlobCount, BlobHash, &FilteredDownloadedBytesPerSecond, &CompletedDownloadCount](
- IoBuffer&& Payload) {
- m_DownloadStats.DownloadedChunkCount++;
- m_DownloadStats.RequestsCompleteCount++;
-
- if (!m_AbortFlag)
- {
- if (Payload && m_Storage.BuildCacheStorage)
- {
- m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId,
- BlobHash,
- ZenContentType::kCompressedBinary,
- CompositeBuffer(SharedBuffer(Payload)));
- }
- }
- CompletedDownloadCount++;
- if (CompletedDownloadCount == BlobCount)
- {
- FilteredDownloadedBytesPerSecond.Stop();
- }
- });
- }
- else
- {
- IoBuffer Payload = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlobHash);
- m_DownloadStats.DownloadedBlockCount++;
- m_DownloadStats.DownloadedBlockByteCount += Payload.GetSize();
- m_DownloadStats.RequestsCompleteCount++;
-
- if (!m_AbortFlag)
- {
- if (Payload && m_Storage.BuildCacheStorage)
- {
- m_Storage.BuildCacheStorage->PutBuildBlob(m_BuildId,
- BlobHash,
- ZenContentType::kCompressedBinary,
- CompositeBuffer(SharedBuffer(std::move(Payload))));
- }
- }
- CompletedDownloadCount++;
- if (CompletedDownloadCount == BlobCount)
- {
- FilteredDownloadedBytesPerSecond.Stop();
- }
- }
- }
- });
- }
-
- Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
- ZEN_UNUSED(PendingWork);
-
- uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + m_DownloadStats.DownloadedBlockByteCount.load();
- FilteredDownloadedBytesPerSecond.Update(DownloadedBytes);
-
- std::string DownloadRateString = (CompletedDownloadCount == BlobCount)
- ? ""
- : fmt::format(" {}bits/s", NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8));
- std::string UploadDetails = m_Storage.BuildCacheStorage ? fmt::format(" {} ({}) uploaded.",
- m_StorageCacheStats.PutBlobCount.load(),
- NiceBytes(m_StorageCacheStats.PutBlobByteCount.load()))
- : "";
-
- std::string Details = fmt::format("{}/{} ({}{}) downloaded.{}",
- CompletedDownloadCount.load(),
- BlobCount,
- NiceBytes(DownloadedBytes),
- DownloadRateString,
- UploadDetails);
- Progress.UpdateState({.Task = "Downloading",
- .Details = Details,
- .TotalCount = BlobCount,
- .RemainingCount = BlobCount - CompletedDownloadCount.load(),
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
- });
-
- FilteredDownloadedBytesPerSecond.Stop();
-
- Progress.Finish();
- }
- if (m_AbortFlag)
- {
- return;
- }
-
- if (m_Storage.BuildCacheStorage)
- {
- m_Storage.BuildCacheStorage->Flush(m_LogOutput.GetProgressUpdateDelayMS(), [this](intptr_t Remaining) -> bool {
- ZEN_UNUSED(Remaining);
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput, "Waiting for {} blobs to finish upload to '{}'", Remaining, m_Storage.CacheName);
- }
- return !m_AbortFlag;
- });
- }
-
- if (!m_Options.IsQuiet)
- {
- uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() + m_DownloadStats.DownloadedBlockByteCount.load();
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Downloaded {} ({}bits/s) in {}. {} as multipart. Completed in {}",
- NiceBytes(DownloadedBytes),
- NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), DownloadedBytes * 8)),
- NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000),
- MultipartAttachmentCount.load(),
- NiceTimeSpanMs(PrimeTimer.GetElapsedTimeMs()));
- }
-}
-
-CompositeBuffer
-ValidateBlob(std::atomic<bool>& AbortFlag,
- BuildStorageBase& Storage,
- const Oid& BuildId,
- const IoHash& BlobHash,
- uint64_t& OutCompressedSize,
- uint64_t& OutDecompressedSize)
-{
- ZEN_TRACE_CPU("ValidateBlob");
- IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlobHash);
- if (!Payload)
- {
- throw std::runtime_error(fmt::format("Blob {} could not be found", BlobHash));
- }
- return ValidateBlob(AbortFlag, std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize);
-}
-
-ChunkBlockDescription
-BuildsOperationValidateBuildPart::ValidateChunkBlock(IoBuffer&& Payload,
- const IoHash& BlobHash,
- uint64_t& OutCompressedSize,
- uint64_t& OutDecompressedSize)
-{
- CompositeBuffer BlockBuffer = ValidateBlob(m_AbortFlag, std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize);
- if (!BlockBuffer)
- {
- throw std::runtime_error(fmt::format("Chunk block blob {} is not compressed using 'None' compression level", BlobHash));
- }
- return GetChunkBlockDescription(BlockBuffer.Flatten(), BlobHash);
-}
-
-std::vector<std::pair<Oid, std::string>>
-ResolveBuildPartNames(CbObjectView BuildObject,
- const Oid& BuildId,
- const std::vector<Oid>& BuildPartIds,
- std::span<const std::string> BuildPartNames,
- std::uint64_t& OutPreferredMultipartChunkSize)
-{
- std::vector<std::pair<Oid, std::string>> Result;
- {
- CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView();
- if (!PartsObject)
- {
- throw std::runtime_error("Build object does not have a 'parts' object");
- }
-
- OutPreferredMultipartChunkSize = BuildObject["chunkSize"sv].AsUInt64(OutPreferredMultipartChunkSize);
-
- std::vector<std::pair<Oid, std::string>> AvailableParts;
-
- for (CbFieldView PartView : PartsObject)
- {
- const std::string BuildPartName = std::string(PartView.GetName());
- const Oid BuildPartId = PartView.AsObjectId();
- if (BuildPartId == Oid::Zero)
- {
- ExtendableStringBuilder<128> SB;
- for (CbFieldView ScanPartView : PartsObject)
- {
- SB.Append(fmt::format("\n {}: {}", ScanPartView.GetName(), ScanPartView.AsObjectId()));
- }
- throw std::runtime_error(fmt::format("Build object parts does not have a '{}' object id{}", BuildPartName, SB.ToView()));
- }
- AvailableParts.push_back({BuildPartId, BuildPartName});
- }
-
- if (BuildPartIds.empty() && BuildPartNames.empty())
- {
- Result = AvailableParts;
- }
- else
- {
- for (const std::string& BuildPartName : BuildPartNames)
- {
- if (auto It = std::find_if(AvailableParts.begin(),
- AvailableParts.end(),
- [&BuildPartName](const auto& Part) { return Part.second == BuildPartName; });
- It != AvailableParts.end())
- {
- Result.push_back(*It);
- }
- else
- {
- throw std::runtime_error(fmt::format("Build {} object does not have a part named '{}'", BuildId, BuildPartName));
- }
- }
- for (const Oid& BuildPartId : BuildPartIds)
- {
- if (auto It = std::find_if(AvailableParts.begin(),
- AvailableParts.end(),
- [&BuildPartId](const auto& Part) { return Part.first == BuildPartId; });
- It != AvailableParts.end())
- {
- Result.push_back(*It);
- }
- else
- {
- throw std::runtime_error(fmt::format("Build {} object does not have a part with id '{}'", BuildId, BuildPartId));
- }
- }
- }
-
- if (Result.empty())
- {
- throw std::runtime_error(fmt::format("Build object does not have any parts", BuildId));
- }
- }
- return Result;
-}
-
-ChunkedFolderContent
-GetRemoteContent(OperationLogOutput& Output,
- StorageInstance& Storage,
- const Oid& BuildId,
- const std::vector<std::pair<Oid, std::string>>& BuildParts,
- const BuildManifest& Manifest,
- std::span<const std::string> IncludeWildcards,
- std::span<const std::string> ExcludeWildcards,
- std::unique_ptr<ChunkingController>& OutChunkController,
- std::vector<ChunkedFolderContent>& OutPartContents,
- std::vector<ChunkBlockDescription>& OutBlockDescriptions,
- std::vector<IoHash>& OutLooseChunkHashes,
- bool IsQuiet,
- bool IsVerbose,
- bool DoExtraContentVerify)
-{
- ZEN_TRACE_CPU("GetRemoteContent");
-
- Stopwatch GetBuildPartTimer;
- const Oid BuildPartId = BuildParts[0].first;
- const std::string_view BuildPartName = BuildParts[0].second;
- CbObject BuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, BuildPartId);
- if (!IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(Output,
- "GetBuildPart {} ('{}') took {}. Payload size: {}",
- BuildPartId,
- BuildPartName,
- NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()),
- NiceBytes(BuildPartManifest.GetSize()));
- ZEN_OPERATION_LOG_INFO(Output, "{}", GetCbObjectAsNiceString(BuildPartManifest, " "sv, "\n"sv));
- }
-
- {
- CbObjectView Chunker = BuildPartManifest["chunker"sv].AsObjectView();
- std::string_view ChunkerName = Chunker["name"sv].AsString();
- CbObjectView Parameters = Chunker["parameters"sv].AsObjectView();
- OutChunkController = CreateChunkingController(ChunkerName, Parameters);
- }
-
- auto ParseBuildPartManifest = [&Output, IsQuiet, IsVerbose, DoExtraContentVerify](
- StorageInstance& Storage,
- const Oid& BuildId,
- const Oid& BuildPartId,
- CbObject BuildPartManifest,
- std::span<const std::string> IncludeWildcards,
- std::span<const std::string> ExcludeWildcards,
- const BuildManifest::Part* OptionalManifest,
- ChunkedFolderContent& OutRemoteContent,
- std::vector<ChunkBlockDescription>& OutBlockDescriptions,
- std::vector<IoHash>& OutLooseChunkHashes) {
- std::vector<uint32_t> AbsoluteChunkOrders;
- std::vector<uint64_t> LooseChunkRawSizes;
- std::vector<IoHash> BlockRawHashes;
-
- ReadBuildContentFromCompactBinary(BuildPartManifest,
- OutRemoteContent.Platform,
- OutRemoteContent.Paths,
- OutRemoteContent.RawHashes,
- OutRemoteContent.RawSizes,
- OutRemoteContent.Attributes,
- OutRemoteContent.ChunkedContent.SequenceRawHashes,
- OutRemoteContent.ChunkedContent.ChunkCounts,
- AbsoluteChunkOrders,
- OutLooseChunkHashes,
- LooseChunkRawSizes,
- BlockRawHashes);
-
- // TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them
-
- {
- bool AttemptFallback = false;
- OutBlockDescriptions = GetBlockDescriptions(Output,
- *Storage.BuildStorage,
- Storage.BuildCacheStorage.get(),
- BuildId,
- BuildPartId,
- BlockRawHashes,
- AttemptFallback,
- IsQuiet,
- IsVerbose);
- }
-
- CalculateLocalChunkOrders(AbsoluteChunkOrders,
- OutLooseChunkHashes,
- LooseChunkRawSizes,
- OutBlockDescriptions,
- OutRemoteContent.ChunkedContent.ChunkHashes,
- OutRemoteContent.ChunkedContent.ChunkRawSizes,
- OutRemoteContent.ChunkedContent.ChunkOrders,
- DoExtraContentVerify);
-
- std::vector<std::filesystem::path> DeletedPaths;
-
- if (OptionalManifest)
- {
- tsl::robin_set<std::string> PathsInManifest;
- PathsInManifest.reserve(OptionalManifest->Files.size());
- for (const std::filesystem::path& ManifestPath : OptionalManifest->Files)
- {
- PathsInManifest.insert(ToLower(ManifestPath.generic_string()));
- }
- for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths)
- {
- if (!PathsInManifest.contains(ToLower(RemotePath.generic_string())))
- {
- DeletedPaths.push_back(RemotePath);
- }
- }
- }
-
- if (!IncludeWildcards.empty() || !ExcludeWildcards.empty())
- {
- for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths)
- {
- if (!IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(RemotePath.generic_string()), /*CaseSensitive*/ true))
- {
- DeletedPaths.push_back(RemotePath);
- }
- }
- }
-
- if (!DeletedPaths.empty())
- {
- OutRemoteContent = DeletePathsFromChunkedContent(OutRemoteContent, DeletedPaths);
- InlineRemoveUnusedHashes(OutLooseChunkHashes, OutRemoteContent.ChunkedContent.ChunkHashes);
- }
-
-#if ZEN_BUILD_DEBUG
- ValidateChunkedFolderContent(OutRemoteContent, OutBlockDescriptions, OutLooseChunkHashes, IncludeWildcards, ExcludeWildcards);
-#endif // ZEN_BUILD_DEBUG
- };
-
- auto FindManifest = [&Manifest](const Oid& BuildPartId, std::string_view BuildPartName) -> const BuildManifest::Part* {
- if (Manifest.Parts.empty())
- {
- return nullptr;
- }
- if (Manifest.Parts.size() == 1)
- {
- if (Manifest.Parts[0].PartId == Oid::Zero && Manifest.Parts[0].PartName.empty())
- {
- return &Manifest.Parts[0];
- }
- }
-
- auto It = std::find_if(Manifest.Parts.begin(), Manifest.Parts.end(), [BuildPartId, BuildPartName](const BuildManifest::Part& Part) {
- if (Part.PartId != Oid::Zero)
- {
- return Part.PartId == BuildPartId;
- }
- if (!Part.PartName.empty())
- {
- return Part.PartName == BuildPartName;
- }
- return false;
- });
- if (It != Manifest.Parts.end())
- {
- return &(*It);
- }
- return nullptr;
- };
-
- OutPartContents.resize(1);
- ParseBuildPartManifest(Storage,
- BuildId,
- BuildPartId,
- BuildPartManifest,
- IncludeWildcards,
- ExcludeWildcards,
- FindManifest(BuildPartId, BuildPartName),
- OutPartContents[0],
- OutBlockDescriptions,
- OutLooseChunkHashes);
- ChunkedFolderContent RemoteContent;
- if (BuildParts.size() > 1)
- {
- std::vector<ChunkBlockDescription> OverlayBlockDescriptions;
- std::vector<IoHash> OverlayLooseChunkHashes;
- for (size_t PartIndex = 1; PartIndex < BuildParts.size(); PartIndex++)
- {
- const Oid& OverlayBuildPartId = BuildParts[PartIndex].first;
- const std::string& OverlayBuildPartName = BuildParts[PartIndex].second;
- Stopwatch GetOverlayBuildPartTimer;
- CbObject OverlayBuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, OverlayBuildPartId);
- if (!IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(Output,
- "GetBuildPart {} ('{}') took {}. Payload size: {}",
- OverlayBuildPartId,
- OverlayBuildPartName,
- NiceTimeSpanMs(GetOverlayBuildPartTimer.GetElapsedTimeMs()),
- NiceBytes(OverlayBuildPartManifest.GetSize()));
- }
-
- ChunkedFolderContent OverlayPartContent;
- std::vector<ChunkBlockDescription> OverlayPartBlockDescriptions;
- std::vector<IoHash> OverlayPartLooseChunkHashes;
-
- ParseBuildPartManifest(Storage,
- BuildId,
- OverlayBuildPartId,
- OverlayBuildPartManifest,
- IncludeWildcards,
- ExcludeWildcards,
- FindManifest(OverlayBuildPartId, OverlayBuildPartName),
- OverlayPartContent,
- OverlayPartBlockDescriptions,
- OverlayPartLooseChunkHashes);
- OutPartContents.push_back(OverlayPartContent);
- OverlayBlockDescriptions.insert(OverlayBlockDescriptions.end(),
- OverlayPartBlockDescriptions.begin(),
- OverlayPartBlockDescriptions.end());
- OverlayLooseChunkHashes.insert(OverlayLooseChunkHashes.end(),
- OverlayPartLooseChunkHashes.begin(),
- OverlayPartLooseChunkHashes.end());
- }
-
- RemoteContent = MergeChunkedFolderContents(OutPartContents[0], std::span<const ChunkedFolderContent>(OutPartContents).subspan(1));
- {
- tsl::robin_set<IoHash> AllBlockHashes;
- for (const ChunkBlockDescription& Description : OutBlockDescriptions)
- {
- AllBlockHashes.insert(Description.BlockHash);
- }
- for (const ChunkBlockDescription& Description : OverlayBlockDescriptions)
- {
- if (!AllBlockHashes.contains(Description.BlockHash))
- {
- AllBlockHashes.insert(Description.BlockHash);
- OutBlockDescriptions.push_back(Description);
- }
- }
- }
- {
- tsl::robin_set<IoHash> AllLooseChunkHashes(OutLooseChunkHashes.begin(), OutLooseChunkHashes.end());
- for (const IoHash& OverlayLooseChunkHash : OverlayLooseChunkHashes)
- {
- if (!AllLooseChunkHashes.contains(OverlayLooseChunkHash))
- {
- AllLooseChunkHashes.insert(OverlayLooseChunkHash);
- OutLooseChunkHashes.push_back(OverlayLooseChunkHash);
- }
- }
- }
- }
- else
- {
- RemoteContent = OutPartContents[0];
- }
- return RemoteContent;
-}
-
-std::string
-GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix)
-{
- ExtendableStringBuilder<512> SB;
- std::vector<std::pair<std::string, std::string>> NameStringValuePairs;
- for (CbFieldView Field : Object)
- {
- std::string_view Name = Field.GetName();
- switch (CbValue Accessor = Field.GetValue(); Accessor.GetType())
- {
- case CbFieldType::String:
- NameStringValuePairs.push_back({std::string(Name), std::string(Accessor.AsString())});
- break;
- case CbFieldType::IntegerPositive:
- NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerPositive())});
- break;
- case CbFieldType::IntegerNegative:
- NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerNegative())});
- break;
- case CbFieldType::Float32:
- {
- const float Value = Accessor.AsFloat32();
- if (std::isfinite(Value))
- {
- NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.9g}", Value)});
- }
- else
- {
- NameStringValuePairs.push_back({std::string(Name), "null"});
- }
- }
- break;
- case CbFieldType::Float64:
- {
- const double Value = Accessor.AsFloat64();
- if (std::isfinite(Value))
- {
- NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.17g}", Value)});
- }
- else
- {
- NameStringValuePairs.push_back({std::string(Name), "null"});
- }
- }
- break;
- case CbFieldType::BoolFalse:
- NameStringValuePairs.push_back({std::string(Name), "false"});
- break;
- case CbFieldType::BoolTrue:
- NameStringValuePairs.push_back({std::string(Name), "true"});
- break;
- case CbFieldType::Hash:
- {
- NameStringValuePairs.push_back({std::string(Name), Accessor.AsHash().ToHexString()});
- }
- break;
- case CbFieldType::Uuid:
- {
- StringBuilder<Oid::StringLength + 1> Builder;
- Accessor.AsUuid().ToString(Builder);
- NameStringValuePairs.push_back({std::string(Name), Builder.ToString()});
- }
- break;
- case CbFieldType::DateTime:
- {
- ExtendableStringBuilder<64> Builder;
- Builder << DateTime(Accessor.AsDateTimeTicks()).ToIso8601();
- NameStringValuePairs.push_back({std::string(Name), Builder.ToString()});
- }
- break;
- case CbFieldType::TimeSpan:
- {
- ExtendableStringBuilder<64> Builder;
- const TimeSpan Span(Accessor.AsTimeSpanTicks());
- if (Span.GetDays() == 0)
- {
- Builder << Span.ToString("%h:%m:%s.%n");
- }
- else
- {
- Builder << Span.ToString("%d.%h:%m:%s.%n");
- }
- NameStringValuePairs.push_back({std::string(Name), Builder.ToString()});
- break;
- }
- case CbFieldType::ObjectId:
- NameStringValuePairs.push_back({std::string(Name), Accessor.AsObjectId().ToString()});
- break;
- }
- }
- std::string::size_type LongestKey = 0;
- for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs)
- {
- LongestKey = Max(KeyValue.first.length(), LongestKey);
- }
- for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs)
- {
- SB.Append(fmt::format("{}{:<{}}: {}{}", Prefix, KeyValue.first, LongestKey, KeyValue.second, Suffix));
- }
- return SB.ToString();
-}
-
-#if ZEN_WITH_TESTS
-
-namespace buildstorageoperations_testutils {
- struct TestState
- {
- TestState(const std::filesystem::path& InRootPath)
- : RootPath(InRootPath)
- , LogOutput(CreateStandardLogOutput(Log))
- , ChunkController(CreateStandardChunkingController(StandardChunkingControllerSettings{}))
- , ChunkCache(CreateMemoryChunkingCache())
- , WorkerPool(2)
- , NetworkPool(2)
- {
- }
-
- void Initialize()
- {
- StoragePath = RootPath / "storage";
- TempPath = RootPath / "temp";
- SystemRootDir = RootPath / "sysroot";
- ZenFolderPath = RootPath / ".zen";
-
- CreateDirectories(TempPath);
- CreateDirectories(StoragePath);
-
- Storage.BuildStorage = CreateFileBuildStorage(StoragePath, StorageStats, false);
- }
-
- void CreateSourceData(const std::filesystem::path& Source, std::span<const std::string> Paths, std::span<const uint64_t> Sizes)
- {
- const std::filesystem::path SourcePath = RootPath / Source;
- CreateDirectories(SourcePath);
- for (size_t FileIndex = 0; FileIndex < Paths.size(); FileIndex++)
- {
- const std::string& FilePath = Paths[FileIndex];
- const uint64_t FileSize = Sizes[FileIndex];
- IoBuffer FileData = FileSize > 0 ? CreateSemiRandomBlob(FileSize) : IoBuffer{};
- WriteFile(SourcePath / FilePath, FileData);
- }
- }
-
- std::vector<std::pair<Oid, std::string>> Upload(const Oid& BuildId,
- const Oid& BuildPartId,
- const std::string_view BuildPartName,
- const std::filesystem::path& Source,
- const std::filesystem::path& ManifestPath)
- {
- const std::filesystem::path SourcePath = RootPath / Source;
- CbObject MetaData;
- BuildsOperationUploadFolder Upload(*LogOutput,
- Storage,
- AbortFlag,
- PauseFlag,
- WorkerPool,
- NetworkPool,
- BuildId,
- SourcePath,
- true,
- MetaData,
- BuildsOperationUploadFolder::Options{.TempDir = TempPath});
- return Upload.Execute(BuildPartId, BuildPartName, ManifestPath, *ChunkController, *ChunkCache);
- }
-
- void ValidateUpload(const Oid& BuildId, const std::vector<std::pair<Oid, std::string>>& Parts)
- {
- for (auto Part : Parts)
- {
- BuildsOperationValidateBuildPart Validate(*LogOutput,
- *Storage.BuildStorage,
- AbortFlag,
- PauseFlag,
- WorkerPool,
- NetworkPool,
- BuildId,
- Part.first,
- Part.second,
- BuildsOperationValidateBuildPart::Options{});
- Validate.Execute();
- }
- }
-
- FolderContent Download(const Oid& BuildId,
- const Oid& BuildPartId,
- const std::string_view BuildPartName,
- const std::filesystem::path& Target,
- bool Append)
- {
- const std::filesystem::path TargetPath = RootPath / Target;
-
- CreateDirectories(TargetPath);
-
- uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;
- CbObject BuildObject = Storage.BuildStorage->GetBuild(BuildId);
- std::vector<Oid> PartIds;
- if (BuildPartId != Oid::Zero)
- {
- PartIds.push_back(BuildPartId);
- }
- std::vector<std::string> PartNames;
- if (!BuildPartName.empty())
- {
- PartNames.push_back(std::string(BuildPartName));
- }
- std::vector<std::pair<Oid, std::string>> AllBuildParts =
- ResolveBuildPartNames(BuildObject, BuildId, PartIds, PartNames, PreferredMultipartChunkSize);
-
- std::vector<ChunkedFolderContent> PartContents;
-
- std::vector<ChunkBlockDescription> BlockDescriptions;
- std::vector<IoHash> LooseChunkHashes;
-
- ChunkedFolderContent RemoteContent = GetRemoteContent(*LogOutput,
- Storage,
- BuildId,
- AllBuildParts,
- {},
- {},
- {},
- ChunkController,
- PartContents,
- BlockDescriptions,
- LooseChunkHashes,
- /*IsQuiet*/ false,
- /*IsVerbose*/ false,
- /*DoExtraContentVerify*/ true);
-
- GetFolderContentStatistics LocalFolderScanStats;
-
- struct ContentVisitor : public GetDirectoryContentVisitor
- {
- virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content)
- {
- RwLock::ExclusiveLockScope _(ExistingPathsLock);
- for (const std::filesystem::path& FileName : Content.FileNames)
- {
- if (RelativeRoot.empty())
- {
- ExistingPaths.push_back(FileName);
- }
- else
- {
- ExistingPaths.push_back(RelativeRoot / FileName);
- }
- }
- }
-
- RwLock ExistingPathsLock;
- std::vector<std::filesystem::path> ExistingPaths;
- } Visitor;
-
- Latch PendingWorkCount(1);
-
- GetDirectoryContent(TargetPath,
- DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive,
- Visitor,
- WorkerPool,
- PendingWorkCount);
-
- PendingWorkCount.CountDown();
- PendingWorkCount.Wait();
-
- FolderContent CurrentLocalFolderState = GetValidFolderContent(
- WorkerPool,
- LocalFolderScanStats,
- TargetPath,
- Visitor.ExistingPaths,
- [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); },
- 1000,
- AbortFlag,
- PauseFlag);
-
- ChunkingStatistics LocalChunkingStats;
- ChunkedFolderContent LocalContent = ChunkFolderContent(
- LocalChunkingStats,
- WorkerPool,
- TargetPath,
- CurrentLocalFolderState,
- *ChunkController,
- *ChunkCache,
- 1000,
- [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { ZEN_UNUSED(IsAborted, IsPaused); },
- AbortFlag,
- PauseFlag);
-
- if (Append)
- {
- RemoteContent = ApplyChunkedContentOverlay(LocalContent, RemoteContent, {}, {});
- }
-
- const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent);
- const ChunkedContentLookup RemoteLookup = BuildChunkedContentLookup(RemoteContent);
-
- BuildsOperationUpdateFolder Download(*LogOutput,
- Storage,
- AbortFlag,
- PauseFlag,
- WorkerPool,
- NetworkPool,
- BuildId,
- TargetPath,
- LocalContent,
- LocalLookup,
- RemoteContent,
- RemoteLookup,
- BlockDescriptions,
- LooseChunkHashes,
- BuildsOperationUpdateFolder::Options{.SystemRootDir = SystemRootDir,
- .ZenFolderPath = ZenFolderPath,
- .ValidateCompletedSequences = true});
- FolderContent ResultingState;
- Download.Execute(ResultingState);
-
- return ResultingState;
- }
-
- void ValidateDownload(std::span<const std::string> Paths,
- std::span<const uint64_t> Sizes,
- const std::filesystem::path& Source,
- const std::filesystem::path& Target,
- const FolderContent& DownloadContent)
- {
- const std::filesystem::path SourcePath = RootPath / Source;
- const std::filesystem::path TargetPath = RootPath / Target;
-
- CHECK_EQ(Paths.size(), DownloadContent.Paths.size());
- tsl::robin_map<std::string, uint64_t> ExpectedSizes;
- tsl::robin_map<std::string, IoHash> ExpectedHashes;
- for (size_t Index = 0; Index < Paths.size(); Index++)
- {
- const std::string LookupString = std::filesystem::path(Paths[Index]).generic_string();
- ExpectedSizes.insert_or_assign(LookupString, Sizes[Index]);
- std::filesystem::path FilePath = SourcePath / Paths[Index];
- const IoHash SourceHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(FilePath.make_preferred()));
- ExpectedHashes.insert_or_assign(LookupString, SourceHash);
- }
- for (size_t Index = 0; Index < DownloadContent.Paths.size(); Index++)
- {
- const std::string LookupString = std::filesystem::path(DownloadContent.Paths[Index]).generic_string();
- auto SizeIt = ExpectedSizes.find(LookupString);
- CHECK_NE(SizeIt, ExpectedSizes.end());
- CHECK_EQ(SizeIt->second, DownloadContent.RawSizes[Index]);
- std::filesystem::path FilePath = TargetPath / DownloadContent.Paths[Index];
- const IoHash DownloadedHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(FilePath.make_preferred()));
- auto HashIt = ExpectedHashes.find(LookupString);
- CHECK_NE(HashIt, ExpectedHashes.end());
- CHECK_EQ(HashIt->second, DownloadedHash);
- }
- }
-
- const std::filesystem::path RootPath;
- std::filesystem::path StoragePath;
- std::filesystem::path TempPath;
- std::filesystem::path SystemRootDir;
- std::filesystem::path ZenFolderPath;
-
- LoggerRef Log = ConsoleLog();
- std::unique_ptr<OperationLogOutput> LogOutput;
-
- std::unique_ptr<ChunkingController> ChunkController;
- std::unique_ptr<ChunkingCache> ChunkCache;
-
- StorageInstance Storage;
- BuildStorageBase::Statistics StorageStats;
-
- WorkerThreadPool WorkerPool;
- WorkerThreadPool NetworkPool;
-
- std::atomic<bool> AbortFlag;
- std::atomic<bool> PauseFlag;
- };
-
-} // namespace buildstorageoperations_testutils
-
-TEST_CASE("buildstorageoperations.upload.folder")
-{
- using namespace buildstorageoperations_testutils;
-
- FastRandom BaseRandom;
-
- const size_t FileCount = 11;
-
- const std::string Paths[FileCount] = {{"file_1"},
- {"file_2.exe"},
- {"file_3.txt"},
- {"dir_1/dir1_file_1.exe"},
- {"dir_1/dir1_file_2.pdb"},
- {"dir_1/dir1_file_3.txt"},
- {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
- {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
- {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
- {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
- {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
- const uint64_t Sizes[FileCount] =
- {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};
-
- ScopedTemporaryDirectory SourceFolder;
- TestState State(SourceFolder.Path());
- State.Initialize();
- State.CreateSourceData("source", Paths, Sizes);
-
- const Oid BuildId = Oid::NewOid();
- const Oid BuildPartId = Oid::NewOid();
- const std::string BuildPartName = "default";
-
- auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {});
-
- CHECK_EQ(Result.size(), 1u);
- CHECK_EQ(Result[0].first, BuildPartId);
- CHECK_EQ(Result[0].second, BuildPartName);
- State.ValidateUpload(BuildId, Result);
-
- FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false);
- CHECK_EQ(DownloadContent.Paths.size(), FileCount);
- State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent);
-}
-
-TEST_CASE("buildstorageoperations.upload.manifest")
-{
- using namespace buildstorageoperations_testutils;
-
- FastRandom BaseRandom;
-
- const size_t FileCount = 11;
-
- const std::string Paths[FileCount] = {{"file_1"},
- {"file_2.exe"},
- {"file_3.txt"},
- {"dir_1/dir1_file_1.exe"},
- {"dir_1/dir1_file_2.pdb"},
- {"dir_1/dir1_file_3.txt"},
- {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
- {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
- {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
- {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
- {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
- const uint64_t Sizes[FileCount] =
- {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};
-
- ScopedTemporaryDirectory SourceFolder;
- TestState State(SourceFolder.Path());
- State.Initialize();
- State.CreateSourceData("source", Paths, Sizes);
-
- std::span<const std::string> ManifestFiles(Paths);
- ManifestFiles = ManifestFiles.subspan(0, FileCount / 2);
-
- std::span<const uint64_t> ManifestSizes(Sizes);
- ManifestSizes = ManifestSizes.subspan(0, FileCount / 2);
-
- ExtendableStringBuilder<1024> Manifest;
- for (const std::string& FilePath : ManifestFiles)
- {
- Manifest << FilePath << "\n";
- }
-
- WriteFile(State.RootPath / "manifest.txt", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size()));
-
- const Oid BuildId = Oid::NewOid();
- const Oid BuildPartId = Oid::NewOid();
- const std::string BuildPartName = "default";
-
- auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", State.RootPath / "manifest.txt");
-
- CHECK_EQ(Result.size(), 1u);
- CHECK_EQ(Result[0].first, BuildPartId);
- CHECK_EQ(Result[0].second, BuildPartName);
- State.ValidateUpload(BuildId, Result);
-
- FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false);
- State.ValidateDownload(ManifestFiles, ManifestSizes, "source", "download", DownloadContent);
-}
-
-TEST_CASE("buildstorageoperations.memorychunkingcache")
-{
- using namespace buildstorageoperations_testutils;
-
- FastRandom BaseRandom;
-
- const size_t FileCount = 11;
-
- const std::string Paths[FileCount] = {{"file_1"},
- {"file_2.exe"},
- {"file_3.txt"},
- {"dir_1/dir1_file_1.exe"},
- {"dir_1/dir1_file_2.pdb"},
- {"dir_1/dir1_file_3.txt"},
- {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
- {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
- {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
- {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
- {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
- const uint64_t Sizes[FileCount] =
- {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};
-
- ScopedTemporaryDirectory SourceFolder;
- TestState State(SourceFolder.Path());
- State.Initialize();
- State.CreateSourceData("source", Paths, Sizes);
-
- const Oid BuildId = Oid::NewOid();
- const Oid BuildPartId = Oid::NewOid();
- const std::string BuildPartName = "default";
-
- {
- const std::filesystem::path SourcePath = SourceFolder.Path() / "source";
- CbObject MetaData;
- BuildsOperationUploadFolder Upload(*State.LogOutput,
- State.Storage,
- State.AbortFlag,
- State.PauseFlag,
- State.WorkerPool,
- State.NetworkPool,
- BuildId,
- SourcePath,
- true,
- MetaData,
- BuildsOperationUploadFolder::Options{.TempDir = State.TempPath});
- auto Result = Upload.Execute(BuildPartId, BuildPartName, {}, *State.ChunkController, *State.ChunkCache);
-
- CHECK_EQ(Upload.m_ChunkingStats.FilesStoredInCache.load(), FileCount - 1); // Zero size files are not stored in cache
- CHECK_EQ(Upload.m_ChunkingStats.BytesStoredInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0)));
- CHECK(Upload.m_ChunkingStats.ChunksStoredInCache.load() >= FileCount - 1); // Zero size files are not stored in cache
-
- CHECK_EQ(Result.size(), 1u);
- CHECK_EQ(Result[0].first, BuildPartId);
- CHECK_EQ(Result[0].second, BuildPartName);
- }
-
- auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {});
-
- const Oid BuildId2 = Oid::NewOid();
- const Oid BuildPartId2 = Oid::NewOid();
-
- {
- const std::filesystem::path SourcePath = SourceFolder.Path() / "source";
- CbObject MetaData;
- BuildsOperationUploadFolder Upload(*State.LogOutput,
- State.Storage,
- State.AbortFlag,
- State.PauseFlag,
- State.WorkerPool,
- State.NetworkPool,
- BuildId2,
- SourcePath,
- true,
- MetaData,
- BuildsOperationUploadFolder::Options{.TempDir = State.TempPath});
- Upload.Execute(BuildPartId2, BuildPartName, {}, *State.ChunkController, *State.ChunkCache);
-
- CHECK_EQ(Upload.m_ChunkingStats.FilesFoundInCache.load(), FileCount - 1); // Zero size files are not stored in cache
- CHECK_EQ(Upload.m_ChunkingStats.BytesFoundInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0)));
- CHECK(Upload.m_ChunkingStats.ChunksFoundInCache.load() >= FileCount - 1); // Zero size files are not stored in cache
- }
-
- FolderContent DownloadContent = State.Download(BuildId2, BuildPartId2, {}, "download", /* Append */ false);
- State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent);
-}
-
-TEST_CASE("buildstorageoperations.upload.multipart")
-{
- // Disabled since it relies on authentication and specific block being present in cloud storage
- if (false)
- {
- using namespace buildstorageoperations_testutils;
-
- FastRandom BaseRandom;
-
- const size_t FileCount = 11;
-
- const std::string Paths[FileCount] = {{"file_1"},
- {"file_2.exe"},
- {"file_3.txt"},
- {"dir_1/dir1_file_1.exe"},
- {"dir_1/dir1_file_2.pdb"},
- {"dir_1/dir1_file_3.txt"},
- {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
- {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
- {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
- {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
- {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
- const uint64_t Sizes[FileCount] =
- {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};
-
- ScopedTemporaryDirectory SourceFolder;
- TestState State(SourceFolder.Path());
- State.Initialize();
- State.CreateSourceData("source", Paths, Sizes);
-
- std::span<const std::string> ManifestFiles1(Paths);
- ManifestFiles1 = ManifestFiles1.subspan(0, FileCount / 2);
-
- std::span<const uint64_t> ManifestSizes1(Sizes);
- ManifestSizes1 = ManifestSizes1.subspan(0, FileCount / 2);
-
- std::span<const std::string> ManifestFiles2(Paths);
- ManifestFiles2 = ManifestFiles2.subspan(FileCount / 2 - 1);
-
- std::span<const uint64_t> ManifestSizes2(Sizes);
- ManifestSizes2 = ManifestSizes2.subspan(FileCount / 2 - 1);
-
- const Oid BuildPart1Id = Oid::NewOid();
- const std::string BuildPart1Name = "part1";
- const Oid BuildPart2Id = Oid::NewOid();
- const std::string BuildPart2Name = "part2";
- {
- CbObjectWriter Writer;
- Writer.BeginObject("parts"sv);
- {
- Writer.BeginObject(BuildPart1Name);
- {
- Writer.AddObjectId("partId"sv, BuildPart1Id);
- Writer.BeginArray("files"sv);
- for (const std::string& ManifestFile : ManifestFiles1)
- {
- Writer.AddString(ManifestFile);
- }
- Writer.EndArray(); // files
- }
- Writer.EndObject(); // part1
-
- Writer.BeginObject(BuildPart2Name);
- {
- Writer.AddObjectId("partId"sv, BuildPart2Id);
- Writer.BeginArray("files"sv);
- for (const std::string& ManifestFile : ManifestFiles2)
- {
- Writer.AddString(ManifestFile);
- }
- Writer.EndArray(); // files
- }
- Writer.EndObject(); // part2
- }
- Writer.EndObject(); // parts
-
- ExtendableStringBuilder<1024> Manifest;
- CompactBinaryToJson(Writer.Save(), Manifest);
- WriteFile(State.RootPath / "manifest.json", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size()));
- }
-
- const Oid BuildId = Oid::NewOid();
-
- auto Result = State.Upload(BuildId, {}, {}, "source", State.RootPath / "manifest.json");
-
- CHECK_EQ(Result.size(), 2u);
- CHECK_EQ(Result[0].first, BuildPart1Id);
- CHECK_EQ(Result[0].second, BuildPart1Name);
- CHECK_EQ(Result[1].first, BuildPart2Id);
- CHECK_EQ(Result[1].second, BuildPart2Name);
- State.ValidateUpload(BuildId, Result);
-
- FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false);
- State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent);
-
- FolderContent Part1DownloadContent = State.Download(BuildId, BuildPart1Id, {}, "download_part1", /* Append */ false);
- State.ValidateDownload(ManifestFiles1, ManifestSizes1, "source", "download_part1", Part1DownloadContent);
-
- FolderContent Part2DownloadContent = State.Download(BuildId, Oid::Zero, BuildPart2Name, "download_part2", /* Append */ false);
- State.ValidateDownload(ManifestFiles2, ManifestSizes2, "source", "download_part2", Part2DownloadContent);
-
- (void)State.Download(BuildId, BuildPart1Id, BuildPart1Name, "download_part1+2", /* Append */ false);
- FolderContent Part1And2DownloadContent = State.Download(BuildId, BuildPart2Id, {}, "download_part1+2", /* Append */ true);
- State.ValidateDownload(Paths, Sizes, "source", "download_part1+2", Part1And2DownloadContent);
- }
-}
-
-void
-buildstorageoperations_forcelink()
-{
-}
-
-#endif // ZEN_WITH_TESTS
-
-} // namespace zen
diff --git a/src/zenremotestore/builds/buildstorageresolve.cpp b/src/zenremotestore/builds/buildstorageresolve.cpp
new file mode 100644
index 000000000..b33d7af29
--- /dev/null
+++ b/src/zenremotestore/builds/buildstorageresolve.cpp
@@ -0,0 +1,249 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenremotestore/builds/buildstorageresolve.h>
+
+#include <zencore/fmtutils.h>
+#include <zenremotestore/builds/buildstoragecache.h>
+#include <zenremotestore/jupiter/jupiterhost.h>
+#include <zenutil/zenserverprocess.h>
+
+namespace zen {
+
+namespace {
+ std::string ConnectionSettingsToString(const HttpClientSettings& ClientSettings)
+ {
+ ExtendableStringBuilder<128> SB;
+ SB << "\n LogCategory: " << ClientSettings.LogCategory;
+ SB << "\n ConnectTimeout: " << ClientSettings.ConnectTimeout.count() << " ms";
+ SB << "\n Timeout: " << ClientSettings.Timeout.count() << " ms";
+ SB << "\n AccessTokenProvider: " << ClientSettings.AccessTokenProvider.has_value();
+ SB << "\n AssumeHttp2: " << ClientSettings.AssumeHttp2;
+ SB << "\n AllowResume: " << ClientSettings.AllowResume;
+ SB << "\n RetryCount: " << ClientSettings.RetryCount;
+ SB << "\n SessionId: " << ClientSettings.SessionId.ToString();
+ SB << "\n Verbose: " << ClientSettings.Verbose;
+ SB << "\n MaximumInMemoryDownloadSize: " << ClientSettings.MaximumInMemoryDownloadSize;
+ return SB.ToString();
+ }
+} // namespace
+
+BuildStorageResolveResult
+ResolveBuildStorage(LoggerRef InLog,
+ const HttpClientSettings& ClientSettings,
+ std::string_view Host,
+ std::string_view OverrideHost,
+ std::string_view ZenCacheHost,
+ ZenCacheResolveMode ZenResolveMode,
+ bool Verbose)
+{
+ ZEN_SCOPED_LOG(InLog);
+
+ bool AllowZenCacheDiscovery = ZenResolveMode == ZenCacheResolveMode::Discovery || ZenResolveMode == ZenCacheResolveMode::All;
+ bool AllowLocalZenCache = ZenResolveMode == ZenCacheResolveMode::LocalHost || ZenResolveMode == ZenCacheResolveMode::All;
+
+ auto GetHostNameFromUrl = [](std::string_view Url) -> std::string_view {
+ std::string::size_type HostnameStart = 0;
+ std::string::size_type HostnameLength = std::string::npos;
+ if (auto StartPos = Url.find("//"); StartPos != std::string::npos)
+ {
+ HostnameStart = StartPos + 2;
+ }
+ if (auto EndPos = Url.find("/", HostnameStart); EndPos != std::string::npos)
+ {
+ HostnameLength = EndPos - HostnameStart;
+ }
+ if (auto EndPos = Url.find(":", HostnameStart); EndPos != std::string::npos)
+ {
+ HostnameLength = EndPos - HostnameStart;
+ }
+ return Url.substr(HostnameStart, HostnameLength);
+ };
+
+ std::string HostUrl;
+ std::string HostName;
+ double HostLatencySec = -1.0;
+ uint64_t HostMaxRangeCountPerRequest = 1;
+
+ std::string CacheUrl;
+ std::string CacheName;
+ bool HostAssumeHttp2 = ClientSettings.AssumeHttp2;
+ bool CacheAssumeHttp2 = ClientSettings.AssumeHttp2;
+ double CacheLatencySec = -1.0;
+ uint64_t CacheMaxRangeCountPerRequest = 1;
+
+ JupiterServerDiscovery DiscoveryResponse;
+ const std::string_view DiscoveryHost = Host.empty() ? OverrideHost : Host;
+
+ if (OverrideHost.empty() || (ZenCacheHost.empty() && AllowZenCacheDiscovery))
+ {
+ if (Verbose)
+ {
+ ZEN_INFO("Querying servers at '{}/api/v1/status/servers'\n Connection settings:{}",
+ DiscoveryHost,
+ ConnectionSettingsToString(ClientSettings));
+ }
+
+ DiscoveryResponse = DiscoverJupiterEndpoints(DiscoveryHost, ClientSettings);
+ }
+
+ if (!OverrideHost.empty())
+ {
+ if (Verbose)
+ {
+ ZEN_INFO("Testing server endpoint at '{}/health/live'. Assume http2: {}", OverrideHost, HostAssumeHttp2);
+ }
+ if (JupiterEndpointTestResult TestResult = TestJupiterEndpoint(OverrideHost, HostAssumeHttp2, ClientSettings.Verbose);
+ TestResult.Success)
+ {
+ if (Verbose)
+ {
+ ZEN_INFO("Server endpoint at '{}/api/v1/status/servers' succeeded", OverrideHost);
+ }
+ HostUrl = OverrideHost;
+ HostName = GetHostNameFromUrl(OverrideHost);
+ HostLatencySec = TestResult.LatencySeconds;
+ HostMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest;
+ }
+ else
+ {
+ throw std::runtime_error(fmt::format("Host {} could not be reached. Reason: {}", OverrideHost, TestResult.FailureReason));
+ }
+ }
+ else
+ {
+ if (DiscoveryResponse.ServerEndPoints.empty())
+ {
+ throw std::runtime_error(fmt::format("Failed to find any builds hosts at {}", DiscoveryHost));
+ }
+
+ for (const JupiterServerDiscovery::EndPoint& ServerEndpoint : DiscoveryResponse.ServerEndPoints)
+ {
+ if (!ServerEndpoint.BaseUrl.empty())
+ {
+ if (Verbose)
+ {
+ ZEN_INFO("Testing server endpoint at '{}/health/live'. Assume http2: {}",
+ ServerEndpoint.BaseUrl,
+ ServerEndpoint.AssumeHttp2);
+ }
+
+ if (JupiterEndpointTestResult TestResult =
+ TestJupiterEndpoint(ServerEndpoint.BaseUrl, ServerEndpoint.AssumeHttp2, ClientSettings.Verbose);
+ TestResult.Success)
+ {
+ if (Verbose)
+ {
+ ZEN_INFO("Server endpoint at '{}/api/v1/status/servers' succeeded", ServerEndpoint.BaseUrl);
+ }
+
+ HostUrl = ServerEndpoint.BaseUrl;
+ HostAssumeHttp2 = ServerEndpoint.AssumeHttp2;
+ HostName = ServerEndpoint.Name;
+ HostLatencySec = TestResult.LatencySeconds;
+ HostMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest;
+ break;
+ }
+ else
+ {
+ ZEN_DEBUG("Unable to reach host {}. Reason: {}", ServerEndpoint.BaseUrl, TestResult.FailureReason);
+ }
+ }
+ }
+ if (HostUrl.empty())
+ {
+ throw std::runtime_error(fmt::format("Failed to find any usable builds hosts out of {} using {}",
+ DiscoveryResponse.ServerEndPoints.size(),
+ DiscoveryHost));
+ }
+ }
+ if (ZenCacheHost.empty())
+ {
+ if (AllowZenCacheDiscovery)
+ {
+ for (const JupiterServerDiscovery::EndPoint& CacheEndpoint : DiscoveryResponse.CacheEndPoints)
+ {
+ if (!CacheEndpoint.BaseUrl.empty())
+ {
+ if (Verbose)
+ {
+ ZEN_INFO("Testing cache endpoint at '{}/status/builds'. Assume http2: {}",
+ CacheEndpoint.BaseUrl,
+ CacheEndpoint.AssumeHttp2);
+ }
+
+ if (ZenCacheEndpointTestResult TestResult =
+ TestZenCacheEndpoint(CacheEndpoint.BaseUrl, CacheEndpoint.AssumeHttp2, ClientSettings.Verbose);
+ TestResult.Success)
+ {
+ if (Verbose)
+ {
+ ZEN_INFO("Cache endpoint at '{}/status/builds' succeeded", CacheEndpoint.BaseUrl);
+ }
+
+ CacheUrl = CacheEndpoint.BaseUrl;
+ CacheAssumeHttp2 = CacheEndpoint.AssumeHttp2;
+ CacheName = CacheEndpoint.Name;
+ CacheLatencySec = TestResult.LatencySeconds;
+ CacheMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest;
+ break;
+ }
+ }
+ }
+ }
+ if (CacheUrl.empty() && AllowLocalZenCache)
+ {
+ ZenServerState State;
+ if (State.InitializeReadOnly())
+ {
+ State.Snapshot([&](const ZenServerState::ZenServerEntry& Entry) {
+ if (CacheUrl.empty())
+ {
+ std::string ZenServerLocalHostUrl = fmt::format("http://127.0.0.1:{}", Entry.EffectiveListenPort.load());
+ if (ZenCacheEndpointTestResult TestResult =
+ TestZenCacheEndpoint(ZenServerLocalHostUrl, /*AssumeHttp2*/ false, ClientSettings.Verbose);
+ TestResult.Success)
+ {
+ CacheUrl = ZenServerLocalHostUrl;
+ CacheAssumeHttp2 = false;
+ CacheName = "localhost";
+ CacheLatencySec = TestResult.LatencySeconds;
+ }
+ }
+ });
+ }
+ }
+ }
+ else
+ {
+ if (Verbose)
+ {
+ ZEN_INFO("Testing cache endpoint at '{}/status/builds'. Assume http2: {}", ZenCacheHost, false);
+ }
+ if (ZenCacheEndpointTestResult TestResult = TestZenCacheEndpoint(ZenCacheHost, /*AssumeHttp2*/ false, ClientSettings.Verbose);
+ TestResult.Success)
+ {
+ CacheUrl = ZenCacheHost;
+ CacheName = GetHostNameFromUrl(ZenCacheHost);
+ CacheLatencySec = TestResult.LatencySeconds;
+ CacheMaxRangeCountPerRequest = TestResult.MaxRangeCountPerRequest;
+ }
+ else
+ {
+ ZEN_WARN("Unable to reach cache host {}. Reason: {}", ZenCacheHost, TestResult.FailureReason);
+ }
+ }
+
+ return BuildStorageResolveResult{
+ .Cloud = {.Address = HostUrl,
+ .Name = HostName,
+ .AssumeHttp2 = HostAssumeHttp2,
+ .LatencySec = HostLatencySec,
+ .Caps = BuildStorageResolveResult::Capabilities{.MaxRangeCountPerRequest = HostMaxRangeCountPerRequest}},
+ .Cache = {.Address = CacheUrl,
+ .Name = CacheName,
+ .AssumeHttp2 = CacheAssumeHttp2,
+ .LatencySec = CacheLatencySec,
+ .Caps = BuildStorageResolveResult::Capabilities{.MaxRangeCountPerRequest = CacheMaxRangeCountPerRequest}}};
+}
+
+} // namespace zen
diff --git a/src/zenremotestore/builds/buildstorageutil.cpp b/src/zenremotestore/builds/buildstorageutil.cpp
index b249d7d52..dc8f79a47 100644
--- a/src/zenremotestore/builds/buildstorageutil.cpp
+++ b/src/zenremotestore/builds/buildstorageutil.cpp
@@ -2,293 +2,116 @@
#include <zenremotestore/builds/buildstorageutil.h>
+#include <zencore/basicfile.h>
+#include <zencore/compactbinary.h>
+#include <zencore/compactbinaryutil.h>
+#include <zencore/compactbinaryvalue.h>
+#include <zencore/except.h>
#include <zencore/fmtutils.h>
+#include <zencore/logging/broadcastsink.h>
+#include <zencore/parallelwork.h>
#include <zencore/timer.h>
+#include <zencore/trace.h>
+#include <zenremotestore/builds/buildcontent.h>
+#include <zenremotestore/builds/buildmanifest.h>
+#include <zenremotestore/builds/buildprimecache.h>
#include <zenremotestore/builds/buildstorage.h>
#include <zenremotestore/builds/buildstoragecache.h>
+#include <zenremotestore/builds/buildupdatefolder.h>
+#include <zenremotestore/builds/builduploadfolder.h>
+#include <zenremotestore/builds/buildvalidatebuildpart.h>
#include <zenremotestore/builds/jupiterbuildstorage.h>
#include <zenremotestore/chunking/chunkblock.h>
+#include <zenremotestore/chunking/chunkingcache.h>
+#include <zenremotestore/chunking/chunkingcontroller.h>
#include <zenremotestore/jupiter/jupiterhost.h>
-#include <zenremotestore/operationlogoutput.h>
+#include <zenutil/filesystemutils.h>
+#include <zenutil/logging.h>
+#include <zenutil/progress.h>
+#include <zenutil/wildcard.h>
#include <zenutil/zenserverprocess.h>
-namespace zen {
-namespace {
- std::string ConnectionSettingsToString(const HttpClientSettings& ClientSettings)
- {
- ExtendableStringBuilder<128> SB;
- SB << "\n LogCategory: " << ClientSettings.LogCategory;
- SB << "\n ConnectTimeout: " << ClientSettings.ConnectTimeout.count() << " ms";
- SB << "\n Timeout: " << ClientSettings.Timeout.count() << " ms";
- SB << "\n AccessTokenProvider: " << ClientSettings.AccessTokenProvider.has_value();
- SB << "\n AssumeHttp2: " << ClientSettings.AssumeHttp2;
- SB << "\n AllowResume: " << ClientSettings.AllowResume;
- SB << "\n RetryCount: " << ClientSettings.RetryCount;
- SB << "\n SessionId: " << ClientSettings.SessionId.ToString();
- SB << "\n Verbose: " << ClientSettings.Verbose;
- SB << "\n MaximumInMemoryDownloadSize: " << ClientSettings.MaximumInMemoryDownloadSize;
- return SB.ToString();
- }
-} // namespace
-
-BuildStorageResolveResult
-ResolveBuildStorage(OperationLogOutput& Output,
- const HttpClientSettings& ClientSettings,
- std::string_view Host,
- std::string_view OverrideHost,
- std::string_view ZenCacheHost,
- ZenCacheResolveMode ZenResolveMode,
- bool Verbose)
-{
- bool AllowZenCacheDiscovery = ZenResolveMode == ZenCacheResolveMode::Discovery || ZenResolveMode == ZenCacheResolveMode::All;
- bool AllowLocalZenCache = ZenResolveMode == ZenCacheResolveMode::LocalHost || ZenResolveMode == ZenCacheResolveMode::All;
-
- auto GetHostNameFromUrl = [](std::string_view Url) -> std::string_view {
- std::string::size_type HostnameStart = 0;
- std::string::size_type HostnameLength = std::string::npos;
- if (auto StartPos = Url.find("//"); StartPos != std::string::npos)
- {
- HostnameStart = StartPos + 2;
- }
- if (auto EndPos = Url.find("/", HostnameStart); EndPos != std::string::npos)
- {
- HostnameLength = EndPos - HostnameStart;
- }
- if (auto EndPos = Url.find(":", HostnameStart); EndPos != std::string::npos)
- {
- HostnameLength = EndPos - HostnameStart;
- }
- return Url.substr(HostnameStart, HostnameLength);
- };
+#include <numeric>
- std::string HostUrl;
- std::string HostName;
- double HostLatencySec = -1.0;
+#if ZEN_WITH_TESTS
+# include <zencore/testing.h>
+# include <zencore/testutils.h>
+# include <zenhttp/httpclientauth.h>
+# include <zenremotestore/builds/filebuildstorage.h>
+#endif // ZEN_WITH_TESTS
- std::string CacheUrl;
- std::string CacheName;
- bool HostAssumeHttp2 = ClientSettings.AssumeHttp2;
- bool CacheAssumeHttp2 = ClientSettings.AssumeHttp2;
- double CacheLatencySec = -1.0;
-
- JupiterServerDiscovery DiscoveryResponse;
- const std::string_view DiscoveryHost = Host.empty() ? OverrideHost : Host;
+namespace zen {
- if (OverrideHost.empty() || (ZenCacheHost.empty() && AllowZenCacheDiscovery))
+StorageInstance::~StorageInstance()
+{
+ if (CacheLogSink)
{
- if (Verbose)
+ if (Ref<logging::BroadcastSink> Broadcast = GetDefaultBroadcastSink())
{
- ZEN_OPERATION_LOG_INFO(Output,
- "Querying servers at '{}/api/v1/status/servers'\n Connection settings:{}",
- DiscoveryHost,
- ConnectionSettingsToString(ClientSettings));
+ Broadcast->RemoveSink(CacheLogSink);
}
-
- DiscoveryResponse = DiscoverJupiterEndpoints(DiscoveryHost, ClientSettings);
}
+}
- if (!OverrideHost.empty())
- {
- if (Verbose)
- {
- ZEN_OPERATION_LOG_INFO(Output, "Testing server endpoint at '{}/health/live'. Assume http2: {}", OverrideHost, HostAssumeHttp2);
- }
- if (JupiterEndpointTestResult TestResult = TestJupiterEndpoint(OverrideHost, HostAssumeHttp2, ClientSettings.Verbose);
- TestResult.Success)
- {
- if (Verbose)
- {
- ZEN_OPERATION_LOG_INFO(Output, "Server endpoint at '{}/api/v1/status/servers' succeeded", OverrideHost);
- }
- HostUrl = OverrideHost;
- HostName = GetHostNameFromUrl(OverrideHost);
- HostLatencySec = TestResult.LatencySeconds;
- }
- else
- {
- throw std::runtime_error(fmt::format("Host {} could not be reached. Reason: {}", OverrideHost, TestResult.FailureReason));
- }
- }
- else
- {
- if (DiscoveryResponse.ServerEndPoints.empty())
- {
- throw std::runtime_error(fmt::format("Failed to find any builds hosts at {}", DiscoveryHost));
- }
-
- for (const JupiterServerDiscovery::EndPoint& ServerEndpoint : DiscoveryResponse.ServerEndPoints)
- {
- if (!ServerEndpoint.BaseUrl.empty())
- {
- if (Verbose)
- {
- ZEN_OPERATION_LOG_INFO(Output,
- "Testing server endpoint at '{}/health/live'. Assume http2: {}",
- ServerEndpoint.BaseUrl,
- ServerEndpoint.AssumeHttp2);
- }
-
- if (JupiterEndpointTestResult TestResult =
- TestJupiterEndpoint(ServerEndpoint.BaseUrl, ServerEndpoint.AssumeHttp2, ClientSettings.Verbose);
- TestResult.Success)
- {
- if (Verbose)
- {
- ZEN_OPERATION_LOG_INFO(Output, "Server endpoint at '{}/api/v1/status/servers' succeeded", ServerEndpoint.BaseUrl);
- }
-
- HostUrl = ServerEndpoint.BaseUrl;
- HostAssumeHttp2 = ServerEndpoint.AssumeHttp2;
- HostName = ServerEndpoint.Name;
- HostLatencySec = TestResult.LatencySeconds;
- break;
- }
- else
- {
- ZEN_OPERATION_LOG_DEBUG(Output,
- "Unable to reach host {}. Reason: {}",
- ServerEndpoint.BaseUrl,
- TestResult.FailureReason);
- }
- }
- }
- if (HostUrl.empty())
- {
- throw std::runtime_error(fmt::format("Failed to find any usable builds hosts out of {} using {}",
- DiscoveryResponse.ServerEndPoints.size(),
- DiscoveryHost));
- }
- }
- if (ZenCacheHost.empty())
- {
- if (AllowZenCacheDiscovery)
- {
- for (const JupiterServerDiscovery::EndPoint& CacheEndpoint : DiscoveryResponse.CacheEndPoints)
- {
- if (!CacheEndpoint.BaseUrl.empty())
- {
- if (Verbose)
- {
- ZEN_OPERATION_LOG_INFO(Output,
- "Testing cache endpoint at '{}/status/builds'. Assume http2: {}",
- CacheEndpoint.BaseUrl,
- CacheEndpoint.AssumeHttp2);
- }
// Establish a sessions-service client against the cache endpoint at
// TargetUrl, announce this process to it, and mirror our log output into the
// session by hooking a session log sink into the default broadcast sink.
// The sink added here is removed again in ~StorageInstance().
//
// @param TargetUrl  Base URL of the cache's sessions service.
// @param Mode       Free-form mode string reported with the session.
// @param SessionId  Identifier under which this session is announced.
void
StorageInstance::SetupCacheSession(std::string_view TargetUrl, std::string_view Mode, const Oid& SessionId)
{
    CacheSession = std::make_unique<SessionsServiceClient>(SessionsServiceClient::Options{
        .TargetUrl = std::string(TargetUrl),
        .AppName = "zen",
        .Mode = std::string(Mode),
        .SessionId = SessionId,
    });
    CacheSession->Announce();
    // Broadcast all subsequent log output to the remote session as well.
    CacheLogSink = CacheSession->CreateLogSink();
    GetDefaultBroadcastSink()->AddSink(CacheLogSink);
}
- if (ZenCacheEndpointTestResult TestResult =
- TestZenCacheEndpoint(CacheEndpoint.BaseUrl, CacheEndpoint.AssumeHttp2, ClientSettings.Verbose);
- TestResult.Success)
- {
- if (Verbose)
- {
- ZEN_OPERATION_LOG_INFO(Output, "Cache endpoint at '{}/status/builds' succeeded", CacheEndpoint.BaseUrl);
- }
+using namespace std::literals;
- CacheUrl = CacheEndpoint.BaseUrl;
- CacheAssumeHttp2 = CacheEndpoint.AssumeHttp2;
- CacheName = CacheEndpoint.Name;
- CacheLatencySec = TestResult.LatencySeconds;
- break;
- }
- }
- }
- }
- if (CacheUrl.empty() && AllowLocalZenCache)
- {
- ZenServerState State;
- if (State.InitializeReadOnly())
- {
- State.Snapshot([&](const ZenServerState::ZenServerEntry& Entry) {
- if (CacheUrl.empty())
- {
- std::string ZenServerLocalHostUrl = fmt::format("http://127.0.0.1:{}", Entry.EffectiveListenPort.load());
- if (ZenCacheEndpointTestResult TestResult =
- TestZenCacheEndpoint(ZenServerLocalHostUrl, /*AssumeHttp2*/ false, ClientSettings.Verbose);
- TestResult.Success)
- {
- CacheUrl = ZenServerLocalHostUrl;
- CacheAssumeHttp2 = false;
- CacheName = "localhost";
- CacheLatencySec = TestResult.LatencySeconds;
- }
- }
- });
- }
- }
- }
- else
+std::vector<ChunkBlockDescription>
+ParseBlockMetadatas(std::span<const CbObject> BlockMetadatas)
+{
+ std::vector<ChunkBlockDescription> UnorderedList;
+ UnorderedList.reserve(BlockMetadatas.size());
+ for (size_t CacheBlockMetadataIndex = 0; CacheBlockMetadataIndex < BlockMetadatas.size(); CacheBlockMetadataIndex++)
{
- if (Verbose)
- {
- ZEN_OPERATION_LOG_INFO(Output, "Testing cache endpoint at '{}/status/builds'. Assume http2: {}", ZenCacheHost, false);
- }
- if (ZenCacheEndpointTestResult TestResult = TestZenCacheEndpoint(ZenCacheHost, /*AssumeHttp2*/ false, ClientSettings.Verbose);
- TestResult.Success)
+ const CbObject& CacheBlockMetadata = BlockMetadatas[CacheBlockMetadataIndex];
+ ChunkBlockDescription Description = ParseChunkBlockDescription(CacheBlockMetadata);
+ if (Description.BlockHash != IoHash::Zero)
{
- CacheUrl = ZenCacheHost;
- CacheName = GetHostNameFromUrl(ZenCacheHost);
- CacheLatencySec = TestResult.LatencySeconds;
- }
- else
- {
- ZEN_WARN("Unable to reach cache host {}. Reason: {}", ZenCacheHost, TestResult.FailureReason);
+ UnorderedList.emplace_back(std::move(Description));
}
}
-
- return BuildStorageResolveResult{.HostUrl = HostUrl,
- .HostName = HostName,
- .HostAssumeHttp2 = HostAssumeHttp2,
- .HostLatencySec = HostLatencySec,
-
- .CacheUrl = CacheUrl,
- .CacheName = CacheName,
- .CacheAssumeHttp2 = CacheAssumeHttp2,
- .CacheLatencySec = CacheLatencySec};
+ return UnorderedList;
}
std::vector<ChunkBlockDescription>
-GetBlockDescriptions(OperationLogOutput& Output,
+GetBlockDescriptions(LoggerRef InLog,
BuildStorageBase& Storage,
BuildStorageCache* OptionalCacheStorage,
const Oid& BuildId,
- const Oid& BuildPartId,
std::span<const IoHash> BlockRawHashes,
bool AttemptFallback,
bool IsQuiet,
bool IsVerbose)
{
using namespace std::literals;
-
- if (!IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(Output, "Fetching metadata for {} blocks", BlockRawHashes.size());
- }
-
- Stopwatch GetBlockMetadataTimer;
+ ZEN_SCOPED_LOG(InLog);
std::vector<ChunkBlockDescription> UnorderedList;
tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockDescriptionLookup;
if (OptionalCacheStorage && !BlockRawHashes.empty())
{
std::vector<CbObject> CacheBlockMetadatas = OptionalCacheStorage->GetBlobMetadatas(BuildId, BlockRawHashes);
- UnorderedList.reserve(CacheBlockMetadatas.size());
- for (size_t CacheBlockMetadataIndex = 0; CacheBlockMetadataIndex < CacheBlockMetadatas.size(); CacheBlockMetadataIndex++)
+ if (!CacheBlockMetadatas.empty())
{
- const CbObject& CacheBlockMetadata = CacheBlockMetadatas[CacheBlockMetadataIndex];
- ChunkBlockDescription Description = ParseChunkBlockDescription(CacheBlockMetadata);
- if (Description.BlockHash == IoHash::Zero)
+ UnorderedList = ParseBlockMetadatas(CacheBlockMetadatas);
+ for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++)
{
- ZEN_OPERATION_LOG_WARN(Output, "Unexpected/invalid block metadata received from remote cache, skipping block");
+ const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex];
+ BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex);
}
- else
- {
- UnorderedList.emplace_back(std::move(Description));
- }
- }
- for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++)
- {
- const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex];
- BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex);
}
}
@@ -315,7 +138,7 @@ GetBlockDescriptions(OperationLogOutput& Output,
if (Description.BlockHash == IoHash::Zero)
{
- ZEN_OPERATION_LOG_WARN(Output, "Unexpected/invalid block metadata received from remote store, skipping block");
+ ZEN_WARN("Unexpected/invalid block metadata received from remote store, skipping block");
}
else
{
@@ -355,15 +178,6 @@ GetBlockDescriptions(OperationLogOutput& Output,
}
}
- if (!IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(Output,
- "GetBlockMetadata for {} took {}. Found {} blocks",
- BuildPartId,
- NiceTimeSpanMs(GetBlockMetadataTimer.GetElapsedTimeMs()),
- Result.size());
- }
-
if (Result.size() != BlockRawHashes.size())
{
std::string ErrorDescription =
@@ -385,7 +199,7 @@ GetBlockDescriptions(OperationLogOutput& Output,
}
if (AttemptFallback)
{
- ZEN_OPERATION_LOG_WARN(Output, "{} Attemping fallback options.", ErrorDescription);
+ ZEN_WARN("{} Attemping fallback options.", ErrorDescription);
std::vector<ChunkBlockDescription> AugmentedBlockDescriptions;
AugmentedBlockDescriptions.reserve(BlockRawHashes.size());
std::vector<ChunkBlockDescription> FoundBlocks = ParseChunkBlockDescriptionList(Storage.FindBlocks(BuildId, (uint64_t)-1));
@@ -408,7 +222,7 @@ GetBlockDescriptions(OperationLogOutput& Output,
{
if (!IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(Output, "Found block {} via context find successfully", BlockHash);
+ ZEN_INFO("Found block {} via context find successfully", BlockHash);
}
AugmentedBlockDescriptions.emplace_back(std::move(*ListBlocksIt));
}
@@ -453,4 +267,1472 @@ GetBlockDescriptions(OperationLogOutput& Output,
return Result;
}
+////////////////////// Shared helpers
+
// Location of the persisted operation-state file ("current_state.cbo")
// inside a zen working folder.
std::filesystem::path
ZenStateFilePath(const std::filesystem::path& ZenFolderPath)
{
    std::filesystem::path StatePath{ZenFolderPath};
    StatePath /= "current_state.cbo";
    return StatePath;
}
// Location of the scratch/temporary subfolder ("tmp") inside a zen working
// folder.
std::filesystem::path
ZenTempFolderPath(const std::filesystem::path& ZenFolderPath)
{
    std::filesystem::path TempPath{ZenFolderPath};
    TempPath /= "tmp";
    return TempPath;
}
+
+CbObject
+GetBuild(BuildStorageBase& Storage, const Oid& BuildId, bool IsQuiet)
+{
+ Stopwatch GetBuildTimer;
+ CbObject BuildObject = Storage.GetBuild(BuildId);
+ if (!IsQuiet)
+ {
+ ZEN_CONSOLE("GetBuild took {}. Name: '{}', Payload size: {}",
+ NiceTimeSpanMs(GetBuildTimer.GetElapsedTimeMs()),
+ BuildObject["name"sv].AsString(),
+ NiceBytes(BuildObject.GetSize()));
+
+ ZEN_CONSOLE("{}", GetCbObjectAsNiceString(BuildObject, " "sv, "\n"sv));
+ }
+ return BuildObject;
+}
+
// Size the per-worker in-memory buffer: boosted workers get room for a full
// block plus 16 KiB of slack, everyone else is capped at 1 MiB.
uint64_t
GetMaxMemoryBufferSize(size_t MaxBlockSize, bool BoostWorkerMemory)
{
    if (BoostWorkerMemory)
    {
        return MaxBlockSize + 16u * 1024u;
    }
    return 1024u * 1024u;
}
+
// Download a large blob as multiple ranged requests written into a temporary
// file, then hand the result to OnDownloadComplete as a delete-on-close,
// file-backed IoBuffer.
//
// Storage.GetLargeBuildBlob splits the transfer into work items which are
// scheduled on NetworkPool via Work; the receive callback writes each chunk
// at its offset in the temp file, and the completion callback detaches the
// file handle into the payload buffer. Both callbacks share ownership of the
// temp file through a shared_ptr so it stays alive until the last scheduled
// item has run. If Work is aborted, writes and completion are skipped.
//
// @param DownloadedChunkByteCount  Incremented by each received chunk's size
//                                  (counted even when Work is aborted).
// @param MultipartAttachmentCount  Incremented once iff any work items were
//                                  produced for this blob.
// @param OnDownloadComplete        Invoked (on a worker) with the finished
//                                  file-backed payload; not invoked on abort.
// @throws std::runtime_error if the temporary file cannot be created.
void
DownloadLargeBlob(BuildStorageBase& Storage,
                  const std::filesystem::path& DownloadFolder,
                  const Oid& BuildId,
                  const IoHash& ChunkHash,
                  const std::uint64_t PreferredMultipartChunkSize,
                  ParallelWork& Work,
                  WorkerThreadPool& NetworkPool,
                  std::atomic<uint64_t>& DownloadedChunkByteCount,
                  std::atomic<uint64_t>& MultipartAttachmentCount,
                  std::function<void(IoBuffer&& Payload)>&& OnDownloadComplete)
{
    ZEN_TRACE_CPU("DownloadLargeBlob");

    // Heap-allocated so the temp file outlives this stack frame -- the
    // callbacks below run asynchronously on pool threads.
    struct WorkloadData
    {
        TemporaryFile TempFile;
    };
    std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>());

    std::error_code Ec;
    Workload->TempFile.CreateTemporary(DownloadFolder, Ec);
    if (Ec)
    {
        throw std::runtime_error(
            fmt::format("Failed opening temporary file '{}', reason: ({}) {}", Workload->TempFile.GetPath(), Ec.message(), Ec.value()));
    }
    std::vector<std::function<void()>> WorkItems = Storage.GetLargeBuildBlob(
        BuildId,
        ChunkHash,
        PreferredMultipartChunkSize,
        // Receive callback: persist each chunk at its offset. Byte counting
        // happens unconditionally; the file write is skipped once aborted.
        [&Work, Workload, &DownloadedChunkByteCount](uint64_t Offset, const IoBuffer& Chunk) {
            DownloadedChunkByteCount += Chunk.GetSize();

            if (!Work.IsAborted())
            {
                ZEN_TRACE_CPU("Async_DownloadLargeBlob_OnReceive");
                Workload->TempFile.Write(Chunk.GetView(), Offset);
            }
        },
        // Completion callback: transfer file ownership to an IoBuffer that
        // deletes the file when closed, then notify the caller.
        [&Work, Workload, OnDownloadComplete = std::move(OnDownloadComplete)]() {
            if (!Work.IsAborted())
            {
                ZEN_TRACE_CPU("Async_DownloadLargeBlob_OnComplete");

                uint64_t PayloadSize = Workload->TempFile.FileSize();
                void* FileHandle = Workload->TempFile.Detach();
                ZEN_ASSERT(FileHandle != nullptr);
                IoBuffer Payload(IoBuffer::File, FileHandle, 0, PayloadSize, true);
                Payload.SetDeleteOnClose(true);
                OnDownloadComplete(std::move(Payload));
            }
        });
    if (!WorkItems.empty())
    {
        MultipartAttachmentCount++;
    }
    // Fan the ranged requests out onto the network pool; each item re-checks
    // the abort flag right before executing.
    for (auto& WorkItem : WorkItems)
    {
        Work.ScheduleWork(NetworkPool, [WorkItem = std::move(WorkItem)](std::atomic<bool>& AbortFlag) {
            if (!AbortFlag)
            {
                ZEN_TRACE_CPU("Async_DownloadLargeBlob_Work");

                WorkItem();
            }
        });
    }
}
+
// Validate a downloaded blob payload against its expected raw hash.
//
// Checks, in order: content type is compressed-binary; the compressed header
// parses; the header's raw hash matches BlobHash; the payload decompresses
// cleanly; and the hash of the decompressed stream matches BlobHash again
// (guarding against a corrupt body under a valid header). On success the
// compressed and decompressed sizes are written to the out-parameters.
//
// Returns the fully decompressed data as a CompositeBuffer only when the
// blob is stored with OodleCompressionLevel::None (callers need the raw
// bytes for block verification in that case); otherwise, or when AbortFlag
// is raised mid-validation, returns an empty CompositeBuffer.
//
// @throws std::runtime_error on any validation failure.
CompositeBuffer
ValidateBlob(std::atomic<bool>& AbortFlag,
             IoBuffer&& Payload,
             const IoHash& BlobHash,
             uint64_t& OutCompressedSize,
             uint64_t& OutDecompressedSize)
{
    ZEN_TRACE_CPU("ValidateBlob");

    if (Payload.GetContentType() != ZenContentType::kCompressedBinary)
    {
        throw std::runtime_error(fmt::format("Blob {} ({} bytes) has unexpected content type '{}'",
                                             BlobHash,
                                             Payload.GetSize(),
                                             ToString(Payload.GetContentType())));
    }
    IoHash RawHash;
    uint64_t RawSize;
    CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize);
    if (!Compressed)
    {
        throw std::runtime_error(fmt::format("Blob {} ({} bytes) compressed header is invalid", BlobHash, Payload.GetSize()));
    }
    if (RawHash != BlobHash)
    {
        throw std::runtime_error(
            fmt::format("Blob {} ({} bytes) compressed header has a mismatching raw hash {}", BlobHash, Payload.GetSize(), RawHash));
    }

    // Stream-decompress the whole payload, hashing as we go, so validation
    // never needs the full decompressed blob in memory. Returning false from
    // the range callback (on abort) stops the decompression early.
    IoHashStream Hash;
    bool CouldDecompress = Compressed.DecompressToStream(
        0,
        RawSize,
        [&AbortFlag, &Hash](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
            ZEN_UNUSED(SourceOffset, SourceSize, Offset);
            if (!AbortFlag)
            {
                for (const SharedBuffer& Segment : RangeBuffer.GetSegments())
                {
                    Hash.Append(Segment.GetView());
                }
                return true;
            }
            return false;
        });

    // Aborted mid-stream: the partial hash is meaningless, bail out quietly.
    if (AbortFlag)
    {
        return CompositeBuffer{};
    }

    if (!CouldDecompress)
    {
        throw std::runtime_error(
            fmt::format("Blob {} ({} bytes) failed to decompress - header information mismatch", BlobHash, Payload.GetSize()));
    }
    IoHash ValidateRawHash = Hash.GetHash();
    if (ValidateRawHash != BlobHash)
    {
        throw std::runtime_error(fmt::format("Blob {} ({} bytes) decompressed hash {} does not match header information",
                                             BlobHash,
                                             Payload.GetSize(),
                                             ValidateRawHash));
    }
    OodleCompressor Compressor;
    OodleCompressionLevel CompressionLevel;
    uint64_t BlockSize;
    if (!Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize))
    {
        throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to get compression details", BlobHash, Payload.GetSize()));
    }
    OutCompressedSize = Payload.GetSize();
    OutDecompressedSize = RawSize;
    if (CompressionLevel == OodleCompressionLevel::None)
    {
        // Only decompress to composite if we need it for block verification
        CompositeBuffer DecompressedComposite = Compressed.DecompressToComposite();
        if (!DecompressedComposite)
        {
            throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to decompress to composite", BlobHash, Payload.GetSize()));
        }
        return DecompressedComposite;
    }
    return CompositeBuffer{};
}
+
+CompositeBuffer
+ValidateBlob(std::atomic<bool>& AbortFlag,
+ BuildStorageBase& Storage,
+ const Oid& BuildId,
+ const IoHash& BlobHash,
+ uint64_t& OutCompressedSize,
+ uint64_t& OutDecompressedSize)
+{
+ ZEN_TRACE_CPU("ValidateBlob");
+ IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlobHash);
+ if (!Payload)
+ {
+ throw std::runtime_error(fmt::format("Blob {} could not be found", BlobHash));
+ }
+ return ValidateBlob(AbortFlag, std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize);
+}
+
+std::vector<std::pair<Oid, std::string>>
+ResolveBuildPartNames(CbObjectView BuildObject,
+ const Oid& BuildId,
+ const std::vector<Oid>& BuildPartIds,
+ std::span<const std::string> BuildPartNames,
+ std::uint64_t& OutPreferredMultipartChunkSize)
+{
+ std::vector<std::pair<Oid, std::string>> Result;
+ {
+ CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView();
+ if (!PartsObject)
+ {
+ throw std::runtime_error("Build object does not have a 'parts' object");
+ }
+
+ OutPreferredMultipartChunkSize = BuildObject["chunkSize"sv].AsUInt64(OutPreferredMultipartChunkSize);
+
+ std::vector<std::pair<Oid, std::string>> AvailableParts;
+
+ for (CbFieldView PartView : PartsObject)
+ {
+ const std::string BuildPartName = std::string(PartView.GetName());
+ const Oid BuildPartId = PartView.AsObjectId();
+ if (BuildPartId == Oid::Zero)
+ {
+ ExtendableStringBuilder<128> SB;
+ for (CbFieldView ScanPartView : PartsObject)
+ {
+ SB.Append(fmt::format("\n {}: {}", ScanPartView.GetName(), ScanPartView.AsObjectId()));
+ }
+ throw std::runtime_error(fmt::format("Build object parts does not have a '{}' object id{}", BuildPartName, SB.ToView()));
+ }
+ AvailableParts.push_back({BuildPartId, BuildPartName});
+ }
+
+ if (BuildPartIds.empty() && BuildPartNames.empty())
+ {
+ Result = AvailableParts;
+ }
+ else
+ {
+ for (const std::string& BuildPartName : BuildPartNames)
+ {
+ if (auto It = std::find_if(AvailableParts.begin(),
+ AvailableParts.end(),
+ [&BuildPartName](const auto& Part) { return Part.second == BuildPartName; });
+ It != AvailableParts.end())
+ {
+ Result.push_back(*It);
+ }
+ else
+ {
+ throw std::runtime_error(fmt::format("Build {} object does not have a part named '{}'", BuildId, BuildPartName));
+ }
+ }
+ for (const Oid& BuildPartId : BuildPartIds)
+ {
+ if (auto It = std::find_if(AvailableParts.begin(),
+ AvailableParts.end(),
+ [&BuildPartId](const auto& Part) { return Part.first == BuildPartId; });
+ It != AvailableParts.end())
+ {
+ Result.push_back(*It);
+ }
+ else
+ {
+ throw std::runtime_error(fmt::format("Build {} object does not have a part with id '{}'", BuildId, BuildPartId));
+ }
+ }
+ }
+
+ if (Result.empty())
+ {
+ throw std::runtime_error(fmt::format("Build object does not have any parts", BuildId));
+ }
+ }
+ return Result;
+}
+
+void
+NormalizePartSelection(std::vector<Oid>& BuildPartIds, std::vector<std::string>& BuildPartNames, std::string_view HelpText)
+{
+ const bool HasWildcard = std::find(BuildPartNames.begin(), BuildPartNames.end(), "*") != BuildPartNames.end();
+ if (HasWildcard)
+ {
+ if (BuildPartNames.size() != 1 || !BuildPartIds.empty())
+ {
+ throw OptionParseException("'*' cannot be combined with other part names or ids", std::string(HelpText));
+ }
+ BuildPartNames.clear();
+ return;
+ }
+
+ if (BuildPartIds.empty() && BuildPartNames.empty())
+ {
+ BuildPartNames.push_back("default");
+ }
+}
+
// Fetches the manifest(s) for the requested build part(s) of `BuildId` and turns
// them into an in-memory description of the remote content.
//
// BuildParts[0] is the base part; any additional entries are overlay parts that
// are merged on top of it (MergeChunkedFolderContents). On return:
//   - OutChunkController holds a controller created from the base part's
//     "chunker" manifest section, so local chunking matches the remote build.
//   - OutPartContents holds one ChunkedFolderContent per requested part
//     (index 0 == base part).
//   - OutBlockDescriptions / OutLooseChunkHashes hold the union, de-duplicated
//     by hash, of the block metadata / loose chunk hashes across all parts.
// Returns the merged ChunkedFolderContent covering all requested parts.
//
// `Manifest` (when it has parts) restricts each build part to the files listed
// for it; IncludeWildcards/ExcludeWildcards filter the path set further.
ChunkedFolderContent
GetRemoteContent(LoggerRef InLog,
                 StorageInstance& Storage,
                 const Oid& BuildId,
                 const std::vector<std::pair<Oid, std::string>>& BuildParts,
                 const BuildManifest& Manifest,
                 std::span<const std::string> IncludeWildcards,
                 std::span<const std::string> ExcludeWildcards,
                 std::unique_ptr<ChunkingController>& OutChunkController,
                 std::vector<ChunkedFolderContent>& OutPartContents,
                 std::vector<ChunkBlockDescription>& OutBlockDescriptions,
                 std::vector<IoHash>& OutLooseChunkHashes,
                 bool IsQuiet,
                 bool IsVerbose,
                 bool DoExtraContentVerify)
{
    ZEN_TRACE_CPU("GetRemoteContent");
    ZEN_SCOPED_LOG(InLog);

    // Fetch the base part's manifest first; its "chunker" section decides which
    // chunking controller everything downstream uses.
    Stopwatch GetBuildPartTimer;
    const Oid BuildPartId = BuildParts[0].first;
    const std::string_view BuildPartName = BuildParts[0].second;
    CbObject BuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, BuildPartId);
    if (!IsQuiet)
    {
        ZEN_INFO("GetBuildPart {} ('{}') took {}. Payload size: {}",
                 BuildPartId,
                 BuildPartName,
                 NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()),
                 NiceBytes(BuildPartManifest.GetSize()));
        ZEN_INFO("{}", GetCbObjectAsNiceString(BuildPartManifest, " "sv, "\n"sv));
    }

    {
        // Recreate the chunking controller the build was produced with.
        CbObjectView Chunker = BuildPartManifest["chunker"sv].AsObjectView();
        std::string_view ChunkerName = Chunker["name"sv].AsString();
        CbObjectView Parameters = Chunker["parameters"sv].AsObjectView();
        OutChunkController = CreateChunkingController(ChunkerName, Parameters);
    }

    // Parses one build-part manifest into a ChunkedFolderContent plus its block
    // descriptions and loose chunk hashes, applying the optional per-part file
    // manifest and the include/exclude wildcards.
    // NOTE(review): `Log` is presumably the scoped logger introduced by
    // ZEN_SCOPED_LOG(InLog) above — confirm the macro declares a local `Log`.
    auto ParseBuildPartManifest = [&Log, IsQuiet, IsVerbose, DoExtraContentVerify](StorageInstance& Storage,
                                                                                  const Oid& BuildId,
                                                                                  const Oid& BuildPartId,
                                                                                  CbObject BuildPartManifest,
                                                                                  std::span<const std::string> IncludeWildcards,
                                                                                  std::span<const std::string> ExcludeWildcards,
                                                                                  const BuildManifest::Part* OptionalManifest,
                                                                                  ChunkedFolderContent& OutRemoteContent,
                                                                                  std::vector<ChunkBlockDescription>& OutBlockDescriptions,
                                                                                  std::vector<IoHash>& OutLooseChunkHashes) {
        std::vector<uint32_t> AbsoluteChunkOrders;
        std::vector<uint64_t> LooseChunkRawSizes;
        std::vector<IoHash> BlockRawHashes;

        // Decode the compact-binary manifest into its parallel arrays.
        ReadBuildContentFromCompactBinary(BuildPartManifest,
                                          OutRemoteContent.Platform,
                                          OutRemoteContent.Paths,
                                          OutRemoteContent.RawHashes,
                                          OutRemoteContent.RawSizes,
                                          OutRemoteContent.Attributes,
                                          OutRemoteContent.ChunkedContent.SequenceRawHashes,
                                          OutRemoteContent.ChunkedContent.ChunkCounts,
                                          AbsoluteChunkOrders,
                                          OutLooseChunkHashes,
                                          LooseChunkRawSizes,
                                          BlockRawHashes);

        // TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them

        {
            // Fetch the metadata for every block referenced by this part.
            if (!IsQuiet)
            {
                ZEN_INFO("Fetching metadata for {} blocks", BlockRawHashes.size());
            }

            Stopwatch GetBlockMetadataTimer;

            bool AttemptFallback = false;
            OutBlockDescriptions = GetBlockDescriptions(Log(),
                                                        *Storage.BuildStorage,
                                                        Storage.CacheStorage.get(),
                                                        BuildId,
                                                        BlockRawHashes,
                                                        AttemptFallback,
                                                        IsQuiet,
                                                        IsVerbose);

            if (!IsQuiet)
            {
                ZEN_INFO("GetBlockMetadata for {} took {}. Found {} blocks",
                         BuildPartId,
                         NiceTimeSpanMs(GetBlockMetadataTimer.GetElapsedTimeMs()),
                         OutBlockDescriptions.size());
            }
        }

        // Translate the absolute (build-wide) chunk ordering into this part's
        // chunk hash/size/order arrays, combining loose chunks and block contents.
        CalculateLocalChunkOrders(AbsoluteChunkOrders,
                                  OutLooseChunkHashes,
                                  LooseChunkRawSizes,
                                  OutBlockDescriptions,
                                  OutRemoteContent.ChunkedContent.ChunkHashes,
                                  OutRemoteContent.ChunkedContent.ChunkRawSizes,
                                  OutRemoteContent.ChunkedContent.ChunkOrders,
                                  DoExtraContentVerify);

        // Paths to drop from the remote view: not listed in the per-part manifest,
        // or excluded by the wildcards.
        std::vector<std::filesystem::path> DeletedPaths;

        if (OptionalManifest)
        {
            // Case-insensitive membership test against the manifest's file list.
            tsl::robin_set<std::string> PathsInManifest;
            PathsInManifest.reserve(OptionalManifest->Files.size());
            for (const std::filesystem::path& ManifestPath : OptionalManifest->Files)
            {
                PathsInManifest.insert(ToLower(ManifestPath.generic_string()));
            }
            for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths)
            {
                if (!PathsInManifest.contains(ToLower(RemotePath.generic_string())))
                {
                    DeletedPaths.push_back(RemotePath);
                }
            }
        }

        if (!IncludeWildcards.empty() || !ExcludeWildcards.empty())
        {
            for (const std::filesystem::path& RemotePath : OutRemoteContent.Paths)
            {
                // NOTE(review): the path is lower-cased yet IncludePath is invoked
                // with CaseSensitive=true — presumably the wildcards are pre-lowered
                // by the caller; confirm.
                if (!IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(RemotePath.generic_string()), /*CaseSensitive*/ true))
                {
                    DeletedPaths.push_back(RemotePath);
                }
            }
        }

        if (!DeletedPaths.empty())
        {
            OutRemoteContent = DeletePathsFromChunkedContent(OutRemoteContent, DeletedPaths);
            // Drop loose-chunk hashes that are no longer referenced after the deletions.
            InlineRemoveUnusedHashes(OutLooseChunkHashes, OutRemoteContent.ChunkedContent.ChunkHashes);
        }

#if ZEN_BUILD_DEBUG
        ValidateChunkedFolderContent(OutRemoteContent, OutBlockDescriptions, OutLooseChunkHashes, IncludeWildcards, ExcludeWildcards);
#endif // ZEN_BUILD_DEBUG
    };

    // Finds the manifest entry for a given part: a single anonymous entry (no id,
    // no name) applies to any part; otherwise match by id first, then by name.
    auto FindManifest = [&Manifest](const Oid& BuildPartId, std::string_view BuildPartName) -> const BuildManifest::Part* {
        if (Manifest.Parts.empty())
        {
            return nullptr;
        }
        if (Manifest.Parts.size() == 1)
        {
            if (Manifest.Parts[0].PartId == Oid::Zero && Manifest.Parts[0].PartName.empty())
            {
                return &Manifest.Parts[0];
            }
        }

        auto It = std::find_if(Manifest.Parts.begin(), Manifest.Parts.end(), [BuildPartId, BuildPartName](const BuildManifest::Part& Part) {
            if (Part.PartId != Oid::Zero)
            {
                return Part.PartId == BuildPartId;
            }
            if (!Part.PartName.empty())
            {
                return Part.PartName == BuildPartName;
            }
            return false;
        });
        if (It != Manifest.Parts.end())
        {
            return &(*It);
        }
        return nullptr;
    };

    // Parse the base part into slot 0.
    OutPartContents.resize(1);
    ParseBuildPartManifest(Storage,
                           BuildId,
                           BuildPartId,
                           BuildPartManifest,
                           IncludeWildcards,
                           ExcludeWildcards,
                           FindManifest(BuildPartId, BuildPartName),
                           OutPartContents[0],
                           OutBlockDescriptions,
                           OutLooseChunkHashes);
    ChunkedFolderContent RemoteContent;
    if (BuildParts.size() > 1)
    {
        // Parse every overlay part, merge on top of the base content, and fold the
        // overlay block/loose-chunk metadata into the de-duplicated outputs.
        std::vector<ChunkBlockDescription> OverlayBlockDescriptions;
        std::vector<IoHash> OverlayLooseChunkHashes;
        for (size_t PartIndex = 1; PartIndex < BuildParts.size(); PartIndex++)
        {
            const Oid& OverlayBuildPartId = BuildParts[PartIndex].first;
            const std::string& OverlayBuildPartName = BuildParts[PartIndex].second;
            Stopwatch GetOverlayBuildPartTimer;
            CbObject OverlayBuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, OverlayBuildPartId);
            if (!IsQuiet)
            {
                ZEN_INFO("GetBuildPart {} ('{}') took {}. Payload size: {}",
                         OverlayBuildPartId,
                         OverlayBuildPartName,
                         NiceTimeSpanMs(GetOverlayBuildPartTimer.GetElapsedTimeMs()),
                         NiceBytes(OverlayBuildPartManifest.GetSize()));
            }

            ChunkedFolderContent OverlayPartContent;
            std::vector<ChunkBlockDescription> OverlayPartBlockDescriptions;
            std::vector<IoHash> OverlayPartLooseChunkHashes;

            ParseBuildPartManifest(Storage,
                                   BuildId,
                                   OverlayBuildPartId,
                                   OverlayBuildPartManifest,
                                   IncludeWildcards,
                                   ExcludeWildcards,
                                   FindManifest(OverlayBuildPartId, OverlayBuildPartName),
                                   OverlayPartContent,
                                   OverlayPartBlockDescriptions,
                                   OverlayPartLooseChunkHashes);
            OutPartContents.push_back(OverlayPartContent);
            OverlayBlockDescriptions.insert(OverlayBlockDescriptions.end(),
                                            OverlayPartBlockDescriptions.begin(),
                                            OverlayPartBlockDescriptions.end());
            OverlayLooseChunkHashes.insert(OverlayLooseChunkHashes.end(),
                                           OverlayPartLooseChunkHashes.begin(),
                                           OverlayPartLooseChunkHashes.end());
        }

        RemoteContent = MergeChunkedFolderContents(OutPartContents[0], std::span<const ChunkedFolderContent>(OutPartContents).subspan(1));
        {
            // De-duplicate block descriptions across base + overlays by block hash.
            tsl::robin_set<IoHash> AllBlockHashes;
            for (const ChunkBlockDescription& Description : OutBlockDescriptions)
            {
                AllBlockHashes.insert(Description.BlockHash);
            }
            for (const ChunkBlockDescription& Description : OverlayBlockDescriptions)
            {
                if (!AllBlockHashes.contains(Description.BlockHash))
                {
                    AllBlockHashes.insert(Description.BlockHash);
                    OutBlockDescriptions.push_back(Description);
                }
            }
        }
        {
            // De-duplicate loose chunk hashes across base + overlays.
            tsl::robin_set<IoHash> AllLooseChunkHashes(OutLooseChunkHashes.begin(), OutLooseChunkHashes.end());
            for (const IoHash& OverlayLooseChunkHash : OverlayLooseChunkHashes)
            {
                if (!AllLooseChunkHashes.contains(OverlayLooseChunkHash))
                {
                    AllLooseChunkHashes.insert(OverlayLooseChunkHash);
                    OutLooseChunkHashes.push_back(OverlayLooseChunkHash);
                }
            }
        }
    }
    else
    {
        RemoteContent = OutPartContents[0];
    }
    return RemoteContent;
}
+std::string
+GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix)
+{
+ ExtendableStringBuilder<512> SB;
+ std::vector<std::pair<std::string, std::string>> NameStringValuePairs;
+ for (CbFieldView Field : Object)
+ {
+ std::string_view Name = Field.GetName();
+ switch (CbValue Accessor = Field.GetValue(); Accessor.GetType())
+ {
+ case CbFieldType::String:
+ NameStringValuePairs.push_back({std::string(Name), std::string(Accessor.AsString())});
+ break;
+ case CbFieldType::IntegerPositive:
+ NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerPositive())});
+ break;
+ case CbFieldType::IntegerNegative:
+ NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerNegative())});
+ break;
+ case CbFieldType::Float32:
+ {
+ const float Value = Accessor.AsFloat32();
+ if (std::isfinite(Value))
+ {
+ NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.9g}", Value)});
+ }
+ else
+ {
+ NameStringValuePairs.push_back({std::string(Name), "null"});
+ }
+ }
+ break;
+ case CbFieldType::Float64:
+ {
+ const double Value = Accessor.AsFloat64();
+ if (std::isfinite(Value))
+ {
+ NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.17g}", Value)});
+ }
+ else
+ {
+ NameStringValuePairs.push_back({std::string(Name), "null"});
+ }
+ }
+ break;
+ case CbFieldType::BoolFalse:
+ NameStringValuePairs.push_back({std::string(Name), "false"});
+ break;
+ case CbFieldType::BoolTrue:
+ NameStringValuePairs.push_back({std::string(Name), "true"});
+ break;
+ case CbFieldType::Hash:
+ {
+ NameStringValuePairs.push_back({std::string(Name), Accessor.AsHash().ToHexString()});
+ }
+ break;
+ case CbFieldType::Uuid:
+ {
+ StringBuilder<Oid::StringLength + 1> Builder;
+ Accessor.AsUuid().ToString(Builder);
+ NameStringValuePairs.push_back({std::string(Name), Builder.ToString()});
+ }
+ break;
+ case CbFieldType::DateTime:
+ {
+ ExtendableStringBuilder<64> Builder;
+ Builder << DateTime(Accessor.AsDateTimeTicks()).ToIso8601();
+ NameStringValuePairs.push_back({std::string(Name), Builder.ToString()});
+ }
+ break;
+ case CbFieldType::TimeSpan:
+ {
+ ExtendableStringBuilder<64> Builder;
+ const TimeSpan Span(Accessor.AsTimeSpanTicks());
+ if (Span.GetDays() == 0)
+ {
+ Builder << Span.ToString("%h:%m:%s.%n");
+ }
+ else
+ {
+ Builder << Span.ToString("%d.%h:%m:%s.%n");
+ }
+ NameStringValuePairs.push_back({std::string(Name), Builder.ToString()});
+ break;
+ }
+ case CbFieldType::ObjectId:
+ NameStringValuePairs.push_back({std::string(Name), Accessor.AsObjectId().ToString()});
+ break;
+ }
+ }
+ std::string::size_type LongestKey = 0;
+ for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs)
+ {
+ LongestKey = Max(KeyValue.first.length(), LongestKey);
+ }
+ for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs)
+ {
+ SB.Append(fmt::format("{}{:<{}}: {}{}", Prefix, KeyValue.first, LongestKey, KeyValue.second, Suffix));
+ }
+ return SB.ToString();
+}
+
+#if ZEN_WITH_TESTS
+
+namespace buildstorageoperations_testutils {
    // Self-contained fixture for the build-storage operation tests.
    //
    // Owns a file-backed build storage under `RootPath`, a chunking controller and
    // an in-memory chunking cache, plus the worker/network thread pools the
    // operations need. The helper methods cover the full round trip:
    // CreateSourceData -> Upload -> ValidateUpload -> Download -> ValidateDownload.
    struct TestState
    {
        TestState(const std::filesystem::path& InRootPath)
        : RootPath(InRootPath)
        , LogOutput(CreateStandardProgress(Log))
        , ChunkController(CreateStandardChunkingController(StandardChunkingControllerSettings{}))
        , ChunkCache(CreateMemoryChunkingCache())
        , WorkerPool(2)
        , NetworkPool(2)
        {
        }

        // Lays out the on-disk folder structure under RootPath and creates the
        // file-backed build storage. Must be called before any other helper.
        void Initialize()
        {
            StoragePath = RootPath / "storage";
            TempPath = RootPath / "temp";
            SystemRootDir = RootPath / "sysroot";
            ZenFolderPath = RootPath / ".zen";

            CreateDirectories(TempPath);
            CreateDirectories(StoragePath);

            Storage.BuildStorage = CreateFileBuildStorage(StoragePath, StorageStats, false);
        }

        // Populates RootPath/Source with one file per entry of Paths/Sizes, filled
        // with semi-random data (zero-size entries produce empty files).
        void CreateSourceData(const std::filesystem::path& Source, std::span<const std::string> Paths, std::span<const uint64_t> Sizes)
        {
            const std::filesystem::path SourcePath = RootPath / Source;
            CreateDirectories(SourcePath);
            for (size_t FileIndex = 0; FileIndex < Paths.size(); FileIndex++)
            {
                const std::string& FilePath = Paths[FileIndex];
                const uint64_t FileSize = Sizes[FileIndex];
                IoBuffer FileData = FileSize > 0 ? CreateSemiRandomBlob(FileSize) : IoBuffer{};
                WriteFile(SourcePath / FilePath, FileData);
            }
        }

        // Uploads RootPath/Source as build `BuildId`. With an empty id/name and a
        // multipart ManifestPath the upload may produce several parts; returns the
        // (part id, part name) pairs that were created.
        std::vector<std::pair<Oid, std::string>> Upload(const Oid& BuildId,
                                                        const Oid& BuildPartId,
                                                        const std::string_view BuildPartName,
                                                        const std::filesystem::path& Source,
                                                        const std::filesystem::path& ManifestPath)
        {
            const std::filesystem::path SourcePath = RootPath / Source;
            CbObject MetaData;
            BuildsOperationUploadFolder Upload(Log,
                                               *LogOutput,
                                               Storage,
                                               AbortFlag,
                                               PauseFlag,
                                               WorkerPool,
                                               NetworkPool,
                                               BuildId,
                                               SourcePath,
                                               true,
                                               MetaData,
                                               BuildsOperationUploadFolder::Options{.TempDir = TempPath});
            return Upload.Execute(BuildPartId, BuildPartName, ManifestPath, *ChunkController, *ChunkCache);
        }

        // Runs the validate-build-part operation on every uploaded part.
        void ValidateUpload(const Oid& BuildId, const std::vector<std::pair<Oid, std::string>>& Parts)
        {
            for (auto Part : Parts)
            {
                BuildsOperationValidateBuildPart Validate(Log,
                                                          *LogOutput,
                                                          *Storage.BuildStorage,
                                                          AbortFlag,
                                                          PauseFlag,
                                                          WorkerPool,
                                                          NetworkPool,
                                                          BuildId,
                                                          Part.first,
                                                          Part.second,
                                                          BuildsOperationValidateBuildPart::Options{.TempFolder = TempPath / "validate"});
                Validate.Execute();
            }
        }

        // Downloads the selected part(s) of `BuildId` into RootPath/Target and
        // returns the resulting folder content. With Oid::Zero and an empty name
        // the whole build is resolved. When `Append` is true the remote content is
        // overlaid on whatever is already present in the target folder.
        FolderContent Download(const Oid& BuildId,
                               const Oid& BuildPartId,
                               const std::string_view BuildPartName,
                               const std::filesystem::path& Target,
                               bool Append)
        {
            const std::filesystem::path TargetPath = RootPath / Target;

            CreateDirectories(TargetPath);

            // Resolve the id/name selection against the build object.
            uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;
            CbObject BuildObject = Storage.BuildStorage->GetBuild(BuildId);
            std::vector<Oid> PartIds;
            if (BuildPartId != Oid::Zero)
            {
                PartIds.push_back(BuildPartId);
            }
            std::vector<std::string> PartNames;
            if (!BuildPartName.empty())
            {
                PartNames.push_back(std::string(BuildPartName));
            }
            std::vector<std::pair<Oid, std::string>> AllBuildParts =
                ResolveBuildPartNames(BuildObject, BuildId, PartIds, PartNames, PreferredMultipartChunkSize);

            std::vector<ChunkedFolderContent> PartContents;

            std::vector<ChunkBlockDescription> BlockDescriptions;
            std::vector<IoHash> LooseChunkHashes;

            // NOTE: GetRemoteContent replaces ChunkController with one created from
            // the remote build's chunker settings; the local chunking below then
            // matches the remote chunking.
            ChunkedFolderContent RemoteContent = GetRemoteContent(Log,
                                                                  Storage,
                                                                  BuildId,
                                                                  AllBuildParts,
                                                                  {},
                                                                  {},
                                                                  {},
                                                                  ChunkController,
                                                                  PartContents,
                                                                  BlockDescriptions,
                                                                  LooseChunkHashes,
                                                                  /*IsQuiet*/ false,
                                                                  /*IsVerbose*/ false,
                                                                  /*DoExtraContentVerify*/ true);

            GetFolderContentStatistics LocalFolderScanStats;

            // Collects every file already present in the target folder (thread-safe,
            // since the directory scan runs on the worker pool).
            struct ContentVisitor : public GetDirectoryContentVisitor
            {
                virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content)
                {
                    RwLock::ExclusiveLockScope _(ExistingPathsLock);
                    for (const std::filesystem::path& FileName : Content.FileNames)
                    {
                        if (RelativeRoot.empty())
                        {
                            ExistingPaths.push_back(FileName);
                        }
                        else
                        {
                            ExistingPaths.push_back(RelativeRoot / FileName);
                        }
                    }
                }

                RwLock ExistingPathsLock;
                std::vector<std::filesystem::path> ExistingPaths;
            } Visitor;

            Latch PendingWorkCount(1);

            GetDirectoryContent(TargetPath,
                                DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive,
                                Visitor,
                                WorkerPool,
                                PendingWorkCount);

            // Release our own count and wait for the async scan to drain.
            PendingWorkCount.CountDown();
            PendingWorkCount.Wait();

            // Hash/stat the existing local files...
            FolderContent CurrentLocalFolderState = GetValidFolderContent(
                WorkerPool,
                LocalFolderScanStats,
                TargetPath,
                Visitor.ExistingPaths,
                [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); },
                1000,
                AbortFlag,
                PauseFlag);

            // ...and chunk them with the (remote-matching) controller so the update
            // operation can reuse local data.
            ChunkingStatistics LocalChunkingStats;
            ChunkedFolderContent LocalContent = ChunkFolderContent(
                LocalChunkingStats,
                WorkerPool,
                TargetPath,
                CurrentLocalFolderState,
                *ChunkController,
                *ChunkCache,
                1000,
                [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { ZEN_UNUSED(IsAborted, IsPaused); },
                AbortFlag,
                PauseFlag);

            if (Append)
            {
                // Keep existing local files and lay the remote content on top.
                RemoteContent = ApplyChunkedContentOverlay(LocalContent, RemoteContent, {}, {});
            }

            const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent);
            const ChunkedContentLookup RemoteLookup = BuildChunkedContentLookup(RemoteContent);

            // Run the actual update (download) operation.
            BuildsOperationUpdateFolder Download(Log,
                                                 *LogOutput,
                                                 Storage,
                                                 AbortFlag,
                                                 PauseFlag,
                                                 WorkerPool,
                                                 NetworkPool,
                                                 BuildId,
                                                 TargetPath,
                                                 LocalContent,
                                                 LocalLookup,
                                                 RemoteContent,
                                                 RemoteLookup,
                                                 BlockDescriptions,
                                                 LooseChunkHashes,
                                                 BuildsOperationUpdateFolder::Options{.SystemRootDir = SystemRootDir,
                                                                                      .ZenFolderPath = ZenFolderPath,
                                                                                      .ValidateCompletedSequences = true});
            FolderContent ResultingState;
            Download.Execute(ResultingState);

            return ResultingState;
        }

        // Verifies a download: the downloaded path set must match `Paths` exactly,
        // and every file must match the source file's size and content hash.
        void ValidateDownload(std::span<const std::string> Paths,
                              std::span<const uint64_t> Sizes,
                              const std::filesystem::path& Source,
                              const std::filesystem::path& Target,
                              const FolderContent& DownloadContent)
        {
            const std::filesystem::path SourcePath = RootPath / Source;
            const std::filesystem::path TargetPath = RootPath / Target;

            CHECK_EQ(Paths.size(), DownloadContent.Paths.size());
            // Expected size and hash per generic (forward-slash) path string.
            tsl::robin_map<std::string, uint64_t> ExpectedSizes;
            tsl::robin_map<std::string, IoHash> ExpectedHashes;
            for (size_t Index = 0; Index < Paths.size(); Index++)
            {
                const std::string LookupString = std::filesystem::path(Paths[Index]).generic_string();
                ExpectedSizes.insert_or_assign(LookupString, Sizes[Index]);
                std::filesystem::path FilePath = SourcePath / Paths[Index];
                const IoHash SourceHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(FilePath.make_preferred()));
                ExpectedHashes.insert_or_assign(LookupString, SourceHash);
            }
            for (size_t Index = 0; Index < DownloadContent.Paths.size(); Index++)
            {
                const std::string LookupString = std::filesystem::path(DownloadContent.Paths[Index]).generic_string();
                auto SizeIt = ExpectedSizes.find(LookupString);
                CHECK_NE(SizeIt, ExpectedSizes.end());
                CHECK_EQ(SizeIt->second, DownloadContent.RawSizes[Index]);
                std::filesystem::path FilePath = TargetPath / DownloadContent.Paths[Index];
                const IoHash DownloadedHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(FilePath.make_preferred()));
                auto HashIt = ExpectedHashes.find(LookupString);
                CHECK_NE(HashIt, ExpectedHashes.end());
                CHECK_EQ(HashIt->second, DownloadedHash);
            }
        }

        // Folder layout (populated by Initialize()).
        const std::filesystem::path RootPath;
        std::filesystem::path StoragePath;
        std::filesystem::path TempPath;
        std::filesystem::path SystemRootDir;
        std::filesystem::path ZenFolderPath;

        LoggerRef Log = ConsoleLog();
        std::unique_ptr<ProgressBase> LogOutput;

        // ChunkController may be replaced by Download() to match the remote build.
        std::unique_ptr<ChunkingController> ChunkController;
        std::unique_ptr<ChunkingCache> ChunkCache;

        StorageInstance Storage;
        BuildStorageBase::Statistics StorageStats;

        WorkerThreadPool WorkerPool;
        WorkerThreadPool NetworkPool;

        // Never set by the tests; the operations just require them.
        std::atomic<bool> AbortFlag;
        std::atomic<bool> PauseFlag;
    };
+
+} // namespace buildstorageoperations_testutils
+
+TEST_SUITE_BEGIN("remotestore.buildstorageutil");
+
+TEST_CASE("normalizepartselection.empty_defaults_to_default")
+{
+ std::vector<Oid> Ids;
+ std::vector<std::string> Names;
+ NormalizePartSelection(Ids, Names, {});
+ CHECK(Ids.empty());
+ REQUIRE_EQ(Names.size(), 1u);
+ CHECK_EQ(Names[0], "default");
+}
+
+TEST_CASE("normalizepartselection.wildcard_alone_clears_names")
+{
+ std::vector<Oid> Ids;
+ std::vector<std::string> Names = {"*"};
+ NormalizePartSelection(Ids, Names, {});
+ CHECK(Ids.empty());
+ CHECK(Names.empty());
+}
+
+TEST_CASE("normalizepartselection.wildcard_with_other_name_throws")
+{
+ std::vector<Oid> Ids;
+ std::vector<std::string> Names = {"*", "foo"};
+ CHECK_THROWS_AS(NormalizePartSelection(Ids, Names, {}), OptionParseException);
+}
+
+TEST_CASE("normalizepartselection.wildcard_with_ids_throws")
+{
+ std::vector<Oid> Ids = {Oid::NewOid()};
+ std::vector<std::string> Names = {"*"};
+ CHECK_THROWS_AS(NormalizePartSelection(Ids, Names, {}), OptionParseException);
+}
+
+TEST_CASE("normalizepartselection.explicit_name_unchanged")
+{
+ std::vector<Oid> Ids;
+ std::vector<std::string> Names = {"foo"};
+ NormalizePartSelection(Ids, Names, {});
+ CHECK(Ids.empty());
+ REQUIRE_EQ(Names.size(), 1u);
+ CHECK_EQ(Names[0], "foo");
+}
+
+TEST_CASE("normalizepartselection.ids_only_unchanged")
+{
+ const Oid Id = Oid::NewOid();
+ std::vector<Oid> Ids = {Id};
+ std::vector<std::string> Names;
+ NormalizePartSelection(Ids, Names, {});
+ REQUIRE_EQ(Ids.size(), 1u);
+ CHECK_EQ(Ids[0], Id);
+ CHECK(Names.empty());
+}
+
+TEST_CASE("buildstorageoperations.upload.folder")
+{
+ using namespace buildstorageoperations_testutils;
+
+ FastRandom BaseRandom;
+
+ const size_t FileCount = 11;
+
+ const std::string Paths[FileCount] = {{"file_1"},
+ {"file_2.exe"},
+ {"file_3.txt"},
+ {"dir_1/dir1_file_1.exe"},
+ {"dir_1/dir1_file_2.pdb"},
+ {"dir_1/dir1_file_3.txt"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
+ {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
+ {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
+ const uint64_t Sizes[FileCount] =
+ {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};
+
+ ScopedTemporaryDirectory SourceFolder;
+ TestState State(SourceFolder.Path());
+ State.Initialize();
+ State.CreateSourceData("source", Paths, Sizes);
+
+ const Oid BuildId = Oid::NewOid();
+ const Oid BuildPartId = Oid::NewOid();
+ const std::string BuildPartName = "default";
+
+ auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {});
+
+ CHECK_EQ(Result.size(), 1u);
+ CHECK_EQ(Result[0].first, BuildPartId);
+ CHECK_EQ(Result[0].second, BuildPartName);
+ State.ValidateUpload(BuildId, Result);
+
+ FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false);
+ CHECK_EQ(DownloadContent.Paths.size(), FileCount);
+ State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent);
+}
+
+TEST_CASE("buildstorageoperations.upload.manifest")
+{
+ using namespace buildstorageoperations_testutils;
+
+ FastRandom BaseRandom;
+
+ const size_t FileCount = 11;
+
+ const std::string Paths[FileCount] = {{"file_1"},
+ {"file_2.exe"},
+ {"file_3.txt"},
+ {"dir_1/dir1_file_1.exe"},
+ {"dir_1/dir1_file_2.pdb"},
+ {"dir_1/dir1_file_3.txt"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
+ {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
+ {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
+ const uint64_t Sizes[FileCount] =
+ {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};
+
+ ScopedTemporaryDirectory SourceFolder;
+ TestState State(SourceFolder.Path());
+ State.Initialize();
+ State.CreateSourceData("source", Paths, Sizes);
+
+ std::span<const std::string> ManifestFiles(Paths);
+ ManifestFiles = ManifestFiles.subspan(0, FileCount / 2);
+
+ std::span<const uint64_t> ManifestSizes(Sizes);
+ ManifestSizes = ManifestSizes.subspan(0, FileCount / 2);
+
+ ExtendableStringBuilder<1024> Manifest;
+ for (const std::string& FilePath : ManifestFiles)
+ {
+ Manifest << FilePath << "\n";
+ }
+
+ WriteFile(State.RootPath / "manifest.txt", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size()));
+
+ const Oid BuildId = Oid::NewOid();
+ const Oid BuildPartId = Oid::NewOid();
+ const std::string BuildPartName = "default";
+
+ auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", State.RootPath / "manifest.txt");
+
+ CHECK_EQ(Result.size(), 1u);
+ CHECK_EQ(Result[0].first, BuildPartId);
+ CHECK_EQ(Result[0].second, BuildPartName);
+ State.ValidateUpload(BuildId, Result);
+
+ FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false);
+ State.ValidateDownload(ManifestFiles, ManifestSizes, "source", "download", DownloadContent);
+}
+
TEST_CASE("buildstorageoperations.memorychunkingcache")
{
    using namespace buildstorageoperations_testutils;

    FastRandom BaseRandom;

    const size_t FileCount = 11;

    // Same fixture tree as the other upload tests: nested dirs, mixed sizes,
    // one zero-byte file.
    const std::string Paths[FileCount] = {{"file_1"},
                                          {"file_2.exe"},
                                          {"file_3.txt"},
                                          {"dir_1/dir1_file_1.exe"},
                                          {"dir_1/dir1_file_2.pdb"},
                                          {"dir_1/dir1_file_3.txt"},
                                          {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
                                          {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
                                          {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
                                          {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
                                          {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
    const uint64_t Sizes[FileCount] =
        {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};

    ScopedTemporaryDirectory SourceFolder;
    TestState State(SourceFolder.Path());
    State.Initialize();
    State.CreateSourceData("source", Paths, Sizes);

    const Oid BuildId = Oid::NewOid();
    const Oid BuildPartId = Oid::NewOid();
    const std::string BuildPartName = "default";

    {
        // First upload: the in-memory chunking cache starts cold, so every
        // non-empty file must be chunked and *stored* into the cache.
        const std::filesystem::path SourcePath = SourceFolder.Path() / "source";
        CbObject MetaData;
        BuildsOperationUploadFolder Upload(State.Log,
                                           *State.LogOutput,
                                           State.Storage,
                                           State.AbortFlag,
                                           State.PauseFlag,
                                           State.WorkerPool,
                                           State.NetworkPool,
                                           BuildId,
                                           SourcePath,
                                           true,
                                           MetaData,
                                           BuildsOperationUploadFolder::Options{.TempDir = State.TempPath});
        auto Result = Upload.Execute(BuildPartId, BuildPartName, {}, *State.ChunkController, *State.ChunkCache);

        CHECK_EQ(Upload.m_ChunkingStats.FilesStoredInCache.load(), FileCount - 1); // Zero size files are not stored in cache
        // The byte counter still matches the full total since the skipped file is empty.
        CHECK_EQ(Upload.m_ChunkingStats.BytesStoredInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0)));
        CHECK(Upload.m_ChunkingStats.ChunksStoredInCache.load() >= FileCount - 1); // Zero size files are not stored in cache

        CHECK_EQ(Result.size(), 1u);
        CHECK_EQ(Result[0].first, BuildPartId);
        CHECK_EQ(Result[0].second, BuildPartName);
    }

    // NOTE(review): the result of this re-upload of the same part is unused —
    // presumably the call only exercises the helper path with a warm cache; confirm.
    auto Result = State.Upload(BuildId, BuildPartId, BuildPartName, "source", {});

    const Oid BuildId2 = Oid::NewOid();
    const Oid BuildPartId2 = Oid::NewOid();

    {
        // Second build from the same source: everything should now be *found* in
        // the warm cache instead of being re-chunked.
        const std::filesystem::path SourcePath = SourceFolder.Path() / "source";
        CbObject MetaData;
        BuildsOperationUploadFolder Upload(State.Log,
                                           *State.LogOutput,
                                           State.Storage,
                                           State.AbortFlag,
                                           State.PauseFlag,
                                           State.WorkerPool,
                                           State.NetworkPool,
                                           BuildId2,
                                           SourcePath,
                                           true,
                                           MetaData,
                                           BuildsOperationUploadFolder::Options{.TempDir = State.TempPath});
        Upload.Execute(BuildPartId2, BuildPartName, {}, *State.ChunkController, *State.ChunkCache);

        CHECK_EQ(Upload.m_ChunkingStats.FilesFoundInCache.load(), FileCount - 1); // Zero size files are not stored in cache
        CHECK_EQ(Upload.m_ChunkingStats.BytesFoundInCache.load(), std::accumulate(&Sizes[0], &Sizes[FileCount], uint64_t(0)));
        CHECK(Upload.m_ChunkingStats.ChunksFoundInCache.load() >= FileCount - 1); // Zero size files are not stored in cache
    }

    // Round-trip the second build to confirm the cached chunks produced valid data.
    FolderContent DownloadContent = State.Download(BuildId2, BuildPartId2, {}, "download", /* Append */ false);
    State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent);
}
+
+TEST_CASE("buildstorageoperations.upload.multipart")
+{
+ // Disabled since it relies on authentication and specific block being present in cloud storage
+ if (false)
+ {
+ using namespace buildstorageoperations_testutils;
+
+ FastRandom BaseRandom;
+
+ const size_t FileCount = 11;
+
+ const std::string Paths[FileCount] = {{"file_1"},
+ {"file_2.exe"},
+ {"file_3.txt"},
+ {"dir_1/dir1_file_1.exe"},
+ {"dir_1/dir1_file_2.pdb"},
+ {"dir_1/dir1_file_3.txt"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_1.exe"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_2.pdb"},
+ {"dir_2/dir2_dir1/dir2_dir1_file_3.dll"},
+ {"dir_2/dir2_dir2/dir2_dir2_file_1.txt"},
+ {"dir_2/dir2_dir2/dir2_dir2_file_2.json"}};
+ const uint64_t Sizes[FileCount] =
+ {6u * 1024u, 0, 798, 19u * 1024u, 7u * 1024u, 93, 31u * 1024u, 17u * 1024u, 13u * 1024u, 2u * 1024u, 3u * 1024u};
+
+ ScopedTemporaryDirectory SourceFolder;
+ TestState State(SourceFolder.Path());
+ State.Initialize();
+ State.CreateSourceData("source", Paths, Sizes);
+
+ std::span<const std::string> ManifestFiles1(Paths);
+ ManifestFiles1 = ManifestFiles1.subspan(0, FileCount / 2);
+
+ std::span<const uint64_t> ManifestSizes1(Sizes);
+ ManifestSizes1 = ManifestSizes1.subspan(0, FileCount / 2);
+
+ std::span<const std::string> ManifestFiles2(Paths);
+ ManifestFiles2 = ManifestFiles2.subspan(FileCount / 2 - 1);
+
+ std::span<const uint64_t> ManifestSizes2(Sizes);
+ ManifestSizes2 = ManifestSizes2.subspan(FileCount / 2 - 1);
+
+ const Oid BuildPart1Id = Oid::NewOid();
+ const std::string BuildPart1Name = "part1";
+ const Oid BuildPart2Id = Oid::NewOid();
+ const std::string BuildPart2Name = "part2";
+ {
+ CbObjectWriter Writer;
+ Writer.BeginObject("parts"sv);
+ {
+ Writer.BeginObject(BuildPart1Name);
+ {
+ Writer.AddObjectId("partId"sv, BuildPart1Id);
+ Writer.BeginArray("files"sv);
+ for (const std::string& ManifestFile : ManifestFiles1)
+ {
+ Writer.AddString(ManifestFile);
+ }
+ Writer.EndArray(); // files
+ }
+ Writer.EndObject(); // part1
+
+ Writer.BeginObject(BuildPart2Name);
+ {
+ Writer.AddObjectId("partId"sv, BuildPart2Id);
+ Writer.BeginArray("files"sv);
+ for (const std::string& ManifestFile : ManifestFiles2)
+ {
+ Writer.AddString(ManifestFile);
+ }
+ Writer.EndArray(); // files
+ }
+ Writer.EndObject(); // part2
+ }
+ Writer.EndObject(); // parts
+
+ ExtendableStringBuilder<1024> Manifest;
+ CompactBinaryToJson(Writer.Save(), Manifest);
+ WriteFile(State.RootPath / "manifest.json", IoBuffer(IoBuffer::Wrap, Manifest.Data(), Manifest.Size()));
+ }
+
+ const Oid BuildId = Oid::NewOid();
+
+ auto Result = State.Upload(BuildId, {}, {}, "source", State.RootPath / "manifest.json");
+
+ CHECK_EQ(Result.size(), 2u);
+ CHECK_EQ(Result[0].first, BuildPart1Id);
+ CHECK_EQ(Result[0].second, BuildPart1Name);
+ CHECK_EQ(Result[1].first, BuildPart2Id);
+ CHECK_EQ(Result[1].second, BuildPart2Name);
+ State.ValidateUpload(BuildId, Result);
+
+ FolderContent DownloadContent = State.Download(BuildId, Oid::Zero, {}, "download", /* Append */ false);
+ State.ValidateDownload(Paths, Sizes, "source", "download", DownloadContent);
+
+ FolderContent Part1DownloadContent = State.Download(BuildId, BuildPart1Id, {}, "download_part1", /* Append */ false);
+ State.ValidateDownload(ManifestFiles1, ManifestSizes1, "source", "download_part1", Part1DownloadContent);
+
+ FolderContent Part2DownloadContent = State.Download(BuildId, Oid::Zero, BuildPart2Name, "download_part2", /* Append */ false);
+ State.ValidateDownload(ManifestFiles2, ManifestSizes2, "source", "download_part2", Part2DownloadContent);
+
+ (void)State.Download(BuildId, BuildPart1Id, BuildPart1Name, "download_part1+2", /* Append */ false);
+ FolderContent Part1And2DownloadContent = State.Download(BuildId, BuildPart2Id, {}, "download_part1+2", /* Append */ true);
+ State.ValidateDownload(Paths, Sizes, "source", "download_part1+2", Part1And2DownloadContent);
+ }
+}
+
+// Manual integration test (skipped by default) for partial chunk-block
+// downloads: fetches block metadata for a known build from a live Jupiter
+// service and verifies that HTTP range requests against the block blob return
+// exactly the requested compressed chunks.
+TEST_CASE("buildstorageoperations.partial.block.download" * doctest::skip(true))
+{
+	// Acquire credentials via the OidcToken helper executable that ships next
+	// to the running test binary.
+	const std::string OidcExecutableName = "OidcToken" ZEN_EXE_SUFFIX_LITERAL;
+	std::filesystem::path OidcTokenExePath = (GetRunningExecutablePath().parent_path() / OidcExecutableName).make_preferred();
+
+	HttpClientSettings ClientSettings{
+		.LogCategory = "httpbuildsclient",
+		.AccessTokenProvider =
+			httpclientauth::CreateFromOidcTokenExecutable(OidcTokenExePath, "https://jupiter.devtools.epicgames.com", true, false, false),
+		.AssumeHttp2 = false,
+		.AllowResume = true,
+		.RetryCount = 0,
+		.Verbose = false};
+
+	HttpClient HttpClient("https://euc.jupiter.devtools.epicgames.com", ClientSettings);
+
+	// Hard-coded namespace/bucket/build id -- the test only works while this
+	// build is still retained server-side, which is why it is skip(true).
+	const std::string_view Namespace = "fortnite.oplog";
+	const std::string_view Bucket = "fortnitegame.staged-build.fortnite-main.ps4-client";
+	const Oid BuildId = Oid::FromHexString("09a76ea92ad301d4724fafad");
+
+	{
+		// Sanity check: the build document exists and is valid compact binary.
+		HttpClient::Response Response = HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}", Namespace, Bucket, BuildId),
+													   HttpClient::Accept(ZenContentType::kCbObject));
+		CbValidateError ValidateResult = CbValidateError::None;
+		CbObject Object = ValidateAndReadCompactBinaryObject(IoBuffer(Response.ResponsePayload), ValidateResult);
+		REQUIRE(ValidateResult == CbValidateError::None);
+	}
+
+	// Request block metadata (chunk hashes, compressed/raw lengths, header
+	// size) for one known block hash.
+	std::vector<ChunkBlockDescription> BlockDescriptions;
+	{
+		CbObjectWriter Request;
+
+		Request.BeginArray("blocks"sv);
+		{
+			Request.AddHash(IoHash::FromHexString("7c353ed782675a5e8f968e61e51fc797ecdc2882"));
+		}
+		Request.EndArray();
+
+		IoBuffer Payload = Request.Save().GetBuffer().AsIoBuffer();
+		Payload.SetContentType(ZenContentType::kCbObject);
+
+		HttpClient::Response BlockDescriptionsResponse =
+			HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/blocks/getBlockMetadata", Namespace, Bucket, BuildId),
+							Payload,
+							HttpClient::Accept(ZenContentType::kCbObject));
+		REQUIRE(BlockDescriptionsResponse.IsSuccess());
+
+		CbValidateError ValidateResult = CbValidateError::None;
+		CbObject Object = ValidateAndReadCompactBinaryObject(IoBuffer(BlockDescriptionsResponse.ResponsePayload), ValidateResult);
+		REQUIRE(ValidateResult == CbValidateError::None);
+
+		{
+			CbArrayView BlocksArray = Object["blocks"sv].AsArrayView();
+			for (CbFieldView Block : BlocksArray)
+			{
+				ChunkBlockDescription Description = ParseChunkBlockDescription(Block.AsObjectView());
+				BlockDescriptions.emplace_back(std::move(Description));
+			}
+		}
+	}
+
+	REQUIRE(!BlockDescriptions.empty());
+
+	// NOTE(review): the blob hash comes from back() but the chunk layout below
+	// comes from front(); these agree only when the service returned a single
+	// block description (only one hash was requested above) -- confirm.
+	const IoHash BlockHash = BlockDescriptions.back().BlockHash;
+
+	const ChunkBlockDescription& BlockDescription = BlockDescriptions.front();
+	REQUIRE(!BlockDescription.ChunkRawHashes.empty());
+	REQUIRE(!BlockDescription.ChunkCompressedLengths.empty());
+
+	// Build an (offset, size) pair per compressed chunk. Chunks are laid out
+	// back to back after the encoder header plus the block header.
+	std::vector<std::pair<uint64_t, uint64_t>> ChunkOffsetAndSizes;
+	// NOTE(review): the narrow<uint32_t> result is immediately widened to
+	// uint64_t; gsl::narrow<uint64_t> (or no narrowing) would state the intent.
+	uint64_t Offset = gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize);
+
+	for (uint32_t ChunkCompressedSize : BlockDescription.ChunkCompressedLengths)
+	{
+		ChunkOffsetAndSizes.push_back(std::make_pair(Offset, ChunkCompressedSize));
+		Offset += ChunkCompressedSize;
+	}
+
+	ScopedTemporaryDirectory SourceFolder;
+
+	// Downloads the given chunk indexes via a (possibly multi-range) request
+	// and checks each returned range decompresses to the expected raw hash/size.
+	auto Validate = [&](std::span<const uint32_t> ChunkIndexesToFetch) {
+		std::vector<std::pair<uint64_t, uint64_t>> Ranges;
+		for (uint32_t ChunkIndex : ChunkIndexesToFetch)
+		{
+			Ranges.push_back(ChunkOffsetAndSizes[ChunkIndex]);
+		}
+
+		// Compose a single RFC 7233 "Range: bytes=a-b, c-d, ..." header; the
+		// end offset of each byte range is inclusive, hence the "- 1".
+		HttpClient::KeyValueMap Headers;
+		if (!Ranges.empty())
+		{
+			ExtendableStringBuilder<512> SB;
+			for (const std::pair<uint64_t, uint64_t>& R : Ranges)
+			{
+				if (SB.Size() > 0)
+				{
+					SB << ", ";
+				}
+				SB << R.first << "-" << R.first + R.second - 1;
+			}
+			Headers.Entries.insert({"Range", fmt::format("bytes={}", SB.ToView())});
+		}
+
+		HttpClient::Response GetBlobRangesResponse = HttpClient.Download(
+			fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}?supportsRedirect=false", Namespace, Bucket, BuildId, BlockHash),
+			SourceFolder.Path(),
+			Headers);
+
+		REQUIRE(GetBlobRangesResponse.IsSuccess());
+		[[maybe_unused]] MemoryView RangesMemoryView = GetBlobRangesResponse.ResponsePayload.GetView();
+
+		// Map the response back to payload-relative ranges; a server may
+		// legally ignore the Range header and return the whole blob instead.
+		std::vector<std::pair<uint64_t, uint64_t>> PayloadRanges = GetBlobRangesResponse.GetRanges(Ranges);
+		if (PayloadRanges.empty())
+		{
+			// We got the whole blob, use the ranges as is
+			PayloadRanges = Ranges;
+		}
+
+		REQUIRE(PayloadRanges.size() == Ranges.size());
+
+		for (uint32_t RangeIndex = 0; RangeIndex < PayloadRanges.size(); RangeIndex++)
+		{
+			const std::pair<uint64_t, uint64_t>& PayloadRange = PayloadRanges[RangeIndex];
+
+			CHECK_EQ(PayloadRange.second, Ranges[RangeIndex].second);
+
+			// Each returned range must be a self-contained compressed chunk
+			// whose raw hash and raw size match the block metadata.
+			IoBuffer ChunkPayload(GetBlobRangesResponse.ResponsePayload, PayloadRange.first, PayloadRange.second);
+			IoHash RawHash;
+			uint64_t RawSize;
+			CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(ChunkPayload), RawHash, RawSize);
+			CHECK(CompressedChunk);
+			CHECK_EQ(RawHash, BlockDescription.ChunkRawHashes[ChunkIndexesToFetch[RangeIndex]]);
+			CHECK_EQ(RawSize, BlockDescription.ChunkRawLengths[ChunkIndexesToFetch[RangeIndex]]);
+		}
+	};
+
+	{
+		// Single
+		std::vector<uint32_t> ChunkIndexesToFetch{uint32_t(BlockDescription.ChunkCompressedLengths.size() / 2)};
+		Validate(ChunkIndexesToFetch);
+	}
+	{
+		// Many
+		std::vector<uint32_t> ChunkIndexesToFetch;
+		for (uint32_t Index = 0; Index < BlockDescription.ChunkCompressedLengths.size() / 16; Index++)
+		{
+			ChunkIndexesToFetch.push_back(uint32_t(BlockDescription.ChunkCompressedLengths.size() / 6 + Index * 7));
+			ChunkIndexesToFetch.push_back(uint32_t(BlockDescription.ChunkCompressedLengths.size() / 6 + Index * 7 + 1));
+			ChunkIndexesToFetch.push_back(uint32_t(BlockDescription.ChunkCompressedLengths.size() / 6 + Index * 7 + 3));
+		}
+		Validate(ChunkIndexesToFetch);
+	}
+
+	{
+		// First and last
+		std::vector<uint32_t> ChunkIndexesToFetch{0, uint32_t(BlockDescription.ChunkCompressedLengths.size() - 1)};
+		Validate(ChunkIndexesToFetch);
+	}
+}
+TEST_SUITE_END();
+
+// Referencing this symbol from another translation unit forces the linker to
+// keep this object file -- and the self-registering doctest tests above -- in
+// the final binary. The body is intentionally empty.
+void
+buildstorageutil_forcelink()
+{
+}
+
+#endif // ZEN_WITH_TESTS
+
} // namespace zen
diff --git a/src/zenremotestore/builds/buildupdatefolder.cpp b/src/zenremotestore/builds/buildupdatefolder.cpp
new file mode 100644
index 000000000..443ab957e
--- /dev/null
+++ b/src/zenremotestore/builds/buildupdatefolder.cpp
@@ -0,0 +1,4947 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenremotestore/builds/buildupdatefolder.h>
+
+#include <zencore/basicfile.h>
+#include <zencore/fmtutils.h>
+#include <zencore/parallelwork.h>
+#include <zencore/scopeguard.h>
+#include <zencore/trace.h>
+#include <zenremotestore/builds/buildcontent.h>
+#include <zenremotestore/builds/buildmanifest.h>
+#include <zenremotestore/chunking/chunkingcache.h>
+#include <zenremotestore/chunking/chunkingcontroller.h>
+#include <zenremotestore/transferthreadworkers.h>
+#include <zenutil/filesystemutils.h>
+#include <zenutil/filteredrate.h>
+#include <zenutil/progress.h>
+
+#include <numeric>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_set.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+namespace zen {
+
+using namespace std::literals;
+
+namespace {
+	// Cache subfolder under the zen temp folder: holds decompressed and
+	// verified data (chunks and chunk sequences).
+	std::filesystem::path ZenTempCacheFolderPath(const std::filesystem::path& ZenFolderPath)
+	{
+		std::filesystem::path TempRoot = ZenTempFolderPath(ZenFolderPath);
+		return TempRoot / "cache";
+	}
+	// Blocks subfolder under the zen temp folder: temp storage for whole and
+	// partial downloaded blocks.
+	std::filesystem::path ZenTempBlockFolderPath(const std::filesystem::path& ZenFolderPath)
+	{
+		std::filesystem::path Result = ZenTempFolderPath(ZenFolderPath);
+		Result /= "blocks";
+		return Result;
+	}
+	// Download subfolder under the zen temp folder: temp storage for
+	// decompressed and validated chunks.
+	std::filesystem::path ZenTempDownloadFolderPath(const std::filesystem::path& ZenFolderPath)
+	{
+		std::filesystem::path Result = ZenTempFolderPath(ZenFolderPath);
+		Result /= "download";
+		return Result;
+	}
+	// In-progress cache file name for a chunk sequence: "<rawhash>.tmp". The
+	// file is renamed to its final (extension-less) name once complete.
+	std::filesystem::path GetTempChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash)
+	{
+		std::string FileName = RawHash.ToHexString();
+		FileName += ".tmp";
+		return CacheFolderPath / FileName;
+	}
+
+	// Final cache file name for a completed chunk sequence: the hex string of
+	// its raw content hash, with no extension.
+	std::filesystem::path GetFinalChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash)
+	{
+		std::filesystem::path Result = CacheFolderPath;
+		Result /= RawHash.ToHexString();
+		return Result;
+	}
+	// Wrapper around the generic CleanDirectory helper that adds progress-bar
+	// reporting, abort/pause handling and summary logging. Returns true only
+	// when nothing failed to be removed and the operation was not aborted.
+	bool CleanDirectory(LoggerRef InLog,
+						ProgressBase& Progress,
+						WorkerThreadPool& IOWorkerPool,
+						std::atomic<bool>& AbortFlag,
+						std::atomic<bool>& PauseFlag,
+						bool IsQuiet,
+						const std::filesystem::path& Path,
+						std::span<const std::string> ExcludeDirectories)
+	{
+		ZEN_TRACE_CPU("CleanDirectory");
+		ZEN_SCOPED_LOG(InLog);
+		Stopwatch Timer;
+
+		std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = Progress.CreateProgressBar("Clean Folder");
+
+		CleanDirectoryResult Result = CleanDirectory(
+			IOWorkerPool,
+			AbortFlag,
+			PauseFlag,
+			Path,
+			ExcludeDirectories,
+			[&](const std::string_view Details, uint64_t TotalCount, uint64_t RemainingCount, bool IsPaused, bool IsAborted) {
+				ProgressBar->UpdateState({.Task = "Cleaning folder ",
+										  .Details = std::string(Details),
+										  .TotalCount = TotalCount,
+										  .RemainingCount = RemainingCount,
+										  .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+										 false);
+			},
+			Progress.GetProgressUpdateDelayMS());
+
+		ProgressBar->Finish();
+
+		// An abort is never reported as success, regardless of progress so far.
+		if (AbortFlag)
+		{
+			return false;
+		}
+
+		uint64_t ElapsedTimeMs = Timer.GetElapsedTimeMs();
+
+		if (!Result.FailedRemovePaths.empty())
+		{
+			// Fold every failed path plus its error code/message into a single
+			// warning line so the log stays compact.
+			ExtendableStringBuilder<512> SB;
+			for (size_t FailedPathIndex = 0; FailedPathIndex < Result.FailedRemovePaths.size(); FailedPathIndex++)
+			{
+				SB << fmt::format("\n    '{}': ({}) {}",
+								  Result.FailedRemovePaths[FailedPathIndex].first,
+								  Result.FailedRemovePaths[FailedPathIndex].second.value(),
+								  Result.FailedRemovePaths[FailedPathIndex].second.message());
+			}
+			ZEN_WARN("Clean failed to remove files from '{}': {}", Path, SB.ToView());
+		}
+
+		// Only log timing when the clean took noticeable time (>= 200ms).
+		if (ElapsedTimeMs >= 200 && !IsQuiet)
+		{
+			ZEN_INFO("Wiped folder '{}' {} ({}) in {}",
+					 Path,
+					 Result.FoundCount,
+					 NiceBytes(Result.DeletedByteCount),
+					 NiceTimeSpanMs(ElapsedTimeMs));
+		}
+
+		return Result.FailedRemovePaths.empty();
+	}
+	// Applies manifest-recorded file attributes/mode bits to a local file,
+	// translating between Windows attribute bits and POSIX mode bits when the
+	// build originated on a different platform family. Returns the attribute
+	// or mode value that was actually applied.
+	// NOTE: FilePath is now taken by const reference (was by const value) to
+	// avoid a needless std::filesystem::path copy; call sites are unaffected.
+	// The stray ';' after the function body was also removed.
+	uint32_t SetNativeFileAttributes(const std::filesystem::path& FilePath, SourcePlatform SourcePlatform, uint32_t Attributes)
+	{
+#if ZEN_PLATFORM_WINDOWS
+		if (SourcePlatform == SourcePlatform::Windows)
+		{
+			// Same platform family: the stored attributes apply verbatim.
+			SetFileAttributesToPath(FilePath, Attributes);
+			return Attributes;
+		}
+		else
+		{
+			// POSIX-origin build on Windows: only carry over the read-only
+			// bit, preserving the file's other current attributes.
+			uint32_t CurrentAttributes = GetFileAttributesFromPath(FilePath);
+			uint32_t NewAttributes = zen::MakeFileAttributeReadOnly(CurrentAttributes, zen::IsFileModeReadOnly(Attributes));
+			if (CurrentAttributes != NewAttributes)
+			{
+				SetFileAttributesToPath(FilePath, NewAttributes);
+			}
+			return NewAttributes;
+		}
+#endif // ZEN_PLATFORM_WINDOWS
+#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+		if (SourcePlatform != SourcePlatform::Windows)
+		{
+			// POSIX-origin build on a POSIX host: the stored mode applies directly.
+			zen::SetFileMode(FilePath, Attributes);
+			return Attributes;
+		}
+		else
+		{
+			// Windows-origin build on POSIX: only the read-only flag is
+			// meaningful; keep the rest of the current mode untouched.
+			uint32_t CurrentMode = zen::GetFileMode(FilePath);
+			uint32_t NewMode = zen::MakeFileModeReadOnly(CurrentMode, zen::IsFileAttributeReadOnly(Attributes));
+			if (CurrentMode != NewMode)
+			{
+				zen::SetFileMode(FilePath, NewMode);
+			}
+			return NewMode;
+		}
+#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+	}
+
+	// Reads the platform-native attribute/mode bits for FilePath: Windows
+	// file attributes on Windows, the POSIX file mode on Linux/Mac.
+	// NOTE: FilePath is now taken by const reference (was by const value) to
+	// avoid a needless std::filesystem::path copy; call sites are unaffected.
+	uint32_t GetNativeFileAttributes(const std::filesystem::path& FilePath)
+	{
+#if ZEN_PLATFORM_WINDOWS
+		return GetFileAttributesFromPath(FilePath);
+#endif // ZEN_PLATFORM_WINDOWS
+#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+		return GetFileMode(FilePath);
+#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+	}
+	// Tries to materialize a downloaded block buffer as a named file at Path.
+	// If the buffer already wraps an entire temp file, that file is renamed
+	// into place; otherwise, when ForceDiskBased is set, the buffer contents
+	// are written out. Returns Path on success or an empty path if the buffer
+	// was left untouched. BlockBuffer is reset whenever ownership moved to the
+	// target file.
+	std::filesystem::path TryMoveDownloadedChunk(IoBuffer& BlockBuffer, const std::filesystem::path& Path, bool ForceDiskBased)
+	{
+		uint64_t BlockSize = BlockBuffer.GetSize();
+		IoBufferFileReference FileRef;
+		// Fast path: the buffer maps a whole file -- just rename that file.
+		if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && (FileRef.FileChunkSize == BlockSize))
+		{
+			ZEN_TRACE_CPU("MoveTempFullBlock");
+			std::error_code Ec;
+			std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec);
+			if (!Ec)
+			{
+				// Drop the delete-on-close claim and release the buffer's
+				// handle before renaming the underlying file away.
+				BlockBuffer.SetDeleteOnClose(false);
+				BlockBuffer = {};
+				RenameFile(TempBlobPath, Path, Ec);
+				if (Ec)
+				{
+					// Re-open the temp file again
+					// (rename failed: restore the buffer and its
+					// delete-on-close state to how it was before).
+					BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete);
+					BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true);
+					BlockBuffer.SetDeleteOnClose(true);
+				}
+				else
+				{
+					return Path;
+				}
+			}
+		}
+
+		if (ForceDiskBased)
+		{
+			// Could not be moved and rather large, lets store it on disk
+			ZEN_TRACE_CPU("WriteTempFullBlock");
+			TemporaryFile::SafeWriteFile(Path, BlockBuffer);
+			BlockBuffer = {};
+			return Path;
+		}
+
+		return {};
+	}
+	// Returns true when Locations refers to exactly one sequence that consists
+	// of a single chunk, i.e. the chunk maps 1:1 to a whole file and needs no
+	// sequence assembly.
+	// NOTE: Locations is now taken by const reference (was by const value),
+	// avoiding a full std::vector copy on every call; call sites are unaffected.
+	bool IsSingleFileChunk(const ChunkedFolderContent& RemoteContent,
+						   const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& Locations)
+	{
+		if (Locations.size() == 1)
+		{
+			const uint32_t FirstSequenceIndex = Locations[0]->SequenceIndex;
+			if (RemoteContent.ChunkedContent.ChunkCounts[FirstSequenceIndex] == 1)
+			{
+				// A single-chunk sequence always starts at offset 0.
+				ZEN_ASSERT_SLOW(Locations[0]->Offset == 0);
+				return true;
+			}
+		}
+		return false;
+	}
+	// Flattens a (possibly file-backed, possibly multi-segment) composite
+	// buffer into a single memory-based IoBuffer. A single segment that is
+	// already in memory is returned as-is without copying.
+	IoBuffer MakeBufferMemoryBased(const CompositeBuffer& PartialBlockBuffer)
+	{
+		ZEN_TRACE_CPU("MakeBufferMemoryBased");
+		IoBuffer BlockMemoryBuffer;
+		std::span<const SharedBuffer> Segments = PartialBlockBuffer.GetSegments();
+		if (Segments.size() == 1)
+		{
+			IoBufferFileReference FileRef = {};
+			if (PartialBlockBuffer.GetSegments().front().AsIoBuffer().GetFileReference(FileRef))
+			{
+				// Single file-backed segment: read it into a fresh memory buffer.
+				BlockMemoryBuffer = UniqueBuffer::Alloc(FileRef.FileChunkSize).MoveToShared().AsIoBuffer();
+				BasicFile Reader;
+				// Borrow the handle; the guard detaches so we never close a
+				// handle this function does not own.
+				Reader.Attach(FileRef.FileHandle);
+				auto _ = MakeGuard([&Reader]() { Reader.Detach(); });
+				MutableMemoryView ReadMem = BlockMemoryBuffer.GetMutableView();
+				Reader.Read(ReadMem.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset);
+				return BlockMemoryBuffer;
+			}
+			else
+			{
+				// Already a single in-memory segment -- no copy needed.
+				return PartialBlockBuffer.GetSegments().front().AsIoBuffer();
+			}
+		}
+		else
+		{
+			// Not a homogenous memory buffer, read all to memory
+
+			BlockMemoryBuffer = UniqueBuffer::Alloc(PartialBlockBuffer.GetSize()).MoveToShared().AsIoBuffer();
+			MutableMemoryView ReadMem = BlockMemoryBuffer.GetMutableView();
+			for (const SharedBuffer& Segment : Segments)
+			{
+				IoBufferFileReference FileRef = {};
+				if (Segment.AsIoBuffer().GetFileReference(FileRef))
+				{
+					// File-backed segment: read into place, then advance the
+					// destination view past the bytes just read.
+					BasicFile Reader;
+					Reader.Attach(FileRef.FileHandle);
+					auto _ = MakeGuard([&Reader]() { Reader.Detach(); });
+					Reader.Read(ReadMem.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset);
+					ReadMem = ReadMem.Mid(FileRef.FileChunkSize);
+				}
+				else
+				{
+					// Memory segment: plain copy (CopyFrom returns the
+					// remaining destination view).
+					ReadMem = ReadMem.CopyFrom(Segment.AsIoBuffer().GetView());
+				}
+			}
+			return BlockMemoryBuffer;
+		}
+	}
+
+	// Validates PathsToCheck under Path via GetValidFolderContent on the IO
+	// worker pool, reporting progress and honoring abort/pause. Returns the
+	// content that was found to be valid; scan counters accumulate into
+	// LocalFolderScanStats.
+	FolderContent CheckFolderFiles(ProgressBase& Progress,
+								   std::atomic<bool>& AbortFlag,
+								   std::atomic<bool>& PauseFlag,
+								   std::string_view ProgressLabel,
+								   TransferThreadWorkers& Workers,
+								   GetFolderContentStatistics& LocalFolderScanStats,
+								   const std::filesystem::path& Path,
+								   std::span<const std::filesystem::path> PathsToCheck)
+	{
+		std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = Progress.CreateProgressBar(ProgressLabel);
+		FolderContent Result = GetValidFolderContent(
+			Workers.GetIOWorkerPool(),
+			LocalFolderScanStats,
+			Path,
+			PathsToCheck,
+			// Progress callback: invoked periodically with overall counts.
+			[&ProgressBar, &LocalFolderScanStats, &AbortFlag, &PauseFlag](uint64_t PathCount, uint64_t CompletedPathCount) {
+				std::string Details =
+					fmt::format("{}/{} checked, {} found", CompletedPathCount, PathCount, LocalFolderScanStats.FoundFileCount.load());
+				ProgressBar->UpdateState({.Task = "Checking files ",
+										  .Details = Details,
+										  .TotalCount = PathCount,
+										  .RemainingCount = PathCount - CompletedPathCount,
+										  .Status = ProgressBase::ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)},
+										 false);
+			},
+			Progress.GetProgressUpdateDelayMS(),
+			AbortFlag,
+			PauseFlag);
+		ProgressBar->Finish();
+		return Result;
+	}
+
+	// Chunks every file listed in FolderSource under Path via
+	// ChunkFolderContent, with rate-filtered progress reporting. Local
+	// chunking statistics are accumulated into OutChunkingStats on completion.
+	ChunkedFolderContent ScanFolderFiles(ProgressBase& Progress,
+										 std::atomic<bool>& AbortFlag,
+										 std::atomic<bool>& PauseFlag,
+										 std::string_view ProgressLabel,
+										 TransferThreadWorkers& Workers,
+										 const std::filesystem::path& Path,
+										 const FolderContent& FolderSource,
+										 ChunkingController& ChunkController,
+										 ChunkingCache& ChunkCache,
+										 ChunkingStatistics& OutChunkingStats)
+	{
+		// Total raw bytes to scan, used as the progress denominator.
+		uint64_t ByteCountToScan = 0;
+		for (const uint64_t RawSize : FolderSource.RawSizes)
+		{
+			ByteCountToScan += RawSize;
+		}
+		std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = Progress.CreateProgressBar(ProgressLabel);
+		// Smoothed bytes/sec rate for the progress display.
+		FilteredRate FilteredBytesHashed;
+		FilteredBytesHashed.Start();
+		ChunkingStatistics LocalChunkingStats;
+		ChunkedFolderContent Result = ChunkFolderContent(
+			LocalChunkingStats,
+			Workers.GetIOWorkerPool(),
+			Path,
+			FolderSource,
+			ChunkController,
+			ChunkCache,
+			Progress.GetProgressUpdateDelayMS(),
+			[&](bool IsAborted, bool IsPaused, std::ptrdiff_t) {
+				FilteredBytesHashed.Update(LocalChunkingStats.BytesHashed.load());
+				std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found",
+												  LocalChunkingStats.FilesProcessed.load(),
+												  FolderSource.Paths.size(),
+												  NiceBytes(LocalChunkingStats.BytesHashed.load()),
+												  NiceBytes(ByteCountToScan),
+												  NiceNum(FilteredBytesHashed.GetCurrent()),
+												  LocalChunkingStats.UniqueChunksFound.load(),
+												  NiceBytes(LocalChunkingStats.UniqueBytesFound.load()));
+				ProgressBar->UpdateState({.Task = "Scanning files ",
+										  .Details = Details,
+										  .TotalCount = ByteCountToScan,
+										  .RemainingCount = ByteCountToScan - LocalChunkingStats.BytesHashed.load(),
+										  .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+										 false);
+			},
+			AbortFlag,
+			PauseFlag);
+		OutChunkingStats += LocalChunkingStats;
+		FilteredBytesHashed.Stop();
+		ProgressBar->Finish();
+		return Result;
+	}
+} // namespace
+
+// Captures everything the update-folder operation needs up front. All
+// reference members alias caller-owned state (flags, pools, content tables,
+// lookups, block descriptions, options) and must outlive this object. The
+// cache/temp folder paths are derived from Options.ZenFolderPath; they are
+// created later in Execute().
+BuildsOperationUpdateFolder::BuildsOperationUpdateFolder(LoggerRef Log,
+														 ProgressBase& Progress,
+														 StorageInstance& Storage,
+														 std::atomic<bool>& AbortFlag,
+														 std::atomic<bool>& PauseFlag,
+														 WorkerThreadPool& IOWorkerPool,
+														 WorkerThreadPool& NetworkPool,
+														 const Oid& BuildId,
+														 const std::filesystem::path& Path,
+														 const ChunkedFolderContent& LocalContent,
+														 const ChunkedContentLookup& LocalLookup,
+														 const ChunkedFolderContent& RemoteContent,
+														 const ChunkedContentLookup& RemoteLookup,
+														 const std::vector<ChunkBlockDescription>& BlockDescriptions,
+														 const std::vector<IoHash>& LooseChunkHashes,
+														 const Options& Options)
+: m_Log(Log)
+, m_Progress(Progress)
+, m_Storage(Storage)
+, m_AbortFlag(AbortFlag)
+, m_PauseFlag(PauseFlag)
+, m_IOWorkerPool(IOWorkerPool)
+, m_NetworkPool(NetworkPool)
+, m_BuildId(BuildId)
+, m_Path(Path)
+, m_LocalContent(LocalContent)
+, m_LocalLookup(LocalLookup)
+, m_RemoteContent(RemoteContent)
+, m_RemoteLookup(RemoteLookup)
+, m_BlockDescriptions(BlockDescriptions)
+, m_LooseChunkHashes(LooseChunkHashes)
+, m_Options(Options)
+, m_CacheFolderPath(ZenTempCacheFolderPath(m_Options.ZenFolderPath))
+, m_TempDownloadFolderPath(ZenTempDownloadFolderPath(m_Options.ZenFolderPath))
+, m_TempBlockFolderPath(ZenTempBlockFolderPath(m_Options.ZenFolderPath))
+{
+}
+
+void
+BuildsOperationUpdateFolder::Execute(FolderContent& OutLocalFolderState)
+{
+ ZEN_TRACE_CPU("BuildsOperationUpdateFolder::Execute");
+ try
+ {
+ enum class TaskSteps : uint32_t
+ {
+ ScanExistingData,
+ WriteChunks,
+ PrepareTarget,
+ FinalizeTarget,
+ Cleanup,
+ StepCount
+ };
+
+ auto EndProgress =
+ MakeGuard([&]() { m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); });
+
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::ScanExistingData, (uint32_t)TaskSteps::StepCount);
+
+ CreateDirectories(m_CacheFolderPath);
+ CreateDirectories(m_TempDownloadFolderPath);
+ CreateDirectories(m_TempBlockFolderPath);
+
+ std::vector<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters(m_RemoteContent.ChunkedContent.SequenceRawHashes.size());
+ std::vector<bool> RemoteChunkIndexNeedsCopyFromLocalFileFlags(m_RemoteContent.ChunkedContent.ChunkHashes.size());
+ std::vector<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags(m_RemoteContent.ChunkedContent.ChunkHashes.size());
+
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedChunkHashesFound;
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedSequenceHashesFound;
+ ScanCacheFolder(CachedChunkHashesFound, CachedSequenceHashesFound);
+
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedBlocksFound;
+ ScanTempBlocksFolder(CachedBlocksFound);
+
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexesLeftToFindToRemoteIndex;
+ InitializeSequenceCounters(SequenceIndexChunksLeftToWriteCounters,
+ SequenceIndexesLeftToFindToRemoteIndex,
+ CachedChunkHashesFound,
+ CachedSequenceHashesFound);
+
+ std::vector<ChunkedFolderContent> ScavengedContents;
+ std::vector<ChunkedContentLookup> ScavengedLookups;
+ std::vector<std::filesystem::path> ScavengedPaths;
+
+ std::vector<ScavengedSequenceCopyOperation> ScavengedSequenceCopyOperations;
+ uint64_t ScavengedPathsCount = 0;
+
+ if (m_Options.EnableOtherDownloadsScavenging)
+ {
+ ZEN_TRACE_CPU("GetScavengedSequences");
+
+ Stopwatch ScavengeTimer;
+
+ if (!SequenceIndexesLeftToFindToRemoteIndex.empty())
+ {
+ std::vector<ScavengeSource> ScavengeSources = FindScavengeSources();
+ ScanScavengeSources(ScavengeSources, ScavengedContents, ScavengedLookups, ScavengedPaths);
+ if (m_AbortFlag)
+ {
+ return;
+ }
+
+ MatchScavengedSequencesToRemote(ScavengedContents,
+ ScavengedLookups,
+ ScavengedPaths,
+ SequenceIndexesLeftToFindToRemoteIndex,
+ SequenceIndexChunksLeftToWriteCounters,
+ ScavengedSequenceCopyOperations,
+ ScavengedPathsCount);
+ }
+ m_CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs();
+ }
+
+ uint32_t RemainingChunkCount = 0;
+ for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++)
+ {
+ uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex);
+ if (ChunkWriteCount > 0)
+ {
+ RemainingChunkCount++;
+ }
+ }
+
+ // Pick up all chunks in current local state
+ tsl::robin_map<IoHash, size_t, IoHash::Hasher> RawHashToCopyChunkDataIndex;
+ std::vector<CopyChunkData> CopyChunkDatas;
+
+ if (m_Options.EnableTargetFolderScavenging)
+ {
+ ZEN_TRACE_CPU("GetLocalChunks");
+
+ Stopwatch LocalTimer;
+
+ ScavengeSourceForChunks(RemainingChunkCount,
+ RemoteChunkIndexNeedsCopyFromLocalFileFlags,
+ RawHashToCopyChunkDataIndex,
+ SequenceIndexChunksLeftToWriteCounters,
+ m_LocalContent,
+ m_LocalLookup,
+ CopyChunkDatas,
+ uint32_t(-1),
+ m_CacheMappingStats.LocalChunkMatchingRemoteCount,
+ m_CacheMappingStats.LocalChunkMatchingRemoteByteCount);
+
+ m_CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs();
+ }
+
+ if (m_Options.EnableOtherDownloadsScavenging)
+ {
+ ZEN_TRACE_CPU("GetScavengeChunks");
+
+ Stopwatch ScavengeTimer;
+
+ for (uint32_t ScavengedContentIndex = 0; ScavengedContentIndex < ScavengedContents.size() && (RemainingChunkCount > 0);
+ ScavengedContentIndex++)
+ {
+ const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengedContentIndex];
+ const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex];
+
+ ScavengeSourceForChunks(RemainingChunkCount,
+ RemoteChunkIndexNeedsCopyFromLocalFileFlags,
+ RawHashToCopyChunkDataIndex,
+ SequenceIndexChunksLeftToWriteCounters,
+ ScavengedContent,
+ ScavengedLookup,
+ CopyChunkDatas,
+ ScavengedContentIndex,
+ m_CacheMappingStats.ScavengedChunkMatchingRemoteCount,
+ m_CacheMappingStats.ScavengedChunkMatchingRemoteByteCount);
+ }
+ m_CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs();
+ }
+
+ if (!m_Options.IsQuiet)
+ {
+ if (m_CacheMappingStats.CacheSequenceHashesCount > 0 || m_CacheMappingStats.CacheChunkCount > 0 ||
+ m_CacheMappingStats.CacheBlockCount > 0)
+ {
+ ZEN_INFO("Download cache: Found {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks in {}",
+ m_CacheMappingStats.CacheSequenceHashesCount,
+ NiceBytes(m_CacheMappingStats.CacheSequenceHashesByteCount),
+ m_CacheMappingStats.CacheChunkCount,
+ NiceBytes(m_CacheMappingStats.CacheChunkByteCount),
+ m_CacheMappingStats.CacheBlockCount,
+ NiceBytes(m_CacheMappingStats.CacheBlocksByteCount),
+ NiceTimeSpanMs(m_CacheMappingStats.CacheScanElapsedWallTimeUs / 1000));
+ }
+
+ if (m_CacheMappingStats.LocalPathsMatchingSequencesCount > 0 || m_CacheMappingStats.LocalChunkMatchingRemoteCount > 0)
+ {
+ ZEN_INFO("Local state : Found {} ({}) chunk sequences, {} ({}) chunks in {}",
+ m_CacheMappingStats.LocalPathsMatchingSequencesCount,
+ NiceBytes(m_CacheMappingStats.LocalPathsMatchingSequencesByteCount),
+ m_CacheMappingStats.LocalChunkMatchingRemoteCount,
+ NiceBytes(m_CacheMappingStats.LocalChunkMatchingRemoteByteCount),
+ NiceTimeSpanMs(m_CacheMappingStats.LocalScanElapsedWallTimeUs / 1000));
+ }
+ if (m_CacheMappingStats.ScavengedPathsMatchingSequencesCount > 0 || m_CacheMappingStats.ScavengedChunkMatchingRemoteCount > 0)
+ {
+ ZEN_INFO("Scavenge of {} paths, found {} ({}) chunk sequences, {} ({}) chunks in {}",
+ ScavengedPathsCount,
+ m_CacheMappingStats.ScavengedPathsMatchingSequencesCount,
+ NiceBytes(m_CacheMappingStats.ScavengedPathsMatchingSequencesByteCount),
+ m_CacheMappingStats.ScavengedChunkMatchingRemoteCount,
+ NiceBytes(m_CacheMappingStats.ScavengedChunkMatchingRemoteByteCount),
+ NiceTimeSpanMs(m_CacheMappingStats.ScavengeElapsedWallTimeUs / 1000));
+ }
+ }
+
+ uint64_t BytesToWrite = CalculateBytesToWriteAndFlagNeededChunks(SequenceIndexChunksLeftToWriteCounters,
+ RemoteChunkIndexNeedsCopyFromLocalFileFlags,
+ RemoteChunkIndexNeedsCopyFromSourceFlags);
+
+ for (const ScavengedSequenceCopyOperation& ScavengeCopyOp : ScavengedSequenceCopyOperations)
+ {
+ BytesToWrite += ScavengeCopyOp.RawSize;
+ }
+
+ uint64_t BytesToValidate = m_Options.ValidateCompletedSequences ? BytesToWrite : 0;
+
+ uint64_t TotalRequestCount = 0;
+ uint64_t TotalPartWriteCount = 0;
+ std::atomic<uint64_t> WritePartsComplete = 0;
+
+ tsl::robin_map<std::string, uint32_t> RemotePathToRemoteIndex;
+ RemotePathToRemoteIndex.reserve(m_RemoteContent.Paths.size());
+ for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++)
+ {
+ RemotePathToRemoteIndex.insert({m_RemoteContent.Paths[RemotePathIndex].generic_string(), RemotePathIndex});
+ }
+
+ CheckRequiredDiskSpace(RemotePathToRemoteIndex);
+
+ BlobsExistsResult ExistsResult;
+ {
+ ChunkBlockAnalyser BlockAnalyser(
+ Log(),
+ m_BlockDescriptions,
+ ChunkBlockAnalyser::Options{.IsQuiet = m_Options.IsQuiet,
+ .IsVerbose = m_Options.IsVerbose,
+ .HostLatencySec = m_Storage.BuildStorageHost.LatencySec,
+ .HostHighSpeedLatencySec = m_Storage.CacheHost.LatencySec,
+ .HostMaxRangeCountPerRequest = m_Storage.BuildStorageHost.Caps.MaxRangeCountPerRequest,
+ .HostHighSpeedMaxRangeCountPerRequest = m_Storage.CacheHost.Caps.MaxRangeCountPerRequest});
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = BlockAnalyser.GetNeeded(
+ m_RemoteLookup.ChunkHashToChunkIndex,
+ [&](uint32_t RemoteChunkIndex) -> bool { return RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]; });
+
+ std::vector<uint32_t> FetchBlockIndexes;
+ std::vector<uint32_t> CachedChunkBlockIndexes;
+ ClassifyCachedAndFetchBlocks(NeededBlocks, CachedBlocksFound, TotalPartWriteCount, CachedChunkBlockIndexes, FetchBlockIndexes);
+
+ std::vector<uint32_t> NeededLooseChunkIndexes = DetermineNeededLooseChunkIndexes(SequenceIndexChunksLeftToWriteCounters,
+ RemoteChunkIndexNeedsCopyFromLocalFileFlags,
+ RemoteChunkIndexNeedsCopyFromSourceFlags);
+
+ ExistsResult = QueryBlobCacheExists(NeededLooseChunkIndexes, FetchBlockIndexes);
+
+ std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> BlockPartialDownloadModes =
+ DeterminePartialDownloadModes(ExistsResult);
+ ZEN_ASSERT(BlockPartialDownloadModes.size() == m_BlockDescriptions.size());
+
+ ChunkBlockAnalyser::BlockResult PartialBlocks =
+ BlockAnalyser.CalculatePartialBlockDownloads(NeededBlocks, BlockPartialDownloadModes);
+
+ TotalRequestCount += NeededLooseChunkIndexes.size();
+ TotalPartWriteCount += NeededLooseChunkIndexes.size();
+ TotalRequestCount += PartialBlocks.BlockRanges.size();
+ TotalPartWriteCount += PartialBlocks.BlockRanges.size();
+ TotalRequestCount += PartialBlocks.FullBlockIndexes.size();
+ TotalPartWriteCount += PartialBlocks.FullBlockIndexes.size();
+
+ std::vector<LooseChunkHashWorkData> LooseChunkHashWorks =
+ BuildLooseChunkHashWorks(NeededLooseChunkIndexes, SequenceIndexChunksLeftToWriteCounters);
+
+ ZEN_TRACE_CPU("WriteChunks");
+
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::WriteChunks, (uint32_t)TaskSteps::StepCount);
+
+ Stopwatch WriteTimer;
+
+ FilteredRate FilteredDownloadedBytesPerSecond;
+ FilteredRate FilteredWrittenBytesPerSecond;
+
+ std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Writing");
+ ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+
+ TotalPartWriteCount += CopyChunkDatas.size();
+ TotalPartWriteCount += ScavengedSequenceCopyOperations.size();
+
+ BufferedWriteFileCache WriteCache;
+
+ WriteChunksContext Context{.Work = Work,
+ .WriteCache = WriteCache,
+ .SequenceIndexChunksLeftToWriteCounters = SequenceIndexChunksLeftToWriteCounters,
+ .RemoteChunkIndexNeedsCopyFromSourceFlags = RemoteChunkIndexNeedsCopyFromSourceFlags,
+ .WritePartsComplete = WritePartsComplete,
+ .TotalPartWriteCount = TotalPartWriteCount,
+ .TotalRequestCount = TotalRequestCount,
+ .ExistsResult = ExistsResult,
+ .FilteredDownloadedBytesPerSecond = FilteredDownloadedBytesPerSecond,
+ .FilteredWrittenBytesPerSecond = FilteredWrittenBytesPerSecond};
+
+ ScheduleScavengedSequenceWrites(Context, ScavengedSequenceCopyOperations, ScavengedContents, ScavengedPaths);
+ ScheduleLooseChunkWrites(Context, LooseChunkHashWorks);
+
+ std::unique_ptr<CloneQueryInterface> CloneQuery =
+ m_Options.AllowFileClone ? GetCloneQueryInterface(m_CacheFolderPath) : nullptr;
+
+ ScheduleLocalChunkCopies(Context, CopyChunkDatas, CloneQuery.get(), ScavengedContents, ScavengedLookups, ScavengedPaths);
+ ScheduleCachedBlockWrites(Context, CachedChunkBlockIndexes);
+ SchedulePartialBlockDownloads(Context, PartialBlocks);
+ ScheduleFullBlockDownloads(Context, PartialBlocks.FullBlockIndexes);
+
+ {
+ ZEN_TRACE_CPU("WriteChunks_Wait");
+
+ Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ ZEN_UNUSED(PendingWork);
+ uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() +
+ m_DownloadStats.DownloadedBlockByteCount.load() +
+ +m_DownloadStats.DownloadedPartialBlockByteCount.load();
+ FilteredWrittenBytesPerSecond.Update(m_DiskStats.WriteByteCount.load());
+ FilteredDownloadedBytesPerSecond.Update(DownloadedBytes);
+ std::string DownloadRateString =
+ (m_DownloadStats.RequestsCompleteCount == TotalRequestCount)
+ ? ""
+ : fmt::format(" {}bits/s", NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8));
+ std::string CloneDetails;
+ if (m_DiskStats.CloneCount.load() > 0)
+ {
+ CloneDetails = fmt::format(" ({} cloned)", NiceBytes(m_DiskStats.CloneByteCount.load()));
+ }
+ std::string WriteDetails = fmt::format(" {}/{} ({}B/s) written{}",
+ NiceBytes(m_WrittenChunkByteCount.load()),
+ NiceBytes(BytesToWrite),
+ NiceNum(FilteredWrittenBytesPerSecond.GetCurrent()),
+ CloneDetails);
+
+ std::string Details = fmt::format("{}/{} ({}{}) downloaded.{}",
+ m_DownloadStats.RequestsCompleteCount.load(),
+ TotalRequestCount,
+ NiceBytes(DownloadedBytes),
+ DownloadRateString,
+ WriteDetails);
+
+ std::string Task;
+ if ((m_WrittenChunkByteCount < BytesToWrite) || (BytesToValidate == 0))
+ {
+ Task = "Writing chunks ";
+ }
+ else
+ {
+ Task = "Verifying chunks ";
+ }
+
+ ProgressBar->UpdateState({.Task = Task,
+ .Details = Details,
+ .TotalCount = (BytesToWrite + BytesToValidate),
+ .RemainingCount = ((BytesToWrite + BytesToValidate) -
+ (m_WrittenChunkByteCount.load() + m_ValidatedChunkByteCount.load())),
+ .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+ false);
+ });
+ }
+
+ CloneQuery.reset();
+
+ FilteredWrittenBytesPerSecond.Stop();
+ FilteredDownloadedBytesPerSecond.Stop();
+
+ ProgressBar->Finish();
+ if (m_AbortFlag)
+ {
+ return;
+ }
+
+ VerifyWriteChunksComplete(SequenceIndexChunksLeftToWriteCounters, BytesToWrite, BytesToValidate);
+
+ const uint64_t DownloadedBytes = m_DownloadStats.DownloadedChunkByteCount.load() +
+ m_DownloadStats.DownloadedBlockByteCount.load() +
+ m_DownloadStats.DownloadedPartialBlockByteCount.load();
+ if (!m_Options.IsQuiet)
+ {
+ std::string CloneDetails;
+ if (m_DiskStats.CloneCount.load() > 0)
+ {
+ CloneDetails = fmt::format(" ({} cloned)", NiceBytes(m_DiskStats.CloneByteCount.load()));
+ }
+ ZEN_INFO("Downloaded {} ({}bits/s) in {}. Wrote {} ({}B/s){} in {}. Completed in {}",
+ NiceBytes(DownloadedBytes),
+ NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), DownloadedBytes * 8)),
+ NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000),
+ NiceBytes(m_WrittenChunkByteCount.load()),
+ NiceNum(GetBytesPerSecond(FilteredWrittenBytesPerSecond.GetElapsedTimeUS(), m_DiskStats.WriteByteCount.load())),
+ CloneDetails,
+ NiceTimeSpanMs(FilteredWrittenBytesPerSecond.GetElapsedTimeUS() / 1000),
+ NiceTimeSpanMs(WriteTimer.GetElapsedTimeMs()));
+ }
+
+ m_WriteChunkStats.WriteChunksElapsedWallTimeUs = WriteTimer.GetElapsedTimeUs();
+ m_WriteChunkStats.DownloadTimeUs = FilteredDownloadedBytesPerSecond.GetElapsedTimeUS();
+ m_WriteChunkStats.WriteTimeUs = FilteredWrittenBytesPerSecond.GetElapsedTimeUS();
+ }
+
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::PrepareTarget, (uint32_t)TaskSteps::StepCount);
+
+ if (m_AbortFlag)
+ {
+ return;
+ }
+
+ LocalPathCategorization Categorization = CategorizeLocalPaths(RemotePathToRemoteIndex);
+
+ if (m_AbortFlag)
+ {
+ return;
+ }
+
+ std::atomic<uint64_t> CachedCount = 0;
+ std::atomic<uint64_t> CachedByteCount = 0;
+ ScheduleLocalFileCaching(Categorization.FilesToCache, CachedCount, CachedByteCount);
+ if (m_AbortFlag)
+ {
+ return;
+ }
+
+ ZEN_DEBUG(
+ "Local state prep: Match: {}, PathMismatch: {}, HashMismatch: {}, Cached: {} ({}), Skipped: {}, "
+ "Delete: {}",
+ Categorization.MatchCount,
+ Categorization.PathMismatchCount,
+ Categorization.HashMismatchCount,
+ CachedCount.load(),
+ NiceBytes(CachedByteCount.load()),
+ Categorization.SkippedCount,
+ Categorization.DeleteCount);
+
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::FinalizeTarget, (uint32_t)TaskSteps::StepCount);
+
+ if (m_Options.WipeTargetFolder)
+ {
+ ZEN_TRACE_CPU("WipeTarget");
+ Stopwatch Timer;
+
+ // Clean target folder
+ if (!CleanDirectory(Log(),
+ m_Progress,
+ m_IOWorkerPool,
+ m_AbortFlag,
+ m_PauseFlag,
+ m_Options.IsQuiet,
+ m_Path,
+ m_Options.ExcludeFolders))
+ {
+ ZEN_WARN("Some files in {} could not be removed", m_Path);
+ }
+ m_RebuildFolderStateStats.CleanFolderElapsedWallTimeUs = Timer.GetElapsedTimeUs();
+ }
+
+ if (m_AbortFlag)
+ {
+ return;
+ }
+
+ {
+ ZEN_TRACE_CPU("FinalizeTree");
+
+ Stopwatch Timer;
+
+ std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Rebuild State");
+ ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+
+ OutLocalFolderState.Paths.resize(m_RemoteContent.Paths.size());
+ OutLocalFolderState.RawSizes.resize(m_RemoteContent.Paths.size());
+ OutLocalFolderState.Attributes.resize(m_RemoteContent.Paths.size());
+ OutLocalFolderState.ModificationTicks.resize(m_RemoteContent.Paths.size());
+
+ std::atomic<uint64_t> DeletedCount = 0;
+ std::atomic<uint64_t> TargetsComplete = 0;
+
+ ScheduleLocalFileRemovals(Work, Categorization.RemoveLocalPathIndexes, DeletedCount);
+
+ std::vector<FinalizeTarget> Targets = BuildSortedFinalizeTargets();
+
+ ScheduleTargetFinalization(Work,
+ Targets,
+ Categorization.SequenceHashToLocalPathIndex,
+ Categorization.RemotePathIndexToLocalPathIndex,
+ OutLocalFolderState,
+ TargetsComplete);
+
+ {
+ ZEN_TRACE_CPU("FinalizeTree_Wait");
+
+ Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ ZEN_UNUSED(PendingWork);
+ const uint64_t WorkTotal = Targets.size() + Categorization.RemoveLocalPathIndexes.size();
+ const uint64_t WorkComplete = TargetsComplete.load() + DeletedCount.load();
+ std::string Details = fmt::format("{}/{} files", WorkComplete, WorkTotal);
+ ProgressBar->UpdateState({.Task = "Rebuilding state ",
+ .Details = Details,
+ .TotalCount = gsl::narrow<uint64_t>(WorkTotal),
+ .RemainingCount = gsl::narrow<uint64_t>(WorkTotal - WorkComplete),
+ .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+ false);
+ });
+ }
+
+ m_RebuildFolderStateStats.FinalizeTreeElapsedWallTimeUs = Timer.GetElapsedTimeUs();
+ ProgressBar->Finish();
+ }
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount);
+ }
+ catch (const std::exception&)
+ {
+ m_AbortFlag = true;
+ throw;
+ }
+}
+
// Scans the cache folder and classifies every file found there. Files are
// named by their IoHash; a file is kept when its parsed hash matches either a
// remote chunk (hash + raw size) or a remote sequence raw hash (hash + size).
// Matches are recorded in the output maps (hash -> remote index) and in
// m_CacheMappingStats; every other file is deleted. When target-folder
// scavenging is disabled nothing is matched and the cache folder is emptied.
void
BuildsOperationUpdateFolder::ScanCacheFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedChunkHashesFound,
                                             tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedSequenceHashesFound)
{
    ZEN_TRACE_CPU("ScanCacheFolder");

    Stopwatch CacheTimer;

    DirectoryContent CacheDirContent;
    GetDirectoryContent(m_CacheFolderPath, DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, CacheDirContent);
    for (size_t Index = 0; Index < CacheDirContent.Files.size(); Index++)
    {
        if (m_Options.EnableTargetFolderScavenging)
        {
            IoHash FileHash;
            if (IoHash::TryParse(CacheDirContent.Files[Index].filename().string(), FileHash))
            {
                // NOTE(review): the chunk lookup takes precedence. If the hash
                // matches a chunk but the size does not, the sequence lookup is
                // never consulted (else-if) and the file is removed — confirm a
                // hash cannot legitimately be both a chunk hash and a sequence
                // raw hash.
                if (auto ChunkIt = m_RemoteLookup.ChunkHashToChunkIndex.find(FileHash);
                    ChunkIt != m_RemoteLookup.ChunkHashToChunkIndex.end())
                {
                    const uint32_t ChunkIndex = ChunkIt->second;
                    const uint64_t ChunkSize = m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
                    // Size check guards against truncated or stale cache files.
                    if (ChunkSize == CacheDirContent.FileSizes[Index])
                    {
                        OutCachedChunkHashesFound.insert({FileHash, ChunkIndex});
                        m_CacheMappingStats.CacheChunkCount++;
                        m_CacheMappingStats.CacheChunkByteCount += ChunkSize;
                        continue; // keep the file
                    }
                }
                else if (auto SequenceIt = m_RemoteLookup.RawHashToSequenceIndex.find(FileHash);
                         SequenceIt != m_RemoteLookup.RawHashToSequenceIndex.end())
                {
                    const uint32_t SequenceIndex = SequenceIt->second;
                    // Sequence size is read from its first path entry.
                    const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex];
                    const uint64_t SequenceSize = m_RemoteContent.RawSizes[PathIndex];
                    if (SequenceSize == CacheDirContent.FileSizes[Index])
                    {
                        OutCachedSequenceHashesFound.insert({FileHash, SequenceIndex});
                        m_CacheMappingStats.CacheSequenceHashesCount++;
                        m_CacheMappingStats.CacheSequenceHashesByteCount += SequenceSize;

                        const std::filesystem::path CacheFilePath =
                            GetFinalChunkedSequenceFileName(m_CacheFolderPath,
                                                           m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]);
                        ZEN_ASSERT_SLOW(IsFile(CacheFilePath));

                        continue; // keep the file
                    }
                }
            }
        }
        // Unknown, mismatched, or unneeded file: best-effort removal; failure
        // is only logged at debug level.
        std::error_code Ec = TryRemoveFile(CacheDirContent.Files[Index]);
        if (Ec)
        {
            ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", CacheDirContent.Files[Index], Ec.value(), Ec.message());
        }
    }
    m_CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs();
}
+
+void
+BuildsOperationUpdateFolder::ScanTempBlocksFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedBlocksFound)
+{
+ ZEN_TRACE_CPU("ScanTempBlocksFolder");
+
+ Stopwatch CacheTimer;
+
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> AllBlockSizes;
+ AllBlockSizes.reserve(m_BlockDescriptions.size());
+ for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++)
+ {
+ const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
+ AllBlockSizes.insert({BlockDescription.BlockHash, BlockIndex});
+ }
+
+ DirectoryContent BlockDirContent;
+ GetDirectoryContent(m_TempBlockFolderPath,
+ DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes,
+ BlockDirContent);
+ OutCachedBlocksFound.reserve(BlockDirContent.Files.size());
+ for (size_t Index = 0; Index < BlockDirContent.Files.size(); Index++)
+ {
+ if (m_Options.EnableTargetFolderScavenging)
+ {
+ IoHash FileHash;
+ if (IoHash::TryParse(BlockDirContent.Files[Index].filename().string(), FileHash))
+ {
+ if (auto BlockIt = AllBlockSizes.find(FileHash); BlockIt != AllBlockSizes.end())
+ {
+ const uint32_t BlockIndex = BlockIt->second;
+ const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
+ uint64_t BlockSize = CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize;
+ for (uint64_t ChunkSize : BlockDescription.ChunkCompressedLengths)
+ {
+ BlockSize += ChunkSize;
+ }
+
+ if (BlockSize == BlockDirContent.FileSizes[Index])
+ {
+ OutCachedBlocksFound.insert({FileHash, BlockIndex});
+ m_CacheMappingStats.CacheBlockCount++;
+ m_CacheMappingStats.CacheBlocksByteCount += BlockSize;
+ continue;
+ }
+ }
+ }
+ }
+ std::error_code Ec = TryRemoveFile(BlockDirContent.Files[Index]);
+ if (Ec)
+ {
+ ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", BlockDirContent.Files[Index], Ec.value(), Ec.message());
+ }
+ }
+
+ m_CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs();
+}
+
+void
+BuildsOperationUpdateFolder::InitializeSequenceCounters(std::vector<std::atomic<uint32_t>>& OutSequenceCounters,
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutSequencesLeftToFind,
+ const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedChunkHashesFound,
+ const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound)
+{
+ if (m_Options.EnableTargetFolderScavenging)
+ {
+ // Pick up all whole files we can use from current local state
+ ZEN_TRACE_CPU("GetLocalSequences");
+
+ std::vector<uint32_t> MissingSequenceIndexes = ScanTargetFolder(CachedChunkHashesFound, CachedSequenceHashesFound);
+
+ for (uint32_t RemoteSequenceIndex : MissingSequenceIndexes)
+ {
+ // We must write the sequence
+ const uint32_t ChunkCount = m_RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex];
+ const IoHash& RemoteSequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
+ OutSequenceCounters[RemoteSequenceIndex] = ChunkCount;
+ OutSequencesLeftToFind.insert({RemoteSequenceRawHash, RemoteSequenceIndex});
+ }
+ }
+ else
+ {
+ for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < m_RemoteContent.ChunkedContent.SequenceRawHashes.size();
+ RemoteSequenceIndex++)
+ {
+ OutSequenceCounters[RemoteSequenceIndex] = m_RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex];
+ }
+ }
+}
+
// Matches whole-file sequences discovered in scavenge sources against the
// remote sequences still missing. Each match emits a copy operation
// (scavenged file -> cache), removes the hash from InOutSequencesLeftToFind,
// and zeroes the sequence's chunk counter so the chunk-write phase skips it.
// The outer loop stops early once every wanted sequence has been found.
// OutScavengedPathsCount counts the scavenge sources actually inspected.
void
BuildsOperationUpdateFolder::MatchScavengedSequencesToRemote(std::span<const ChunkedFolderContent> Contents,
                                                             std::span<const ChunkedContentLookup> Lookups,
                                                             std::span<const std::filesystem::path> Paths,
                                                             tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& InOutSequencesLeftToFind,
                                                             std::vector<std::atomic<uint32_t>>& InOutSequenceCounters,
                                                             std::vector<ScavengedSequenceCopyOperation>& OutCopyOperations,
                                                             uint64_t& OutScavengedPathsCount)
{
    for (uint32_t ScavengedContentIndex = 0; ScavengedContentIndex < Contents.size() && !InOutSequencesLeftToFind.empty();
         ScavengedContentIndex++)
    {
        // An empty path marks a source whose scan failed or was skipped.
        const std::filesystem::path& ScavengePath = Paths[ScavengedContentIndex];
        if (ScavengePath.empty())
        {
            continue;
        }
        const ChunkedFolderContent& ScavengedLocalContent = Contents[ScavengedContentIndex];
        const ChunkedContentLookup& ScavengedLookup = Lookups[ScavengedContentIndex];

        for (uint32_t ScavengedSequenceIndex = 0; ScavengedSequenceIndex < ScavengedLocalContent.ChunkedContent.SequenceRawHashes.size();
             ScavengedSequenceIndex++)
        {
            const IoHash& SequenceRawHash = ScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengedSequenceIndex];
            auto It = InOutSequencesLeftToFind.find(SequenceRawHash);
            if (It == InOutSequencesLeftToFind.end())
            {
                continue;
            }
            const uint32_t RemoteSequenceIndex = It->second;
            // Sequence raw size is read from its first remote path entry.
            const uint64_t RawSize = m_RemoteContent.RawSizes[m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]];
            ZEN_ASSERT(RawSize > 0);

            const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[ScavengedSequenceIndex];
            ZEN_ASSERT_SLOW(IsFile((ScavengePath / ScavengedLocalContent.Paths[ScavengedPathIndex]).make_preferred()));

            OutCopyOperations.push_back({.ScavengedContentIndex = ScavengedContentIndex,
                                         .ScavengedPathIndex = ScavengedPathIndex,
                                         .RemoteSequenceIndex = RemoteSequenceIndex,
                                         .RawSize = RawSize});

            // Claimed: drop it from the wanted set and mark the sequence as
            // requiring no further chunk writes.
            InOutSequencesLeftToFind.erase(SequenceRawHash);
            InOutSequenceCounters[RemoteSequenceIndex] = 0;

            m_CacheMappingStats.ScavengedPathsMatchingSequencesCount++;
            m_CacheMappingStats.ScavengedPathsMatchingSequencesByteCount += RawSize;
        }
        OutScavengedPathsCount++;
    }
}
+
+uint64_t
+BuildsOperationUpdateFolder::CalculateBytesToWriteAndFlagNeededChunks(std::span<const std::atomic<uint32_t>> SequenceCounters,
+ const std::vector<bool>& NeedsCopyFromLocalFileFlags,
+ std::span<std::atomic<bool>> OutNeedsCopyFromSourceFlags)
+{
+ uint64_t BytesToWrite = 0;
+ for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++)
+ {
+ const uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceCounters, RemoteChunkIndex);
+ if (ChunkWriteCount > 0)
+ {
+ BytesToWrite += m_RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] * ChunkWriteCount;
+ if (!NeedsCopyFromLocalFileFlags[RemoteChunkIndex])
+ {
+ OutNeedsCopyFromSourceFlags[RemoteChunkIndex] = true;
+ }
+ }
+ }
+ return BytesToWrite;
+}
+
// Splits the needed blocks into those already present in the temp-block
// folder (OutCachedChunkBlockIndexes) and those that must be fetched
// (OutFetchBlockIndexes). TotalPartWriteCount is incremented for every block
// whose hash appears in CachedBlocksFound.
void
BuildsOperationUpdateFolder::ClassifyCachedAndFetchBlocks(std::span<const ChunkBlockAnalyser::NeededBlock> NeededBlocks,
                                                          const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedBlocksFound,
                                                          uint64_t& TotalPartWriteCount,
                                                          std::vector<uint32_t>& OutCachedChunkBlockIndexes,
                                                          std::vector<uint32_t>& OutFetchBlockIndexes)
{
    ZEN_TRACE_CPU("BlockCacheFileExists");
    for (const ChunkBlockAnalyser::NeededBlock& NeededBlock : NeededBlocks)
    {
        const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex];
        bool UsingCachedBlock = false;
        if (auto It = CachedBlocksFound.find(BlockDescription.BlockHash); It != CachedBlocksFound.end())
        {
            // NOTE(review): the part-write counter is bumped as soon as the
            // hash is in CachedBlocksFound, even if the file check below fails
            // and the block falls through to the fetch list — confirm the
            // fetch path does not count the same block a second time.
            TotalPartWriteCount++;

            // Re-verify the file still exists on disk before trusting the scan.
            std::filesystem::path BlockPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString();
            if (IsFile(BlockPath))
            {
                OutCachedChunkBlockIndexes.push_back(NeededBlock.BlockIndex);
                UsingCachedBlock = true;
            }
        }
        if (!UsingCachedBlock)
        {
            OutFetchBlockIndexes.push_back(NeededBlock.BlockIndex);
        }
    }
}
+
// Determines which loose (pre-downloaded, standalone) chunks are still needed
// for the write phase. A loose chunk is skipped when its remote chunk is
// already covered by a local-file copy, when another entry has already claimed
// it (atomic compare-exchange below), or when no target locations remain.
// Returns the indexes into m_LooseChunkHashes that must still be written.
std::vector<uint32_t>
BuildsOperationUpdateFolder::DetermineNeededLooseChunkIndexes(std::span<const std::atomic<uint32_t>> SequenceCounters,
                                                              const std::vector<bool>& NeedsCopyFromLocalFileFlags,
                                                              std::span<std::atomic<bool>> NeedsCopyFromSourceFlags)
{
    std::vector<uint32_t> NeededLooseChunkIndexes;
    NeededLooseChunkIndexes.reserve(m_LooseChunkHashes.size());
    for (uint32_t LooseChunkIndex = 0; LooseChunkIndex < m_LooseChunkHashes.size(); LooseChunkIndex++)
    {
        const IoHash& ChunkHash = m_LooseChunkHashes[LooseChunkIndex];
        auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash);
        ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end());
        const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second;

        if (NeedsCopyFromLocalFileFlags[RemoteChunkIndex])
        {
            if (m_Options.IsVerbose)
            {
                ZEN_INFO("Skipping chunk {} due to cache reuse", m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]);
            }
            continue;
        }

        // CAS from true -> false claims this remote chunk exactly once; any
        // later loose entry with the same remote chunk sees false and skips.
        bool NeedsCopy = true;
        if (NeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false))
        {
            const uint64_t WriteCount = GetChunkWriteCount(SequenceCounters, RemoteChunkIndex);
            if (WriteCount == 0)
            {
                // Claimed, but every target location is already satisfied.
                if (m_Options.IsVerbose)
                {
                    ZEN_INFO("Skipping chunk {} due to cache reuse", m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]);
                }
            }
            else
            {
                NeededLooseChunkIndexes.push_back(LooseChunkIndex);
            }
        }
    }
    return NeededLooseChunkIndexes;
}
+
+BuildsOperationUpdateFolder::BlobsExistsResult
+BuildsOperationUpdateFolder::QueryBlobCacheExists(std::span<const uint32_t> NeededLooseChunkIndexes,
+ std::span<const uint32_t> FetchBlockIndexes)
+{
+ BlobsExistsResult Result;
+ if (!m_Storage.CacheStorage)
+ {
+ return Result;
+ }
+
+ ZEN_TRACE_CPU("BlobCacheExistCheck");
+ Stopwatch Timer;
+
+ std::vector<IoHash> BlobHashes;
+ BlobHashes.reserve(NeededLooseChunkIndexes.size() + FetchBlockIndexes.size());
+
+ for (const uint32_t LooseChunkIndex : NeededLooseChunkIndexes)
+ {
+ BlobHashes.push_back(m_LooseChunkHashes[LooseChunkIndex]);
+ }
+
+ for (uint32_t BlockIndex : FetchBlockIndexes)
+ {
+ BlobHashes.push_back(m_BlockDescriptions[BlockIndex].BlockHash);
+ }
+
+ const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = m_Storage.CacheStorage->BlobsExists(m_BuildId, BlobHashes);
+
+ if (CacheExistsResult.size() == BlobHashes.size())
+ {
+ Result.ExistingBlobs.reserve(CacheExistsResult.size());
+ for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++)
+ {
+ if (CacheExistsResult[BlobIndex].HasBody)
+ {
+ Result.ExistingBlobs.insert(BlobHashes[BlobIndex]);
+ }
+ }
+ }
+ Result.ElapsedTimeMs = Timer.GetElapsedTimeMs();
+ if (!Result.ExistingBlobs.empty() && !m_Options.IsQuiet)
+ {
+ ZEN_INFO("Remote cache : Found {} out of {} needed blobs in {}",
+ Result.ExistingBlobs.size(),
+ BlobHashes.size(),
+ NiceTimeSpanMs(Result.ElapsedTimeMs));
+ }
+ return Result;
+}
+
+std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode>
+BuildsOperationUpdateFolder::DeterminePartialDownloadModes(const BlobsExistsResult& ExistsResult)
+{
+ std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> Modes;
+
+ if (m_Options.PartialBlockRequestMode == EPartialBlockRequestMode::Off)
+ {
+ Modes.resize(m_BlockDescriptions.size(), ChunkBlockAnalyser::EPartialBlockDownloadMode::Off);
+ return Modes;
+ }
+
+ const bool MultiRangeCache = m_Storage.CacheHost.Caps.MaxRangeCountPerRequest > 1;
+ const bool MultiRangeBuild = m_Storage.BuildStorageHost.Caps.MaxRangeCountPerRequest > 1;
+ ChunkBlockAnalyser::EPartialBlockDownloadMode CachePartialDownloadMode =
+ MultiRangeCache ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
+ : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange;
+ ChunkBlockAnalyser::EPartialBlockDownloadMode CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off;
+
+ switch (m_Options.PartialBlockRequestMode)
+ {
+ case EPartialBlockRequestMode::Off:
+ break;
+ case EPartialBlockRequestMode::ZenCacheOnly:
+ CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off;
+ break;
+ case EPartialBlockRequestMode::Mixed:
+ CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange;
+ break;
+ case EPartialBlockRequestMode::All:
+ CloudPartialDownloadMode = MultiRangeBuild ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange
+ : ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange;
+ break;
+ default:
+ ZEN_ASSERT(false);
+ break;
+ }
+
+ Modes.reserve(m_BlockDescriptions.size());
+ for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++)
+ {
+ const bool BlockExistInCache = ExistsResult.ExistingBlobs.contains(m_BlockDescriptions[BlockIndex].BlockHash);
+ Modes.push_back(BlockExistInCache ? CachePartialDownloadMode : CloudPartialDownloadMode);
+ }
+ return Modes;
+}
+
+std::vector<BuildsOperationUpdateFolder::LooseChunkHashWorkData>
+BuildsOperationUpdateFolder::BuildLooseChunkHashWorks(std::span<const uint32_t> NeededLooseChunkIndexes,
+ std::span<const std::atomic<uint32_t>> SequenceCounters)
+{
+ std::vector<LooseChunkHashWorkData> LooseChunkHashWorks;
+ LooseChunkHashWorks.reserve(NeededLooseChunkIndexes.size());
+ for (uint32_t LooseChunkIndex : NeededLooseChunkIndexes)
+ {
+ const IoHash& ChunkHash = m_LooseChunkHashes[LooseChunkIndex];
+ auto RemoteChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash);
+ ZEN_ASSERT(RemoteChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end());
+ const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second;
+
+ std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs =
+ GetRemainingChunkTargets(SequenceCounters, RemoteChunkIndex);
+
+ ZEN_ASSERT(!ChunkTargetPtrs.empty());
+ LooseChunkHashWorks.push_back(LooseChunkHashWorkData{.ChunkTargetPtrs = ChunkTargetPtrs, .RemoteChunkIndex = RemoteChunkIndex});
+ }
+ return LooseChunkHashWorks;
+}
+
+void
+BuildsOperationUpdateFolder::VerifyWriteChunksComplete(std::span<const std::atomic<uint32_t>> SequenceCounters,
+ uint64_t BytesToWrite,
+ uint64_t BytesToValidate)
+{
+ uint32_t RawSequencesMissingWriteCount = 0;
+ for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceCounters.size(); SequenceIndex++)
+ {
+ const auto& Counter = SequenceCounters[SequenceIndex];
+ if (Counter.load() != 0)
+ {
+ RawSequencesMissingWriteCount++;
+ const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex];
+ const std::filesystem::path& IncompletePath = m_RemoteContent.Paths[PathIndex];
+ ZEN_ASSERT(!IncompletePath.empty());
+ const uint32_t ExpectedSequenceCount = m_RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex];
+ if (!m_Options.IsQuiet)
+ {
+ ZEN_INFO("{}: Max count {}, Current count {}", IncompletePath, ExpectedSequenceCount, Counter.load());
+ }
+ ZEN_ASSERT(Counter.load() <= ExpectedSequenceCount);
+ }
+ }
+ ZEN_ASSERT(RawSequencesMissingWriteCount == 0);
+ ZEN_ASSERT(m_WrittenChunkByteCount == BytesToWrite);
+ ZEN_ASSERT(m_ValidatedChunkByteCount == BytesToValidate);
+}
+
+std::vector<BuildsOperationUpdateFolder::FinalizeTarget>
+BuildsOperationUpdateFolder::BuildSortedFinalizeTargets()
+{
+ std::vector<FinalizeTarget> Targets;
+ Targets.reserve(m_RemoteContent.Paths.size());
+ for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++)
+ {
+ Targets.push_back(FinalizeTarget{.RawHash = m_RemoteContent.RawHashes[RemotePathIndex], .RemotePathIndex = RemotePathIndex});
+ }
+ std::sort(Targets.begin(), Targets.end(), [](const FinalizeTarget& Lhs, const FinalizeTarget& Rhs) {
+ return std::tie(Lhs.RawHash, Lhs.RemotePathIndex) < std::tie(Rhs.RawHash, Rhs.RemotePathIndex);
+ });
+ return Targets;
+}
+
// Scans every configured scavenge source in parallel on the IO worker pool.
// For each source, FindScavengeContent fills the content/lookup slot and the
// matching OutPaths entry is set to the source path; a failed scan leaves its
// OutPaths entry empty (the marker later phases use to skip the source).
// All three output vectors are index-aligned with Sources. Progress is shown
// while waiting for the parallel work to drain.
void
BuildsOperationUpdateFolder::ScanScavengeSources(std::span<const ScavengeSource> Sources,
                                                 std::vector<ChunkedFolderContent>& OutContents,
                                                 std::vector<ChunkedContentLookup>& OutLookups,
                                                 std::vector<std::filesystem::path>& OutPaths)
{
    ZEN_TRACE_CPU("ScanScavengeSources");

    // Pre-size the outputs so each worker writes only to its own slot.
    const size_t ScavengePathCount = Sources.size();
    OutContents.resize(ScavengePathCount);
    OutLookups.resize(ScavengePathCount);
    OutPaths.resize(ScavengePathCount);

    std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Scavenging");

    ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);

    std::atomic<uint64_t> PathsFound(0);
    std::atomic<uint64_t> ChunksFound(0);
    std::atomic<uint64_t> PathsScavenged(0);

    for (size_t ScavengeIndex = 0; ScavengeIndex < ScavengePathCount; ScavengeIndex++)
    {
        // References captured here (outputs, counters) outlive Work.Wait below.
        Work.ScheduleWork(m_IOWorkerPool,
                          [this, &Sources, &OutContents, &OutPaths, &OutLookups, &PathsFound, &ChunksFound, &PathsScavenged, ScavengeIndex](
                              std::atomic<bool>&) {
                              if (!m_AbortFlag)
                              {
                                  ZEN_TRACE_CPU("Async_FindScavengeContent");

                                  const ScavengeSource& Source = Sources[ScavengeIndex];
                                  ChunkedFolderContent& ScavengedLocalContent = OutContents[ScavengeIndex];
                                  ChunkedContentLookup& ScavengedLookup = OutLookups[ScavengeIndex];

                                  if (FindScavengeContent(Source, ScavengedLocalContent, ScavengedLookup))
                                  {
                                      OutPaths[ScavengeIndex] = Source.Path;
                                      PathsFound += ScavengedLocalContent.Paths.size();
                                      ChunksFound += ScavengedLocalContent.ChunkedContent.ChunkHashes.size();
                                  }
                                  else
                                  {
                                      // Empty path = "source unusable" marker.
                                      OutPaths[ScavengeIndex].clear();
                                  }
                                  PathsScavenged++;
                              }
                          });
    }
    {
        ZEN_TRACE_CPU("ScavengeScan_Wait");

        // Drain the scheduled work, refreshing the progress bar periodically.
        Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
            ZEN_UNUSED(PendingWork);
            std::string Details = fmt::format("{}/{} scanned. {} paths and {} chunks found for scavenging",
                                              PathsScavenged.load(),
                                              ScavengePathCount,
                                              PathsFound.load(),
                                              ChunksFound.load());
            ProgressBar->UpdateState({.Task = "Scavenging ",
                                      .Details = Details,
                                      .TotalCount = ScavengePathCount,
                                      .RemainingCount = ScavengePathCount - PathsScavenged.load(),
                                      .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
                                     false);
        });
    }

    ProgressBar->Finish();
}
+
// Categorizes every file currently in the target folder against the remote
// content. With scavenging enabled (and no wipe pending), a local file is:
//  - a match when its path and raw hash agree with the remote entry (kept in
//    place and recorded in the remote->local index maps),
//  - queued for caching when its content matches a needed remote sequence
//    that has not been cached yet (first occurrence per hash only),
//  - otherwise it falls through to the delete list (unless the whole folder
//    will be wiped anyway, in which case explicit deletion is skipped).
// Counters for each outcome are accumulated in the returned struct.
BuildsOperationUpdateFolder::LocalPathCategorization
BuildsOperationUpdateFolder::CategorizeLocalPaths(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex)
{
    ZEN_TRACE_CPU("PrepareTarget");

    LocalPathCategorization Result;
    // Tracks sequence hashes already queued for caching, so duplicated local
    // content is cached only once.
    tsl::robin_set<IoHash, IoHash::Hasher> CachedRemoteSequences;

    Result.RemotePathIndexToLocalPathIndex.reserve(m_RemoteContent.Paths.size());

    for (uint32_t LocalPathIndex = 0; LocalPathIndex < m_LocalContent.Paths.size(); LocalPathIndex++)
    {
        if (m_AbortFlag)
        {
            break;
        }
        const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex];
        const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex];

        ZEN_ASSERT_SLOW(IsFile((m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred()));

        if (m_Options.EnableTargetFolderScavenging)
        {
            if (!m_Options.WipeTargetFolder)
            {
                // Check if it is already in the correct place
                if (auto RemotePathIt = RemotePathToRemoteIndex.find(LocalPath.generic_string());
                    RemotePathIt != RemotePathToRemoteIndex.end())
                {
                    const uint32_t RemotePathIndex = RemotePathIt->second;
                    if (m_RemoteContent.RawHashes[RemotePathIndex] == RawHash)
                    {
                        // It is already in it's correct place
                        Result.RemotePathIndexToLocalPathIndex[RemotePathIndex] = LocalPathIndex;
                        Result.SequenceHashToLocalPathIndex.insert({RawHash, LocalPathIndex});
                        Result.MatchCount++;
                        continue;
                    }
                    else
                    {
                        // Right path, wrong content.
                        Result.HashMismatchCount++;
                    }
                }
                else
                {
                    // Path does not exist in the remote build.
                    Result.PathMismatchCount++;
                }
            }

            // Do we need it?
            if (m_RemoteLookup.RawHashToSequenceIndex.contains(RawHash))
            {
                if (!CachedRemoteSequences.contains(RawHash))
                {
                    // We need it, make sure we move it to the cache
                    Result.FilesToCache.push_back(LocalPathIndex);
                    CachedRemoteSequences.insert(RawHash);
                    continue;
                }
                else
                {
                    // Same content already queued for caching once.
                    Result.SkippedCount++;
                }
            }
        }

        if (!m_Options.WipeTargetFolder)
        {
            // Explicitly delete the unneeded local file
            Result.RemoveLocalPathIndexes.push_back(LocalPathIndex);
            Result.DeleteCount++;
        }
    }

    return Result;
}
+
+void
+BuildsOperationUpdateFolder::ScheduleLocalFileCaching(std::span<const uint32_t> FilesToCache,
+ std::atomic<uint64_t>& OutCachedCount,
+ std::atomic<uint64_t>& OutCachedByteCount)
+{
+ ZEN_TRACE_CPU("CopyToCache");
+
+ std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Cache Local Data");
+ ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+
+ for (uint32_t LocalPathIndex : FilesToCache)
+ {
+ if (m_AbortFlag)
+ {
+ break;
+ }
+ Work.ScheduleWork(m_IOWorkerPool, [this, &OutCachedCount, &OutCachedByteCount, LocalPathIndex](std::atomic<bool>&) {
+ if (!m_AbortFlag)
+ {
+ ZEN_TRACE_CPU("Async_CopyToCache");
+
+ const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex];
+ const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex];
+ const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RawHash);
+ ZEN_ASSERT_SLOW(!IsFileWithRetry(CacheFilePath));
+ const std::filesystem::path LocalFilePath = (m_Path / LocalPath).make_preferred();
+
+ std::error_code Ec = RenameFileWithRetry(LocalFilePath, CacheFilePath);
+ if (Ec)
+ {
+ ZEN_WARN("Failed to move file from '{}' to '{}', reason: ({}) {}, retrying...",
+ LocalFilePath,
+ CacheFilePath,
+ Ec.value(),
+ Ec.message());
+ Ec = RenameFileWithRetry(LocalFilePath, CacheFilePath);
+ if (Ec)
+ {
+ throw std::system_error(std::error_code(Ec.value(), std::system_category()),
+ fmt::format("Failed to file from '{}' to '{}', reason: ({}) {}",
+ LocalFilePath,
+ CacheFilePath,
+ Ec.value(),
+ Ec.message()));
+ }
+ }
+
+ OutCachedCount++;
+ OutCachedByteCount += m_LocalContent.RawSizes[LocalPathIndex];
+ }
+ });
+ }
+
+ {
+ ZEN_TRACE_CPU("CopyToCache_Wait");
+
+ Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ ZEN_UNUSED(PendingWork);
+ const uint64_t WorkTotal = FilesToCache.size();
+ const uint64_t WorkComplete = OutCachedCount.load();
+ std::string Details = fmt::format("{}/{} ({}) files", WorkComplete, WorkTotal, NiceBytes(OutCachedByteCount));
+ ProgressBar->UpdateState({.Task = "Caching local ",
+ .Details = Details,
+ .TotalCount = gsl::narrow<uint64_t>(WorkTotal),
+ .RemainingCount = gsl::narrow<uint64_t>(WorkTotal - WorkComplete),
+ .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+ false);
+ });
+ }
+
+ ProgressBar->Finish();
+}
+
// Schedules one IO-pool work item per scavenged-sequence copy operation; each
// item copies a whole matched file from its scavenge source into the cache
// via WriteScavengedSequenceToCache. The last completed part (tracked through
// Context.WritePartsComplete against Context.TotalPartWriteCount) stops the
// written-bytes rate filter.
void
BuildsOperationUpdateFolder::ScheduleScavengedSequenceWrites(WriteChunksContext& Context,
                                                             std::span<const ScavengedSequenceCopyOperation> CopyOperations,
                                                             const std::vector<ChunkedFolderContent>& ScavengedContents,
                                                             const std::vector<std::filesystem::path>& ScavengedPaths)
{
    for (uint32_t ScavengeOpIndex = 0; ScavengeOpIndex < CopyOperations.size(); ScavengeOpIndex++)
    {
        if (m_AbortFlag)
        {
            break;
        }
        // CopyOperations (a span) is captured by value: cheap view copy, but
        // the underlying array must outlive the scheduled work — the caller
        // guarantees that by waiting on Context.Work.
        Context.Work.ScheduleWork(
            m_IOWorkerPool,
            [this, &Context, CopyOperations, &ScavengedContents, &ScavengedPaths, ScavengeOpIndex](std::atomic<bool>&) {
                if (!m_AbortFlag)
                {
                    ZEN_TRACE_CPU("Async_WriteScavenged");

                    Context.FilteredWrittenBytesPerSecond.Start();

                    const ScavengedSequenceCopyOperation& ScavengeOp = CopyOperations[ScavengeOpIndex];
                    const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengeOp.ScavengedContentIndex];
                    const std::filesystem::path& ScavengeRootPath = ScavengedPaths[ScavengeOp.ScavengedContentIndex];

                    WriteScavengedSequenceToCache(ScavengeRootPath, ScavengedContent, ScavengeOp);

                    // Stop the rate filter when this was the final part.
                    if (Context.WritePartsComplete.fetch_add(1) + 1 == Context.TotalPartWriteCount)
                    {
                        Context.FilteredWrittenBytesPerSecond.Stop();
                    }
                }
            });
    }
}
+
// Schedules one IO-pool work item per loose (pre-downloaded) chunk; each item
// delegates to WriteLooseChunk which writes the chunk to its remaining target
// locations and updates the shared counters in Context. The work data's
// ChunkTargetPtrs vector is moved out, so each work item is one-shot — the
// entry in LooseChunkHashWorks is consumed when its item runs.
void
BuildsOperationUpdateFolder::ScheduleLooseChunkWrites(WriteChunksContext& Context, std::vector<LooseChunkHashWorkData>& LooseChunkHashWorks)
{
    for (uint32_t LooseChunkHashWorkIndex = 0; LooseChunkHashWorkIndex < LooseChunkHashWorks.size(); LooseChunkHashWorkIndex++)
    {
        if (m_AbortFlag)
        {
            break;
        }

        // LooseChunkHashWorks is captured by reference; the caller keeps it
        // alive until Context.Work has drained.
        Context.Work.ScheduleWork(
            m_IOWorkerPool,
            [this, &Context, &LooseChunkHashWorks, LooseChunkHashWorkIndex](std::atomic<bool>&) {
                ZEN_TRACE_CPU("Async_ReadPreDownloadedChunk");
                if (!m_AbortFlag)
                {
                    LooseChunkHashWorkData& LooseChunkHashWork = LooseChunkHashWorks[LooseChunkHashWorkIndex];
                    const uint32_t RemoteChunkIndex = LooseChunkHashWork.RemoteChunkIndex;
                    WriteLooseChunk(RemoteChunkIndex,
                                    Context.ExistsResult,
                                    Context.SequenceIndexChunksLeftToWriteCounters,
                                    Context.WritePartsComplete,
                                    std::move(LooseChunkHashWork.ChunkTargetPtrs),
                                    Context.WriteCache,
                                    Context.Work,
                                    Context.TotalRequestCount,
                                    Context.TotalPartWriteCount,
                                    Context.FilteredDownloadedBytesPerSecond,
                                    Context.FilteredWrittenBytesPerSecond);
                }
            },
            WorkerThreadPool::EMode::EnableBacklog);
    }
}
+
// Schedules one IO-pool work item per local-chunk copy; each item writes a
// chunk sourced from a local/scavenged file into the cache via
// WriteLocalChunkToCache, then performs write-completion bookkeeping for every
// sequence the chunk contributed to, closing finished sequences and kicking
// off their async verification.
void
BuildsOperationUpdateFolder::ScheduleLocalChunkCopies(WriteChunksContext& Context,
                                                      std::span<const CopyChunkData> CopyChunkDatas,
                                                      CloneQueryInterface* CloneQuery,
                                                      const std::vector<ChunkedFolderContent>& ScavengedContents,
                                                      const std::vector<ChunkedContentLookup>& ScavengedLookups,
                                                      const std::vector<std::filesystem::path>& ScavengedPaths)
{
    for (size_t CopyDataIndex = 0; CopyDataIndex < CopyChunkDatas.size(); CopyDataIndex++)
    {
        if (m_AbortFlag)
        {
            break;
        }

        // CopyChunkDatas (a span) is captured by value: cheap view copy, but
        // its backing storage must outlive the scheduled work.
        Context.Work.ScheduleWork(
            m_IOWorkerPool,
            [this, &Context, CloneQuery, CopyChunkDatas, &ScavengedContents, &ScavengedLookups, &ScavengedPaths, CopyDataIndex](
                std::atomic<bool>&) {
                if (!m_AbortFlag)
                {
                    ZEN_TRACE_CPU("Async_CopyLocal");

                    Context.FilteredWrittenBytesPerSecond.Start();
                    const CopyChunkData& CopyData = CopyChunkDatas[CopyDataIndex];

                    std::vector<uint32_t> WrittenSequenceIndexes = WriteLocalChunkToCache(CloneQuery,
                                                                                          CopyData,
                                                                                          ScavengedContents,
                                                                                          ScavengedLookups,
                                                                                          ScavengedPaths,
                                                                                          Context.WriteCache);
                    bool WritePartsDone = Context.WritePartsComplete.fetch_add(1) + 1 == Context.TotalPartWriteCount;
                    if (!m_AbortFlag)
                    {
                        if (WritePartsDone)
                        {
                            Context.FilteredWrittenBytesPerSecond.Stop();
                        }

                        // Write tracking, updating this must be done without any files open
                        std::vector<uint32_t> CompletedChunkSequences;
                        for (uint32_t RemoteSequenceIndex : WrittenSequenceIndexes)
                        {
                            if (CompleteSequenceChunk(RemoteSequenceIndex, Context.SequenceIndexChunksLeftToWriteCounters))
                            {
                                CompletedChunkSequences.push_back(RemoteSequenceIndex);
                            }
                        }
                        // Close finished sequences, then verify them asynchronously.
                        Context.WriteCache.Close(CompletedChunkSequences);
                        VerifyAndCompleteChunkSequencesAsync(CompletedChunkSequences, Context.Work);
                    }
                }
            });
    }
}
+
// Schedules one IO-pool work item per block already present in the temp-block
// folder; each item loads the block file, writes its chunks into the cache via
// WriteChunksBlockToCache, and removes the block file afterwards. A malformed
// block deletes the cached file and throws; an unreadable file throws without
// deleting.
void
BuildsOperationUpdateFolder::ScheduleCachedBlockWrites(WriteChunksContext& Context, std::span<const uint32_t> CachedBlockIndexes)
{
    for (uint32_t BlockIndex : CachedBlockIndexes)
    {
        if (m_AbortFlag)
        {
            break;
        }

        Context.Work.ScheduleWork(m_IOWorkerPool, [this, &Context, BlockIndex](std::atomic<bool>&) {
            if (!m_AbortFlag)
            {
                ZEN_TRACE_CPU("Async_WriteCachedBlock");

                const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
                Context.FilteredWrittenBytesPerSecond.Start();

                // Block files are named by their hash in the temp block folder.
                std::filesystem::path BlockChunkPath = m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString();
                IoBuffer BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath);
                if (!BlockBuffer)
                {
                    throw std::runtime_error(fmt::format("Can not read block {} at {}", BlockDescription.BlockHash, BlockChunkPath));
                }

                if (!m_AbortFlag)
                {
                    if (!WriteChunksBlockToCache(BlockDescription,
                                                 Context.SequenceIndexChunksLeftToWriteCounters,
                                                 Context.Work,
                                                 CompositeBuffer(std::move(BlockBuffer)),
                                                 Context.RemoteChunkIndexNeedsCopyFromSourceFlags,
                                                 Context.WriteCache))
                    {
                        // Malformed block: drop the bad cache file so it is
                        // not picked up again, then fail the operation.
                        std::error_code DummyEc;
                        RemoveFile(BlockChunkPath, DummyEc);
                        throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash));
                    }

                    // Block consumed successfully: best-effort cleanup.
                    std::error_code Ec = TryRemoveFile(BlockChunkPath);
                    if (Ec)
                    {
                        ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", BlockChunkPath, Ec.value(), Ec.message());
                    }

                    // Stop the rate filter when this was the final part.
                    if (Context.WritePartsComplete.fetch_add(1) + 1 == Context.TotalPartWriteCount)
                    {
                        Context.FilteredWrittenBytesPerSecond.Stop();
                    }
                }
            }
        });
    }
}
+
+// Schedules network-pool downloads for blocks of which only some byte ranges are
+// needed. Adjacent entries in PartialBlocks.BlockRanges sharing the same BlockIndex
+// are coalesced into a single request. On completion, the payload (in memory or as
+// an on-disk file) is handed to an IO-pool work item that writes the covered chunks
+// via WritePartialBlockToCache. Context and PartialBlocks are captured by reference
+// and must outlive all scheduled work.
+void
+BuildsOperationUpdateFolder::SchedulePartialBlockDownloads(WriteChunksContext& Context,
+ const ChunkBlockAnalyser::BlockResult& PartialBlocks)
+{
+ for (size_t BlockRangeIndex = 0; BlockRangeIndex < PartialBlocks.BlockRanges.size();)
+ {
+ if (m_AbortFlag)
+ {
+ break;
+ }
+
+ // Group consecutive range descriptors that belong to the same block so they
+ // can be fetched with one request.
+ size_t RangeCount = 1;
+ size_t RangesLeft = PartialBlocks.BlockRanges.size() - BlockRangeIndex;
+ const ChunkBlockAnalyser::BlockRangeDescriptor& CurrentBlockRange = PartialBlocks.BlockRanges[BlockRangeIndex];
+ while (RangeCount < RangesLeft &&
+ CurrentBlockRange.BlockIndex == PartialBlocks.BlockRanges[BlockRangeIndex + RangeCount].BlockIndex)
+ {
+ RangeCount++;
+ }
+
+ Context.Work.ScheduleWork(
+ m_NetworkPool,
+ [this, &Context, &PartialBlocks, BlockRangeStartIndex = BlockRangeIndex, RangeCount = RangeCount](std::atomic<bool>&) {
+ if (!m_AbortFlag)
+ {
+ ZEN_TRACE_CPU("Async_GetPartialBlockRanges");
+
+ Context.FilteredDownloadedBytesPerSecond.Start();
+
+ DownloadPartialBlock(
+ PartialBlocks.BlockRanges,
+ BlockRangeStartIndex,
+ RangeCount,
+ Context.ExistsResult,
+ Context.TotalRequestCount,
+ Context.FilteredDownloadedBytesPerSecond,
+ // Completion callback: the payload is either an in-memory buffer
+ // (OnDiskPath empty) or a file at OnDiskPath; OffsetAndLengths are the
+ // (offset, length) pairs of the requested ranges within the payload.
+ [this, &Context, &PartialBlocks](IoBuffer&& InMemoryBuffer,
+ const std::filesystem::path& OnDiskPath,
+ size_t BlockRangeStartIndex,
+ std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths) {
+ if (!m_AbortFlag)
+ {
+ // Hop to the IO pool for the cache write; the buffer is moved and the
+ // range list copied into the closure so they survive the hop.
+ Context.Work.ScheduleWork(
+ m_IOWorkerPool,
+ [this,
+ &Context,
+ &PartialBlocks,
+ BlockRangeStartIndex,
+ BlockChunkPath = std::filesystem::path(OnDiskPath),
+ BlockPartialBuffer = std::move(InMemoryBuffer),
+ OffsetAndLengths =
+ std::vector<std::pair<uint64_t, uint64_t>>(OffsetAndLengths.begin(), OffsetAndLengths.end())](
+ std::atomic<bool>&) mutable {
+ if (!m_AbortFlag)
+ {
+ WritePartialBlockToCache(Context,
+ BlockRangeStartIndex,
+ std::move(BlockPartialBuffer),
+ BlockChunkPath,
+ OffsetAndLengths,
+ PartialBlocks);
+ }
+ },
+ // NOTE(review): in-memory payloads disable the backlog, presumably to
+ // bound memory use -- confirm intent.
+ OnDiskPath.empty() ? WorkerThreadPool::EMode::DisableBacklog : WorkerThreadPool::EMode::EnableBacklog);
+ }
+ });
+ }
+ });
+ BlockRangeIndex += RangeCount;
+ }
+}
+
+// Writes the chunk ranges carried by a partially downloaded block into the write
+// cache. The block payload arrives either in memory (BlockPartialBuffer set,
+// BlockChunkPath empty) or on disk (BlockChunkPath set, buffer empty) -- exactly one
+// of the two is expected, mirroring WriteFullBlockToCache.
+// Throws if the on-disk block cannot be opened or if any range is malformed; a
+// malformed on-disk block file is deleted before throwing so it is not reused.
+void
+BuildsOperationUpdateFolder::WritePartialBlockToCache(WriteChunksContext& Context,
+                                                      size_t BlockRangeStartIndex,
+                                                      IoBuffer BlockPartialBuffer,
+                                                      const std::filesystem::path& BlockChunkPath,
+                                                      std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths,
+                                                      const ChunkBlockAnalyser::BlockResult& PartialBlocks)
+{
+    ZEN_TRACE_CPU("Async_WritePartialBlock");
+
+    const uint32_t BlockIndex = PartialBlocks.BlockRanges[BlockRangeStartIndex].BlockIndex;
+    const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
+
+    if (BlockChunkPath.empty())
+    {
+        // In-memory delivery: the buffer must already be populated.
+        ZEN_ASSERT(BlockPartialBuffer);
+    }
+    else
+    {
+        // On-disk delivery: load the block file before writing its ranges.
+        ZEN_ASSERT(!BlockPartialBuffer);
+        BlockPartialBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath);
+        if (!BlockPartialBuffer)
+        {
+            throw std::runtime_error(fmt::format("Could not open downloaded block {} from {}", BlockDescription.BlockHash, BlockChunkPath));
+        }
+    }
+
+    Context.FilteredWrittenBytesPerSecond.Start();
+
+    const size_t RangeCount = OffsetAndLengths.size();
+
+    for (size_t PartialRangeIndex = 0; PartialRangeIndex < RangeCount; PartialRangeIndex++)
+    {
+        // Each entry describes one contiguous (offset, length) span of the payload.
+        const std::pair<uint64_t, uint64_t>& OffsetAndLength = OffsetAndLengths[PartialRangeIndex];
+        IoBuffer BlockRangeBuffer(BlockPartialBuffer, OffsetAndLength.first, OffsetAndLength.second);
+
+        const ChunkBlockAnalyser::BlockRangeDescriptor& RangeDescriptor =
+            PartialBlocks.BlockRanges[BlockRangeStartIndex + PartialRangeIndex];
+
+        if (!WritePartialBlockChunksToCache(BlockDescription,
+                                            Context.SequenceIndexChunksLeftToWriteCounters,
+                                            Context.Work,
+                                            CompositeBuffer(std::move(BlockRangeBuffer)),
+                                            RangeDescriptor.ChunkBlockIndexStart,
+                                            RangeDescriptor.ChunkBlockIndexStart + RangeDescriptor.ChunkBlockIndexCount - 1,
+                                            Context.RemoteChunkIndexNeedsCopyFromSourceFlags,
+                                            Context.WriteCache))
+        {
+            // Drop the bad block file (no-op for the in-memory case) and fail.
+            std::error_code DummyEc;
+            RemoveFile(BlockChunkPath, DummyEc);
+            throw std::runtime_error(fmt::format("Partial block {} is malformed", BlockDescription.BlockHash));
+        }
+
+        // Stop the write throughput meter once the last expected part is written.
+        if (Context.WritePartsComplete.fetch_add(1) + 1 == Context.TotalPartWriteCount)
+        {
+            Context.FilteredWrittenBytesPerSecond.Stop();
+        }
+    }
+
+    // Only attempt cleanup for on-disk payloads; this matches WriteFullBlockToCache
+    // and avoids logging a spurious removal failure for the in-memory (empty path) case.
+    if (!BlockChunkPath.empty())
+    {
+        std::error_code Ec = TryRemoveFile(BlockChunkPath);
+        if (Ec)
+        {
+            ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", BlockChunkPath, Ec.value(), Ec.message());
+        }
+    }
+}
+
+// Schedules one network-pool download per fully required block. Each task fetches
+// the block -- preferring cache storage when the exists-check reports it present,
+// falling back to build storage -- optionally populates the cache storage, then
+// hands the payload to an IO-pool task that writes the chunks via
+// WriteFullBlockToCache. Context is captured by reference and must outlive the work.
+void
+BuildsOperationUpdateFolder::ScheduleFullBlockDownloads(WriteChunksContext& Context, std::span<const uint32_t> FullBlockIndexes)
+{
+ for (uint32_t BlockIndex : FullBlockIndexes)
+ {
+ if (m_AbortFlag)
+ {
+ break;
+ }
+
+ Context.Work.ScheduleWork(m_NetworkPool, [this, &Context, BlockIndex](std::atomic<bool>&) {
+ if (!m_AbortFlag)
+ {
+ ZEN_TRACE_CPU("Async_GetFullBlock");
+
+ const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
+
+ Context.FilteredDownloadedBytesPerSecond.Start();
+
+ IoBuffer BlockBuffer;
+ const bool ExistsInCache =
+ m_Storage.CacheStorage && Context.ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash);
+ if (ExistsInCache)
+ {
+ BlockBuffer = m_Storage.CacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash);
+ }
+ // Cache miss (or cache read failure) -> fetch from the remote build storage.
+ if (!BlockBuffer)
+ {
+ try
+ {
+ BlockBuffer = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash);
+ }
+ catch (const std::exception&)
+ {
+ // Silence http errors due to abort
+ if (!m_AbortFlag)
+ {
+ throw;
+ }
+ }
+ }
+ if (!m_AbortFlag)
+ {
+ if (!BlockBuffer)
+ {
+ throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash));
+ }
+
+ uint64_t BlockSize = BlockBuffer.GetSize();
+ m_DownloadStats.DownloadedBlockCount++;
+ m_DownloadStats.DownloadedBlockByteCount += BlockSize;
+ // Stop the download throughput meter when the last request finishes.
+ if (m_DownloadStats.RequestsCompleteCount.fetch_add(1) + 1 == Context.TotalRequestCount)
+ {
+ Context.FilteredDownloadedBytesPerSecond.Stop();
+ }
+
+ const bool PutInCache = !ExistsInCache && m_Storage.CacheStorage && m_Options.PopulateCache;
+
+ // Spill to disk when the payload must go to the cache store or is too large
+ // to keep in memory; an empty path afterwards means it stayed in memory.
+ std::filesystem::path BlockChunkPath =
+ TryMoveDownloadedChunk(BlockBuffer,
+ m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(),
+ /* ForceDiskBased */ PutInCache || (BlockSize > m_Options.MaximumInMemoryPayloadSize));
+
+ if (PutInCache)
+ {
+ ZEN_ASSERT(!BlockChunkPath.empty());
+ IoBuffer CacheBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath);
+ if (CacheBuffer)
+ {
+ m_Storage.CacheStorage->PutBuildBlob(m_BuildId,
+ BlockDescription.BlockHash,
+ ZenContentType::kCompressedBinary,
+ CompositeBuffer(SharedBuffer(CacheBuffer)));
+ }
+ }
+
+ if (!m_AbortFlag)
+ {
+ // Hand the payload to the IO pool for the actual cache write.
+ Context.Work.ScheduleWork(
+ m_IOWorkerPool,
+ [this, &Context, BlockIndex, BlockChunkPath, BlockBuffer = std::move(BlockBuffer)](std::atomic<bool>&) mutable {
+ if (!m_AbortFlag)
+ {
+ WriteFullBlockToCache(Context, BlockIndex, std::move(BlockBuffer), BlockChunkPath);
+ }
+ },
+ // NOTE(review): in-memory payloads disable the backlog, presumably to
+ // bound memory use -- confirm intent.
+ BlockChunkPath.empty() ? WorkerThreadPool::EMode::DisableBacklog : WorkerThreadPool::EMode::EnableBacklog);
+ }
+ }
+ }
+ });
+ }
+}
+
+// Writes a fully downloaded block's chunks into the write cache. The block payload
+// arrives either in memory (BlockBuffer set, BlockChunkPath empty) or on disk
+// (BlockChunkPath set, buffer empty) -- exactly one of the two is expected.
+// Throws if the on-disk block cannot be opened or the block content is malformed;
+// a malformed block file is deleted before throwing so it is not reused.
+void
+BuildsOperationUpdateFolder::WriteFullBlockToCache(WriteChunksContext& Context,
+                                                   uint32_t BlockIndex,
+                                                   IoBuffer BlockBuffer,
+                                                   const std::filesystem::path& BlockChunkPath)
+{
+    ZEN_TRACE_CPU("Async_WriteFullBlock");
+
+    const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
+
+    if (BlockChunkPath.empty())
+    {
+        // In-memory delivery: the buffer must already be populated.
+        ZEN_ASSERT(BlockBuffer);
+    }
+    else
+    {
+        // On-disk delivery: load the block file before writing its chunks.
+        ZEN_ASSERT(!BlockBuffer);
+        BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath);
+        if (!BlockBuffer)
+        {
+            throw std::runtime_error(fmt::format("Could not open downloaded block {} from {}", BlockDescription.BlockHash, BlockChunkPath));
+        }
+    }
+
+    Context.FilteredWrittenBytesPerSecond.Start();
+    if (!WriteChunksBlockToCache(BlockDescription,
+                                 Context.SequenceIndexChunksLeftToWriteCounters,
+                                 Context.Work,
+                                 CompositeBuffer(std::move(BlockBuffer)),
+                                 Context.RemoteChunkIndexNeedsCopyFromSourceFlags,
+                                 Context.WriteCache))
+    {
+        // Drop the bad block file (no-op for the in-memory case) and fail.
+        std::error_code DummyEc;
+        RemoveFile(BlockChunkPath, DummyEc);
+        throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash));
+    }
+
+    // Only on-disk payloads need cleanup after a successful write.
+    if (!BlockChunkPath.empty())
+    {
+        std::error_code Ec = TryRemoveFile(BlockChunkPath);
+        if (Ec)
+        {
+            ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", BlockChunkPath, Ec.value(), Ec.message());
+        }
+    }
+
+    // Stop the write throughput meter once the last expected part is written.
+    if (Context.WritePartsComplete.fetch_add(1) + 1 == Context.TotalPartWriteCount)
+    {
+        Context.FilteredWrittenBytesPerSecond.Stop();
+    }
+}
+
+// Queues one IO-pool task per obsolete local file. Each task clears the read-only
+// attribute, deletes the file, and increments DeletedCount. Scheduling stops early
+// when the operation is aborted; DeletedCount must outlive the scheduled work.
+void
+BuildsOperationUpdateFolder::ScheduleLocalFileRemovals(ParallelWork& Work,
+                                                       std::span<const uint32_t> RemoveLocalPathIndexes,
+                                                       std::atomic<uint64_t>& DeletedCount)
+{
+    for (uint32_t PathIndex : RemoveLocalPathIndexes)
+    {
+        if (m_AbortFlag)
+        {
+            break;
+        }
+
+        auto RemoveTask = [this, &DeletedCount, PathIndex](std::atomic<bool>&) {
+            if (m_AbortFlag)
+            {
+                return;
+            }
+            ZEN_TRACE_CPU("Async_RemoveFile");
+
+            const std::filesystem::path FileToDelete = (m_Path / m_LocalContent.Paths[PathIndex]).make_preferred();
+            SetFileReadOnlyWithRetry(FileToDelete, false);
+            RemoveFileWithRetry(FileToDelete);
+            DeletedCount++;
+        };
+        Work.ScheduleWork(m_IOWorkerPool, std::move(RemoveTask));
+    }
+}
+
+// Groups finalize targets by raw hash and schedules one IO-pool work item per group,
+// delegating to FinalizeTargetGroup. Grouping only merges ADJACENT equal hashes, so
+// Targets is assumed to be sorted/grouped by RawHash -- TODO confirm with the caller.
+// All reference parameters must outlive the scheduled work.
+void
+BuildsOperationUpdateFolder::ScheduleTargetFinalization(
+ ParallelWork& Work,
+ std::span<const FinalizeTarget> Targets,
+ const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& SequenceHashToLocalPathIndex,
+ const tsl::robin_map<uint32_t, uint32_t>& RemotePathIndexToLocalPathIndex,
+ FolderContent& OutLocalFolderState,
+ std::atomic<uint64_t>& TargetsComplete)
+{
+ size_t TargetOffset = 0;
+ while (TargetOffset < Targets.size())
+ {
+ if (m_AbortFlag)
+ {
+ break;
+ }
+
+ // Extend the group while adjacent targets share the same raw hash.
+ size_t TargetCount = 1;
+ while ((TargetOffset + TargetCount) < Targets.size() &&
+ (Targets[TargetOffset + TargetCount].RawHash == Targets[TargetOffset].RawHash))
+ {
+ TargetCount++;
+ }
+
+ Work.ScheduleWork(m_IOWorkerPool,
+ [this,
+ &SequenceHashToLocalPathIndex,
+ Targets,
+ &RemotePathIndexToLocalPathIndex,
+ &OutLocalFolderState,
+ BaseTargetOffset = TargetOffset,
+ TargetCount,
+ &TargetsComplete](std::atomic<bool>&) {
+ if (!m_AbortFlag)
+ {
+ FinalizeTargetGroup(BaseTargetOffset,
+ TargetCount,
+ Targets,
+ SequenceHashToLocalPathIndex,
+ RemotePathIndexToLocalPathIndex,
+ OutLocalFolderState,
+ TargetsComplete);
+ }
+ });
+
+ TargetOffset += TargetCount;
+ }
+}
+
+// Materializes one group of finalize targets that all share the same raw content
+// hash. A zero hash means the targets become empty files. Otherwise the first
+// target is produced either by copying a matching existing local file or by moving
+// the assembled sequence file out of the cache folder; every further target in the
+// group is then copied from the first. Writes into OutLocalFolderState are indexed
+// by remote path index, so concurrent groups touch disjoint slots. TargetsComplete
+// is incremented once per finished target.
+void
+BuildsOperationUpdateFolder::FinalizeTargetGroup(size_t BaseOffset,
+ size_t Count,
+ std::span<const FinalizeTarget> Targets,
+ const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& SequenceHashToLocalPathIndex,
+ const tsl::robin_map<uint32_t, uint32_t>& RemotePathIndexToLocalPathIndex,
+ FolderContent& OutLocalFolderState,
+ std::atomic<uint64_t>& TargetsComplete)
+{
+ ZEN_TRACE_CPU("Async_FinalizeChunkSequence");
+
+ size_t TargetOffset = BaseOffset;
+ const IoHash& RawHash = Targets[TargetOffset].RawHash;
+
+ // The zero hash denotes empty content: truncate-create each target file.
+ if (RawHash == IoHash::Zero)
+ {
+ ZEN_TRACE_CPU("CreateEmptyFiles");
+ while (TargetOffset < (BaseOffset + Count))
+ {
+ const uint32_t RemotePathIndex = Targets[TargetOffset].RemotePathIndex;
+ ZEN_ASSERT(Targets[TargetOffset].RawHash == RawHash);
+ const std::filesystem::path& TargetPath = m_RemoteContent.Paths[RemotePathIndex];
+ std::filesystem::path TargetFilePath = (m_Path / TargetPath).make_preferred();
+ auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(RemotePathIndex);
+ // NOTE(review): the `InPlaceIt->second == 0` arm also recreates the file when the
+ // mapped local path index is 0; index 0 looks like a valid path index -- confirm intent.
+ if (InPlaceIt == RemotePathIndexToLocalPathIndex.end() || InPlaceIt->second == 0)
+ {
+ if (IsFileWithRetry(TargetFilePath))
+ {
+ SetFileReadOnlyWithRetry(TargetFilePath, false);
+ }
+ else
+ {
+ CreateDirectories(TargetFilePath.parent_path());
+ }
+ // Opening with truncate produces the empty file.
+ BasicFile OutputFile;
+ OutputFile.Open(TargetFilePath, BasicFile::Mode::kTruncate);
+ }
+ OutLocalFolderState.Paths[RemotePathIndex] = TargetPath;
+ OutLocalFolderState.RawSizes[RemotePathIndex] = m_RemoteContent.RawSizes[RemotePathIndex];
+
+ OutLocalFolderState.Attributes[RemotePathIndex] =
+ m_RemoteContent.Attributes.empty()
+ ? GetNativeFileAttributes(TargetFilePath)
+ : SetNativeFileAttributes(TargetFilePath, m_RemoteContent.Platform, m_RemoteContent.Attributes[RemotePathIndex]);
+ OutLocalFolderState.ModificationTicks[RemotePathIndex] = GetModificationTickFromPath(TargetFilePath);
+
+ TargetOffset++;
+ TargetsComplete++;
+ }
+ }
+ else
+ {
+ ZEN_TRACE_CPU("FinalizeFile");
+ ZEN_ASSERT(m_RemoteLookup.RawHashToSequenceIndex.contains(RawHash));
+ const uint32_t FirstRemotePathIndex = Targets[TargetOffset].RemotePathIndex;
+ const std::filesystem::path& FirstTargetPath = m_RemoteContent.Paths[FirstRemotePathIndex];
+ std::filesystem::path FirstTargetFilePath = (m_Path / FirstTargetPath).make_preferred();
+
+ // If the first target is already in place (mapped to an existing local file),
+ // no data movement is needed for it.
+ if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(FirstRemotePathIndex); InPlaceIt != RemotePathIndexToLocalPathIndex.end())
+ {
+ ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath));
+ }
+ else
+ {
+ if (IsFileWithRetry(FirstTargetFilePath))
+ {
+ SetFileReadOnlyWithRetry(FirstTargetFilePath, false);
+ }
+ else
+ {
+ CreateDirectories(FirstTargetFilePath.parent_path());
+ }
+
+ // Prefer copying from an identical local file; otherwise move the assembled
+ // sequence out of the cache folder.
+ if (auto InplaceIt = SequenceHashToLocalPathIndex.find(RawHash); InplaceIt != SequenceHashToLocalPathIndex.end())
+ {
+ ZEN_TRACE_CPU("Copy");
+ const uint32_t LocalPathIndex = InplaceIt->second;
+ const std::filesystem::path& SourcePath = m_LocalContent.Paths[LocalPathIndex];
+ std::filesystem::path SourceFilePath = (m_Path / SourcePath).make_preferred();
+ ZEN_ASSERT_SLOW(IsFileWithRetry(SourceFilePath));
+
+ ZEN_DEBUG("Copying from '{}' -> '{}'", SourceFilePath, FirstTargetFilePath);
+ const uint64_t RawSize = m_LocalContent.RawSizes[LocalPathIndex];
+ FastCopyFile(m_Options.AllowFileClone,
+ m_Options.UseSparseFiles,
+ SourceFilePath,
+ FirstTargetFilePath,
+ RawSize,
+ m_DiskStats.WriteCount,
+ m_DiskStats.WriteByteCount,
+ m_DiskStats.CloneCount,
+ m_DiskStats.CloneByteCount);
+
+ m_RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++;
+ }
+ else
+ {
+ ZEN_TRACE_CPU("Rename");
+ const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RawHash);
+ ZEN_ASSERT_SLOW(IsFileWithRetry(CacheFilePath));
+
+ // Retry the rename once more on failure before giving up with an exception.
+ std::error_code Ec = RenameFileWithRetry(CacheFilePath, FirstTargetFilePath);
+ if (Ec)
+ {
+ ZEN_WARN("Failed to move file from '{}' to '{}', reason: ({}) {}, retrying...",
+ CacheFilePath,
+ FirstTargetFilePath,
+ Ec.value(),
+ Ec.message());
+ Ec = RenameFileWithRetry(CacheFilePath, FirstTargetFilePath);
+ if (Ec)
+ {
+ throw std::system_error(std::error_code(Ec.value(), std::system_category()),
+ fmt::format("Failed to move file from '{}' to '{}', reason: ({}) {}",
+ CacheFilePath,
+ FirstTargetFilePath,
+ Ec.value(),
+ Ec.message()));
+ }
+ }
+
+ m_RebuildFolderStateStats.FinalizeTreeFilesMovedCount++;
+ }
+ }
+
+ OutLocalFolderState.Paths[FirstRemotePathIndex] = FirstTargetPath;
+ OutLocalFolderState.RawSizes[FirstRemotePathIndex] = m_RemoteContent.RawSizes[FirstRemotePathIndex];
+
+ OutLocalFolderState.Attributes[FirstRemotePathIndex] =
+ m_RemoteContent.Attributes.empty()
+ ? GetNativeFileAttributes(FirstTargetFilePath)
+ : SetNativeFileAttributes(FirstTargetFilePath, m_RemoteContent.Platform, m_RemoteContent.Attributes[FirstRemotePathIndex]);
+ OutLocalFolderState.ModificationTicks[FirstRemotePathIndex] = GetModificationTickFromPath(FirstTargetFilePath);
+
+ TargetOffset++;
+ TargetsComplete++;
+
+ // The remaining targets in the group are duplicates of the first file.
+ while (TargetOffset < (BaseOffset + Count))
+ {
+ const uint32_t RemotePathIndex = Targets[TargetOffset].RemotePathIndex;
+ ZEN_ASSERT(Targets[TargetOffset].RawHash == RawHash);
+ const std::filesystem::path& TargetPath = m_RemoteContent.Paths[RemotePathIndex];
+ std::filesystem::path TargetFilePath = (m_Path / TargetPath).make_preferred();
+
+ if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(RemotePathIndex); InPlaceIt != RemotePathIndexToLocalPathIndex.end())
+ {
+ ZEN_ASSERT_SLOW(IsFileWithRetry(TargetFilePath));
+ }
+ else
+ {
+ ZEN_TRACE_CPU("Copy");
+ if (IsFileWithRetry(TargetFilePath))
+ {
+ SetFileReadOnlyWithRetry(TargetFilePath, false);
+ }
+ else
+ {
+ CreateDirectories(TargetFilePath.parent_path());
+ }
+
+ ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath));
+ ZEN_DEBUG("Copying from '{}' -> '{}'", FirstTargetFilePath, TargetFilePath);
+ const uint64_t RawSize = m_RemoteContent.RawSizes[RemotePathIndex];
+ FastCopyFile(m_Options.AllowFileClone,
+ m_Options.UseSparseFiles,
+ FirstTargetFilePath,
+ TargetFilePath,
+ RawSize,
+ m_DiskStats.WriteCount,
+ m_DiskStats.WriteByteCount,
+ m_DiskStats.CloneCount,
+ m_DiskStats.CloneByteCount);
+
+ m_RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++;
+ }
+
+ OutLocalFolderState.Paths[RemotePathIndex] = TargetPath;
+ OutLocalFolderState.RawSizes[RemotePathIndex] = m_RemoteContent.RawSizes[RemotePathIndex];
+
+ OutLocalFolderState.Attributes[RemotePathIndex] =
+ m_RemoteContent.Attributes.empty()
+ ? GetNativeFileAttributes(TargetFilePath)
+ : SetNativeFileAttributes(TargetFilePath, m_RemoteContent.Platform, m_RemoteContent.Attributes[RemotePathIndex]);
+ OutLocalFolderState.ModificationTicks[RemotePathIndex] = GetModificationTickFromPath(TargetFilePath);
+
+ TargetOffset++;
+ TargetsComplete++;
+ }
+ }
+}
+
+// Scans the persisted download-tracking entries and returns the previously
+// downloaded builds usable as scavenge sources: entries whose folder and saved
+// state file both still exist and that do not refer to the folder currently being
+// updated. Stale, self-referencing, or unreadable entries are deleted on the fly.
+std::vector<BuildsOperationUpdateFolder::ScavengeSource>
+BuildsOperationUpdateFolder::FindScavengeSources()
+{
+    ZEN_TRACE_CPU("FindScavengeSources");
+
+    const bool TargetPathExists = IsDir(m_Path);
+
+    std::vector<ScavengeSource> Sources;
+    for (const std::filesystem::path& TrackingFilePath : GetDownloadedStatePaths(m_Options.SystemRootDir))
+    {
+        if (!IsFile(TrackingFilePath))
+        {
+            continue;
+        }
+
+        // Assume the entry is stale until proven usable.
+        bool RemoveTrackingEntry = true;
+        try
+        {
+            BuildsDownloadInfo Info = ReadDownloadedInfoFile(TrackingFilePath);
+            if (!Info.LocalPath.empty() && IsDir(Info.LocalPath) && IsFile(Info.StateFilePath))
+            {
+                // Skip the folder we are updating right now -- scavenging from it
+                // would be self-referential; its entry is dropped instead.
+                if (!(TargetPathExists && std::filesystem::equivalent(Info.LocalPath, m_Path)))
+                {
+                    Sources.push_back({.StateFilePath = std::move(Info.StateFilePath), .Path = std::move(Info.LocalPath)});
+                    RemoveTrackingEntry = false;
+                }
+            }
+        }
+        catch (const std::exception& Ex)
+        {
+            // Unreadable entry: log and fall through to deletion.
+            ZEN_WARN("{}", Ex.what());
+        }
+
+        if (RemoveTrackingEntry)
+        {
+            std::error_code DummyEc;
+            std::filesystem::remove(TrackingFilePath, DummyEc);
+        }
+    }
+    return Sources;
+}
+
+// Determines which remote sequences still need to be produced. A sequence is
+// satisfied when it is already present in the cache folder (found as a whole
+// sequence or as a single chunk) or when an identical local file can supply it;
+// everything else is returned as a missing sequence index. Updates the
+// local-scan timing and matching statistics.
+std::vector<uint32_t>
+BuildsOperationUpdateFolder::ScanTargetFolder(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedChunkHashesFound,
+                                              const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound)
+{
+    ZEN_TRACE_CPU("ScanTargetFolder");
+
+    Stopwatch ScanTimer;
+
+    std::vector<uint32_t> MissingSequenceIndexes;
+
+    const size_t RemoteSequenceCount = m_RemoteContent.ChunkedContent.SequenceRawHashes.size();
+    for (uint32_t SequenceIndex = 0; SequenceIndex < RemoteSequenceCount; SequenceIndex++)
+    {
+        const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex];
+        const uint32_t PathIndex = GetFirstPathIndexForSeqeuenceIndex(m_RemoteLookup, SequenceIndex);
+        const uint64_t RawSize = m_RemoteContent.RawSizes[PathIndex];
+
+        // Already assembled in the cache folder as a full sequence file.
+        if (CachedSequenceHashesFound.contains(SequenceRawHash))
+        {
+            const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash);
+            ZEN_ASSERT_SLOW(IsFile(CacheFilePath));
+            if (m_Options.IsVerbose)
+            {
+                ZEN_INFO("Found sequence {} at {} ({})", SequenceRawHash, CacheFilePath, NiceBytes(RawSize));
+            }
+            continue;
+        }
+
+        // Present in the cache folder as a single-chunk file.
+        if (CachedChunkHashesFound.contains(SequenceRawHash))
+        {
+            const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash);
+            ZEN_ASSERT_SLOW(IsFile(CacheFilePath));
+            if (m_Options.IsVerbose)
+            {
+                ZEN_INFO("Found chunk {} at {} ({})", SequenceRawHash, CacheFilePath, NiceBytes(RawSize));
+            }
+            continue;
+        }
+
+        // An identical local file already exists in the target folder.
+        auto LocalIt = m_LocalLookup.RawHashToSequenceIndex.find(SequenceRawHash);
+        if (LocalIt != m_LocalLookup.RawHashToSequenceIndex.end())
+        {
+            const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(m_LocalLookup, LocalIt->second);
+            const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred();
+            ZEN_ASSERT_SLOW(IsFile(LocalFilePath));
+            m_CacheMappingStats.LocalPathsMatchingSequencesCount++;
+            m_CacheMappingStats.LocalPathsMatchingSequencesByteCount += RawSize;
+            if (m_Options.IsVerbose)
+            {
+                ZEN_INFO("Found sequence {} at {} ({})", SequenceRawHash, LocalFilePath, NiceBytes(RawSize));
+            }
+            continue;
+        }
+
+        // Not available anywhere locally -- it has to be fetched/assembled.
+        MissingSequenceIndexes.push_back(SequenceIndex);
+    }
+
+    m_CacheMappingStats.LocalScanElapsedWallTimeUs += ScanTimer.GetElapsedTimeUs();
+    return MissingSequenceIndexes;
+}
+
+// Loads a scavenge source's saved build state and reduces it to the subset of paths
+// that can contribute data to the current update: a path is kept when its sequence
+// (or any chunk of its sequence) matches the remote content. Files that were
+// modified or deleted since the state was saved are dropped rather than re-hashed.
+// On success fills the out-parameters and returns true; returns false (with
+// OutScavengedLocalContent cleared) when the state is unusable or nothing remains.
+bool
+BuildsOperationUpdateFolder::FindScavengeContent(const ScavengeSource& Source,
+ ChunkedFolderContent& OutScavengedLocalContent,
+ ChunkedContentLookup& OutScavengedLookup)
+{
+ ZEN_TRACE_CPU("FindScavengeContent");
+
+ FolderContent LocalFolderState;
+ try
+ {
+ BuildSaveState SavedState = ReadBuildSaveStateFile(Source.StateFilePath);
+ // Unversioned state files predate the guarantees scavenging relies on.
+ if (SavedState.Version == BuildSaveState::NoVersion)
+ {
+ ZEN_DEBUG("Skipping old build state at '{}', state files before version {} can not be trusted during scavenge",
+ Source.StateFilePath,
+ BuildSaveState::kVersion1);
+ return false;
+ }
+ OutScavengedLocalContent = std::move(SavedState.State.ChunkedContent);
+ LocalFolderState = std::move(SavedState.FolderState);
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_DEBUG("Skipping invalid build state at '{}', reason: {}", Source.StateFilePath, Ex.what());
+ return false;
+ }
+
+ tsl::robin_set<uint32_t> PathIndexesToScavenge;
+ PathIndexesToScavenge.reserve(OutScavengedLocalContent.Paths.size());
+ std::vector<uint32_t> ChunkOrderOffsets = BuildChunkOrderOffset(OutScavengedLocalContent.ChunkedContent.ChunkCounts);
+
+ {
+ // Map each distinct raw hash to the first scavenged path carrying it.
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToPathIndex;
+
+ RawHashToPathIndex.reserve(OutScavengedLocalContent.Paths.size());
+ for (uint32_t ScavengedPathIndex = 0; ScavengedPathIndex < OutScavengedLocalContent.RawHashes.size(); ScavengedPathIndex++)
+ {
+ if (!RawHashToPathIndex.contains(OutScavengedLocalContent.RawHashes[ScavengedPathIndex]))
+ {
+ RawHashToPathIndex.insert_or_assign(OutScavengedLocalContent.RawHashes[ScavengedPathIndex], ScavengedPathIndex);
+ }
+ }
+
+ // Keep a path when its whole sequence matches the remote content, or --
+ // failing that -- when at least one of its chunks does.
+ for (uint32_t ScavengeSequenceIndex = 0; ScavengeSequenceIndex < OutScavengedLocalContent.ChunkedContent.SequenceRawHashes.size();
+ ScavengeSequenceIndex++)
+ {
+ const IoHash& SequenceHash = OutScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengeSequenceIndex];
+ if (auto It = RawHashToPathIndex.find(SequenceHash); It != RawHashToPathIndex.end())
+ {
+ uint32_t PathIndex = It->second;
+ if (!PathIndexesToScavenge.contains(PathIndex))
+ {
+ if (m_RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash))
+ {
+ PathIndexesToScavenge.insert(PathIndex);
+ }
+ else
+ {
+ uint32_t ChunkOrderIndexStart = ChunkOrderOffsets[ScavengeSequenceIndex];
+ const uint32_t ChunkCount = OutScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex];
+ for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < ChunkCount; ChunkOrderIndex++)
+ {
+ const uint32_t ChunkIndex =
+ OutScavengedLocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndexStart + ChunkOrderIndex];
+ const IoHash& ChunkHash = OutScavengedLocalContent.ChunkedContent.ChunkHashes[ChunkIndex];
+ if (m_RemoteLookup.ChunkHashToChunkIndex.contains(ChunkHash))
+ {
+ PathIndexesToScavenge.insert(PathIndex);
+ break;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ // A sequence hash without a backing path means the state is inconsistent.
+ ZEN_WARN("Scavenged state file at '{}' for '{}' is invalid, skipping scavenging for sequence {}",
+ Source.StateFilePath,
+ Source.Path,
+ SequenceHash);
+ }
+ }
+ }
+
+ if (PathIndexesToScavenge.empty())
+ {
+ OutScavengedLocalContent = {};
+ return false;
+ }
+
+ std::vector<std::filesystem::path> PathsToScavenge;
+ PathsToScavenge.reserve(PathIndexesToScavenge.size());
+ for (uint32_t ScavengedStatePathIndex : PathIndexesToScavenge)
+ {
+ PathsToScavenge.push_back(OutScavengedLocalContent.Paths[ScavengedStatePathIndex]);
+ }
+
+ // Re-scan the source folder to verify the selected files still match the saved state.
+ FolderContent ValidFolderContent =
+ GetValidFolderContent(m_IOWorkerPool, m_ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}, 0, m_AbortFlag, m_PauseFlag);
+
+ if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent))
+ {
+ std::vector<std::filesystem::path> DeletedPaths;
+ FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths);
+
+ // If the files are modified since the state was saved we ignore the files since we don't
+ // want to incur the cost of scanning/hashing scavenged files
+ DeletedPaths.insert(DeletedPaths.end(), UpdatedContent.Paths.begin(), UpdatedContent.Paths.end());
+ if (!DeletedPaths.empty())
+ {
+ OutScavengedLocalContent =
+ DeletePathsFromChunkedContent(OutScavengedLocalContent,
+ BuildHashLookup(OutScavengedLocalContent.ChunkedContent.SequenceRawHashes),
+ ChunkOrderOffsets,
+ DeletedPaths);
+ }
+ }
+
+ if (OutScavengedLocalContent.Paths.empty())
+ {
+ OutScavengedLocalContent = {};
+ return false;
+ }
+
+ OutScavengedLookup = BuildChunkedContentLookup(OutScavengedLocalContent);
+
+ return true;
+}
+
+// Matches still-unsatisfied remote chunks against one scavenged source and records
+// copy operations for the hits. Copy work is grouped per scavenged source sequence
+// (keyed by its raw hash in InOutRawHashToCopyChunkDataIndex); once a group has
+// accumulated more than 1024 target locations a fresh CopyChunkData is started for
+// that hash so no single work item grows unbounded. For every matched chunk the
+// needs-copy flag is set, the remaining count is decremented, and the matching
+// statistics are updated. Stops early when InOutRemainingChunkCount reaches zero.
+void
+BuildsOperationUpdateFolder::ScavengeSourceForChunks(uint32_t& InOutRemainingChunkCount,
+ std::vector<bool>& InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags,
+ tsl::robin_map<IoHash, size_t, IoHash::Hasher>& InOutRawHashToCopyChunkDataIndex,
+ const std::vector<std::atomic<uint32_t>>& SequenceIndexChunksLeftToWriteCounters,
+ const ChunkedFolderContent& ScavengedContent,
+ const ChunkedContentLookup& ScavengedLookup,
+ std::vector<CopyChunkData>& InOutCopyChunkDatas,
+ uint32_t ScavengedContentIndex,
+ uint64_t& InOutChunkMatchingRemoteCount,
+ uint64_t& InOutChunkMatchingRemoteByteCount)
+{
+ for (uint32_t RemoteChunkIndex = 0;
+ RemoteChunkIndex < m_RemoteContent.ChunkedContent.ChunkHashes.size() && (InOutRemainingChunkCount > 0);
+ RemoteChunkIndex++)
+ {
+ // Skip chunks already claimed by an earlier source.
+ if (!InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex])
+ {
+ const IoHash& RemoteChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
+ if (auto It = ScavengedLookup.ChunkHashToChunkIndex.find(RemoteChunkHash); It != ScavengedLookup.ChunkHashToChunkIndex.end())
+ {
+ // Only sequences that still have chunks left to write are valid targets.
+ std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs =
+ GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteChunkIndex);
+
+ if (!ChunkTargetPtrs.empty())
+ {
+ const uint32_t ScavengedChunkIndex = It->second;
+ const uint64_t ScavengedChunkRawSize = ScavengedContent.ChunkedContent.ChunkRawSizes[ScavengedChunkIndex];
+ // Use the first location of the chunk within the scavenged source as the copy origin.
+ const size_t ChunkSequenceLocationOffset = ScavengedLookup.ChunkSequenceLocationOffset[ScavengedChunkIndex];
+ const ChunkedContentLookup::ChunkSequenceLocation& ScavengeLocation =
+ ScavengedLookup.ChunkSequenceLocations[ChunkSequenceLocationOffset];
+ const IoHash& ScavengedSequenceRawHash =
+ ScavengedContent.ChunkedContent.SequenceRawHashes[ScavengeLocation.SequenceIndex];
+
+ CopyChunkData::ChunkTarget Target = {.TargetChunkLocationCount = gsl::narrow<uint32_t>(ChunkTargetPtrs.size()),
+ .RemoteChunkIndex = RemoteChunkIndex,
+ .CacheFileOffset = ScavengeLocation.Offset};
+ if (auto CopySourceIt = InOutRawHashToCopyChunkDataIndex.find(ScavengedSequenceRawHash);
+ CopySourceIt != InOutRawHashToCopyChunkDataIndex.end())
+ {
+ CopyChunkData& Data = InOutCopyChunkDatas[CopySourceIt->second];
+ // Cap the per-work-item target list; overflow starts a fresh entry for this hash.
+ if (Data.TargetChunkLocationPtrs.size() > 1024)
+ {
+ InOutRawHashToCopyChunkDataIndex.insert_or_assign(ScavengedSequenceRawHash, InOutCopyChunkDatas.size());
+ InOutCopyChunkDatas.push_back(CopyChunkData{.ScavengeSourceIndex = ScavengedContentIndex,
+ .SourceSequenceIndex = ScavengeLocation.SequenceIndex,
+ .TargetChunkLocationPtrs = ChunkTargetPtrs,
+ .ChunkTargets = std::vector<CopyChunkData::ChunkTarget>{Target}});
+ }
+ else
+ {
+ Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(),
+ ChunkTargetPtrs.begin(),
+ ChunkTargetPtrs.end());
+ Data.ChunkTargets.push_back(Target);
+ }
+ }
+ else
+ {
+ // First match for this scavenged sequence: create its copy-data entry.
+ InOutRawHashToCopyChunkDataIndex.insert_or_assign(ScavengedSequenceRawHash, InOutCopyChunkDatas.size());
+ InOutCopyChunkDatas.push_back(CopyChunkData{.ScavengeSourceIndex = ScavengedContentIndex,
+ .SourceSequenceIndex = ScavengeLocation.SequenceIndex,
+ .TargetChunkLocationPtrs = ChunkTargetPtrs,
+ .ChunkTargets = std::vector<CopyChunkData::ChunkTarget>{Target}});
+ }
+ InOutChunkMatchingRemoteCount++;
+ InOutChunkMatchingRemoteByteCount += ScavengedChunkRawSize;
+ InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true;
+ InOutRemainingChunkCount--;
+ }
+ }
+ }
+ }
+}
+
+// Looks for a previously downloaded compressed chunk in the temp download folder.
+// Returns the file's path when it exists and carries a valid compressed header;
+// an invalid leftover file is deleted. Returns an empty path when no usable file
+// is found.
+std::filesystem::path
+BuildsOperationUpdateFolder::FindDownloadedChunk(const IoHash& ChunkHash)
+{
+    ZEN_TRACE_CPU("FindDownloadedChunk");
+
+    std::filesystem::path CandidatePath = m_TempDownloadFolderPath / ChunkHash.ToHexString();
+    if (!IsFile(CandidatePath))
+    {
+        return {};
+    }
+
+    IoBuffer CandidateBuffer = IoBufferBuilder::MakeFromFile(CandidatePath);
+    if (!CandidateBuffer)
+    {
+        return {};
+    }
+
+    IoHash RawHash;
+    uint64_t RawSize;
+    if (CompressedBuffer::ValidateCompressedHeader(CandidateBuffer,
+                                                   RawHash,
+                                                   RawSize,
+                                                   /*OutOptionalTotalCompressedSize*/ nullptr))
+    {
+        return CandidatePath;
+    }
+
+    // Header check failed: the partial/corrupt leftover is useless, drop it.
+    std::error_code DummyEc;
+    RemoveFile(CandidatePath, DummyEc);
+    return {};
+}
+
+// Returns pointers to the chunk-sequence locations (within m_RemoteLookup) that
+// still need this chunk written, i.e. those whose remaining-chunks counter has not
+// reached zero. The returned pointers alias m_RemoteLookup storage and must not
+// outlive it.
+std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>
+BuildsOperationUpdateFolder::GetRemainingChunkTargets(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
+                                                      uint32_t ChunkIndex)
+{
+    ZEN_TRACE_CPU("GetRemainingChunkTargets");
+
+    std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(m_RemoteLookup, ChunkIndex);
+    std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs;
+    if (!ChunkSources.empty())
+    {
+        ChunkTargetPtrs.reserve(ChunkSources.size());
+        for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources)
+        {
+            // A positive counter means the owning sequence still has chunks to write.
+            if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0)
+            {
+                ChunkTargetPtrs.push_back(&Source);
+            }
+        }
+    }
+    return ChunkTargetPtrs;
+}
+
+// Counts how many sequence targets still require this chunk to be written, i.e.
+// the number of its sequence locations whose remaining-chunks counter is non-zero.
+uint64_t
+BuildsOperationUpdateFolder::GetChunkWriteCount(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
+                                                uint32_t ChunkIndex)
+{
+    ZEN_TRACE_CPU("GetChunkWriteCount");
+
+    uint64_t WriteCount = 0;
+    std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(m_RemoteLookup, ChunkIndex);
+    for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources)
+    {
+        if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0)
+        {
+            WriteCount++;
+        }
+    }
+    return WriteCount;
+}
+
+void
+BuildsOperationUpdateFolder::CheckRequiredDiskSpace(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex)
+{
+ tsl::robin_set<uint32_t> ExistingRemotePaths;
+
+ if (m_Options.EnableTargetFolderScavenging)
+ {
+ for (uint32_t LocalPathIndex = 0; LocalPathIndex < m_LocalContent.Paths.size(); LocalPathIndex++)
+ {
+ const IoHash& RawHash = m_LocalContent.RawHashes[LocalPathIndex];
+ const std::filesystem::path& LocalPath = m_LocalContent.Paths[LocalPathIndex];
+
+ if (auto RemotePathIt = RemotePathToRemoteIndex.find(LocalPath.generic_string()); RemotePathIt != RemotePathToRemoteIndex.end())
+ {
+ const uint32_t RemotePathIndex = RemotePathIt->second;
+ if (m_RemoteContent.RawHashes[RemotePathIndex] == RawHash)
+ {
+ ExistingRemotePaths.insert(RemotePathIndex);
+ }
+ }
+ }
+ }
+
+ uint64_t RequiredSpace = 0;
+ for (uint32_t RemotePathIndex = 0; RemotePathIndex < m_RemoteContent.Paths.size(); RemotePathIndex++)
+ {
+ if (!ExistingRemotePaths.contains(RemotePathIndex))
+ {
+ RequiredSpace += m_RemoteContent.RawSizes[RemotePathIndex];
+ }
+ }
+
+ std::error_code Ec;
+ DiskSpace Space = DiskSpaceInfo(m_Path, Ec);
+ if (Ec)
+ {
+ throw std::runtime_error(fmt::format("Get free disk space for target path '{}' FAILED, reason: {}", m_Path, Ec.message()));
+ }
+ if (Space.Free < (RequiredSpace + 16u * 1024u * 1024u))
+ {
+ throw std::runtime_error(
+ fmt::format("Not enough free space for target path '{}', {} of free space is needed but only {} is available",
+ m_Path,
+ NiceBytes(RequiredSpace),
+ NiceBytes(Space.Free)));
+ }
+}
+
+void
+BuildsOperationUpdateFolder::WriteScavengedSequenceToCache(const std::filesystem::path& ScavengeRootPath,
+ const ChunkedFolderContent& ScavengedContent,
+ const ScavengedSequenceCopyOperation& ScavengeOp)
+{
+ ZEN_TRACE_CPU("WriteScavengedSequenceToCache");
+
+ const std::filesystem::path ScavengedPath = ScavengedContent.Paths[ScavengeOp.ScavengedPathIndex];
+ const std::filesystem::path ScavengedFilePath = (ScavengeRootPath / ScavengedPath).make_preferred();
+ ZEN_ASSERT_SLOW(FileSizeFromPath(ScavengedFilePath) == ScavengeOp.RawSize);
+
+ const IoHash& RemoteSequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[ScavengeOp.RemoteSequenceIndex];
+ const std::filesystem::path TempFilePath = GetTempChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash);
+
+ const uint64_t RawSize = ScavengedContent.RawSizes[ScavengeOp.ScavengedPathIndex];
+ FastCopyFile(m_Options.AllowFileClone,
+ m_Options.UseSparseFiles,
+ ScavengedFilePath,
+ TempFilePath,
+ RawSize,
+ m_DiskStats.WriteCount,
+ m_DiskStats.WriteByteCount,
+ m_DiskStats.CloneCount,
+ m_DiskStats.CloneByteCount);
+
+ const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(m_CacheFolderPath, RemoteSequenceRawHash);
+ RenameFile(TempFilePath, CacheFilePath);
+
+ m_WrittenChunkByteCount += RawSize;
+ if (m_Options.ValidateCompletedSequences)
+ {
+ m_ValidatedChunkByteCount += RawSize;
+ }
+}
+
+void
+BuildsOperationUpdateFolder::WriteLooseChunk(const uint32_t RemoteChunkIndex,
+ const BlobsExistsResult& ExistsResult,
+ std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
+ std::atomic<uint64_t>& WritePartsComplete,
+ std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs,
+ BufferedWriteFileCache& WriteCache,
+ ParallelWork& Work,
+ uint64_t TotalRequestCount,
+ uint64_t TotalPartWriteCount,
+ FilteredRate& FilteredDownloadedBytesPerSecond,
+ FilteredRate& FilteredWrittenBytesPerSecond)
+{
+ const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
+ std::filesystem::path ExistingCompressedChunkPath = FindDownloadedChunk(ChunkHash);
+ if (!ExistingCompressedChunkPath.empty())
+ {
+ if (m_DownloadStats.RequestsCompleteCount.fetch_add(1) + 1 == TotalRequestCount)
+ {
+ FilteredDownloadedBytesPerSecond.Stop();
+ }
+ }
+ if (!m_AbortFlag)
+ {
+ if (!ExistingCompressedChunkPath.empty())
+ {
+ Work.ScheduleWork(
+ m_IOWorkerPool,
+ [this,
+ SequenceIndexChunksLeftToWriteCounters,
+ &WriteCache,
+ &Work,
+ &WritePartsComplete,
+ TotalPartWriteCount,
+ &FilteredWrittenBytesPerSecond,
+ RemoteChunkIndex,
+ ChunkTargetPtrs = std::move(ChunkTargetPtrs),
+ CompressedChunkPath = std::move(ExistingCompressedChunkPath)](std::atomic<bool>& AbortFlag) {
+ if (!AbortFlag)
+ {
+ ZEN_TRACE_CPU("Async_WritePreDownloadedChunk");
+
+ FilteredWrittenBytesPerSecond.Start();
+
+ const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
+
+ IoBuffer CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath);
+ if (!CompressedPart)
+ {
+ throw std::runtime_error(
+ fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath));
+ }
+
+ bool NeedHashVerify =
+ WriteCompressedChunkToCache(ChunkHash, ChunkTargetPtrs, WriteCache, std::move(CompressedPart));
+ bool WritePartsDone = WritePartsComplete.fetch_add(1) + 1 == TotalPartWriteCount;
+
+ if (!AbortFlag)
+ {
+ if (WritePartsDone)
+ {
+ FilteredWrittenBytesPerSecond.Stop();
+ }
+
+ std::error_code Ec = TryRemoveFile(CompressedChunkPath);
+ if (Ec)
+ {
+ ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", CompressedChunkPath, Ec.value(), Ec.message());
+ }
+
+ std::vector<uint32_t> CompletedSequences =
+ CompleteChunkTargets(ChunkTargetPtrs, SequenceIndexChunksLeftToWriteCounters);
+ WriteCache.Close(CompletedSequences);
+ if (NeedHashVerify)
+ {
+ VerifyAndCompleteChunkSequencesAsync(CompletedSequences, Work);
+ }
+ else
+ {
+ FinalizeChunkSequences(CompletedSequences);
+ }
+ }
+ }
+ });
+ }
+ else
+ {
+ Work.ScheduleWork(m_NetworkPool,
+ [this,
+ &ExistsResult,
+ SequenceIndexChunksLeftToWriteCounters,
+ &WriteCache,
+ &Work,
+ &WritePartsComplete,
+ TotalPartWriteCount,
+ TotalRequestCount,
+ &FilteredDownloadedBytesPerSecond,
+ &FilteredWrittenBytesPerSecond,
+ RemoteChunkIndex,
+ ChunkTargetPtrs = std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>(
+ std::move(ChunkTargetPtrs))](std::atomic<bool>&) mutable {
+ if (!m_AbortFlag)
+ {
+ ZEN_TRACE_CPU("Async_DownloadChunk");
+
+ FilteredDownloadedBytesPerSecond.Start();
+ DownloadBuildBlob(RemoteChunkIndex,
+ ExistsResult,
+ Work,
+ TotalRequestCount,
+ FilteredDownloadedBytesPerSecond,
+ [this,
+ &ExistsResult,
+ SequenceIndexChunksLeftToWriteCounters,
+ &WriteCache,
+ &Work,
+ &WritePartsComplete,
+ TotalPartWriteCount,
+ RemoteChunkIndex,
+ &FilteredWrittenBytesPerSecond,
+ ChunkTargetPtrs = std::move(ChunkTargetPtrs)](IoBuffer&& Payload) mutable {
+ AsyncWriteDownloadedChunk(RemoteChunkIndex,
+ ExistsResult,
+ std::move(ChunkTargetPtrs),
+ WriteCache,
+ Work,
+ std::move(Payload),
+ SequenceIndexChunksLeftToWriteCounters,
+ WritePartsComplete,
+ TotalPartWriteCount,
+ FilteredWrittenBytesPerSecond);
+ });
+ }
+ });
+ }
+ }
+}
+
// Fetches the compressed payload for one remote chunk and forwards it to
// OnDownloaded. Sources are tried in order: the cache storage (only when the
// earlier bulk exists-query reported the blob present there), then the remote
// build storage -- using a multipart download for payloads at or above
// LargeAttachmentSize. Download statistics are updated on completion, and the
// rate filter is stopped by whichever call finishes the final request.
// Exceptions from the remote fetch are swallowed while aborting.
void
BuildsOperationUpdateFolder::DownloadBuildBlob(uint32_t RemoteChunkIndex,
                                               const BlobsExistsResult& ExistsResult,
                                               ParallelWork& Work,
                                               uint64_t TotalRequestCount,
                                               FilteredRate& FilteredDownloadedBytesPerSecond,
                                               std::function<void(IoBuffer&& Payload)>&& OnDownloaded)
{
    const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
    // FilteredDownloadedBytesPerSecond.Start();
    IoBuffer BuildBlob;
    // Only query the cache when the exists-check already confirmed a hit.
    const bool ExistsInCache = m_Storage.CacheStorage && ExistsResult.ExistingBlobs.contains(ChunkHash);
    if (ExistsInCache)
    {
        BuildBlob = m_Storage.CacheStorage->GetBuildBlob(m_BuildId, ChunkHash);
    }
    if (BuildBlob)
    {
        // Cache hit: account for the bytes and complete synchronously.
        uint64_t BlobSize = BuildBlob.GetSize();
        m_DownloadStats.DownloadedChunkCount++;
        m_DownloadStats.DownloadedChunkByteCount += BlobSize;
        if (m_DownloadStats.RequestsCompleteCount.fetch_add(1) + 1 == TotalRequestCount)
        {
            FilteredDownloadedBytesPerSecond.Stop();
        }
        OnDownloaded(std::move(BuildBlob));
    }
    else
    {
        if (m_RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= m_Options.LargeAttachmentSize)
        {
            // Large payloads are fetched in multiple parts on the network
            // pool; the completion lambda then mirrors the cache-hit
            // accounting above.
            DownloadLargeBlob(
                *m_Storage.BuildStorage,
                m_TempDownloadFolderPath,
                m_BuildId,
                ChunkHash,
                m_Options.PreferredMultipartChunkSize,
                Work,
                m_NetworkPool,
                m_DownloadStats.DownloadedChunkByteCount,
                m_DownloadStats.MultipartAttachmentCount,
                [this, &FilteredDownloadedBytesPerSecond, TotalRequestCount, OnDownloaded = std::move(OnDownloaded)](IoBuffer&& Payload) {
                    m_DownloadStats.DownloadedChunkCount++;
                    if (m_DownloadStats.RequestsCompleteCount.fetch_add(1) + 1 == TotalRequestCount)
                    {
                        FilteredDownloadedBytesPerSecond.Stop();
                    }

                    OnDownloaded(std::move(Payload));
                });
        }
        else
        {
            // Small payloads: single synchronous fetch from build storage.
            try
            {
                BuildBlob = m_Storage.BuildStorage->GetBuildBlob(m_BuildId, ChunkHash);
            }
            catch (const std::exception&)
            {
                // Silence http errors due to abort
                if (!m_AbortFlag)
                {
                    throw;
                }
            }
            if (!m_AbortFlag)
            {
                // A chunk that neither cache nor build storage can produce is
                // a fatal inconsistency for this update.
                if (!BuildBlob)
                {
                    throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash));
                }

                if (!m_AbortFlag)
                {
                    uint64_t BlobSize = BuildBlob.GetSize();
                    m_DownloadStats.DownloadedChunkCount++;
                    m_DownloadStats.DownloadedChunkByteCount += BlobSize;
                    if (m_DownloadStats.RequestsCompleteCount.fetch_add(1) + 1 == TotalRequestCount)
                    {
                        FilteredDownloadedBytesPerSecond.Stop();
                    }

                    OnDownloaded(std::move(BuildBlob));
                }
            }
        }
    }
}
+
// Downloads the requested byte ranges of one chunk block and hands each
// fetched batch to OnDownloaded. The cache storage is tried first when the
// block is known to exist there (legacy single-range request, or a
// multi-range request); any remaining ranges are fetched from the remote
// build storage in batches capped by the host's MaxRangeCountPerRequest. A
// backend may answer a ranged request with the whole block payload (signalled
// by an empty range list); that payload then satisfies all remaining ranges
// and -- for the remote path -- is optionally written back to the cache.
void
BuildsOperationUpdateFolder::DownloadPartialBlock(
    std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRanges,
    size_t BlockRangeStartIndex,
    size_t BlockRangeCount,
    const BlobsExistsResult& ExistsResult,
    uint64_t TotalRequestCount,
    FilteredRate& FilteredDownloadedBytesPerSecond,
    std::function<void(IoBuffer&& InMemoryBuffer,
                       const std::filesystem::path& OnDiskPath,
                       size_t BlockRangeStartIndex,
                       std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>&& OnDownloaded)
{
    const uint32_t BlockIndex = BlockRanges[BlockRangeStartIndex].BlockIndex;

    const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];

    // Common completion path for any fetched batch: updates download stats,
    // stops the rate filter once the final request completes, spills large
    // payloads to a uniquely named temp file, then invokes OnDownloaded.
    auto ProcessDownload = [this](
                               const ChunkBlockDescription& BlockDescription,
                               IoBuffer&& BlockRangeBuffer,
                               size_t BlockRangeStartIndex,
                               std::span<const std::pair<uint64_t, uint64_t>> BlockOffsetAndLengths,
                               uint64_t TotalRequestCount,
                               FilteredRate& FilteredDownloadedBytesPerSecond,
                               const std::function<void(IoBuffer && InMemoryBuffer,
                                                        const std::filesystem::path& OnDiskPath,
                                                        size_t BlockRangeStartIndex,
                                                        std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>& OnDownloaded) {
        uint64_t BlockRangeBufferSize = BlockRangeBuffer.GetSize();
        m_DownloadStats.DownloadedBlockCount++;
        m_DownloadStats.DownloadedBlockByteCount += BlockRangeBufferSize;
        // Each delivered range counts as one request towards the total.
        if (m_DownloadStats.RequestsCompleteCount.fetch_add(BlockOffsetAndLengths.size()) + BlockOffsetAndLengths.size() ==
            TotalRequestCount)
        {
            FilteredDownloadedBytesPerSecond.Stop();
        }

        // Hash the (offset, length) pairs into a stable id so concurrent
        // fetches of different range sets get distinct temp file names.
        IoHashStream RangeId;
        for (const std::pair<uint64_t, uint64_t>& Range : BlockOffsetAndLengths)
        {
            RangeId.Append(&Range.first, sizeof(uint64_t));
            RangeId.Append(&Range.second, sizeof(uint64_t));
        }
        std::filesystem::path BlockChunkPath =
            TryMoveDownloadedChunk(BlockRangeBuffer,
                                   m_TempBlockFolderPath / fmt::format("{}_{}", BlockDescription.BlockHash, RangeId.GetHash()),
                                   /* ForceDiskBased */ BlockRangeBufferSize > m_Options.MaximumInMemoryPayloadSize);

        if (!m_AbortFlag)
        {
            OnDownloaded(std::move(BlockRangeBuffer), std::move(BlockChunkPath), BlockRangeStartIndex, BlockOffsetAndLengths);
        }
    };

    // Materialize the (offset, length) pairs for the requested descriptors.
    std::vector<std::pair<uint64_t, uint64_t>> Ranges;
    Ranges.reserve(BlockRangeCount);
    for (size_t BlockRangeIndex = BlockRangeStartIndex; BlockRangeIndex < BlockRangeStartIndex + BlockRangeCount; BlockRangeIndex++)
    {
        const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = BlockRanges[BlockRangeIndex];
        Ranges.push_back(std::make_pair(BlockRange.RangeStart, BlockRange.RangeLength));
    }

    const bool ExistsInCache = m_Storage.CacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash);

    size_t SubBlockRangeCount = BlockRangeCount;
    size_t SubRangeCountComplete = 0;
    std::span<const std::pair<uint64_t, uint64_t>> RangesSpan(Ranges);
    while (SubRangeCountComplete < SubBlockRangeCount)
    {
        if (m_AbortFlag)
        {
            break;
        }

        // First try to get subrange from cache.
        // If not successful, try to get the ranges from the build store and adapt SubRangeCount...

        size_t SubRangeStartIndex = BlockRangeStartIndex + SubRangeCountComplete;
        if (ExistsInCache)
        {
            size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, m_Storage.CacheHost.Caps.MaxRangeCountPerRequest);

            if (SubRangeCount == 1)
            {
                // Legacy single-range path, prefer that for max compatibility

                const std::pair<uint64_t, uint64_t> SubRange = RangesSpan[SubRangeCountComplete];
                IoBuffer PayloadBuffer =
                    m_Storage.CacheStorage->GetBuildBlob(m_BuildId, BlockDescription.BlockHash, SubRange.first, SubRange.second);
                if (m_AbortFlag)
                {
                    break;
                }
                if (PayloadBuffer)
                {
                    // The returned buffer starts at the requested offset, so
                    // the forwarded range is rebased to offset 0.
                    ProcessDownload(BlockDescription,
                                    std::move(PayloadBuffer),
                                    SubRangeStartIndex,
                                    std::vector<std::pair<uint64_t, uint64_t>>{std::make_pair(0u, SubRange.second)},
                                    TotalRequestCount,
                                    FilteredDownloadedBytesPerSecond,
                                    OnDownloaded);
                    SubRangeCountComplete += SubRangeCount;
                    continue;
                }
            }
            else
            {
                auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount);

                BuildStorageCache::BuildBlobRanges RangeBuffers =
                    m_Storage.CacheStorage->GetBuildBlobRanges(m_BuildId, BlockDescription.BlockHash, SubRanges);
                if (m_AbortFlag)
                {
                    break;
                }
                if (RangeBuffers.PayloadBuffer)
                {
                    if (RangeBuffers.Ranges.empty())
                    {
                        // Empty range list: the whole block came back, which
                        // covers every remaining requested range in one go.
                        SubRangeCount = Ranges.size() - SubRangeCountComplete;
                        ProcessDownload(BlockDescription,
                                        std::move(RangeBuffers.PayloadBuffer),
                                        SubRangeStartIndex,
                                        RangesSpan.subspan(SubRangeCountComplete, SubRangeCount),
                                        TotalRequestCount,
                                        FilteredDownloadedBytesPerSecond,
                                        OnDownloaded);
                        SubRangeCountComplete += SubRangeCount;
                        continue;
                    }
                    else if (RangeBuffers.Ranges.size() == SubRangeCount)
                    {
                        ProcessDownload(BlockDescription,
                                        std::move(RangeBuffers.PayloadBuffer),
                                        SubRangeStartIndex,
                                        RangeBuffers.Ranges,
                                        TotalRequestCount,
                                        FilteredDownloadedBytesPerSecond,
                                        OnDownloaded);
                        SubRangeCountComplete += SubRangeCount;
                        continue;
                    }
                    // NOTE(review): a partial/mismatched range answer from the
                    // cache falls through to the remote fetch below.
                }
            }
        }

        // Cache miss or unusable cache answer: fetch the next batch of ranges
        // from the remote build storage.
        size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, m_Storage.BuildStorageHost.Caps.MaxRangeCountPerRequest);

        auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount);

        BuildStorageBase::BuildBlobRanges RangeBuffers;

        try
        {
            RangeBuffers = m_Storage.BuildStorage->GetBuildBlobRanges(m_BuildId, BlockDescription.BlockHash, SubRanges);
        }
        catch (const std::exception&)
        {
            // Silence http errors due to abort
            if (!m_AbortFlag)
            {
                throw;
            }
        }

        if (!m_AbortFlag)
        {
            if (RangeBuffers.PayloadBuffer)
            {
                if (RangeBuffers.Ranges.empty())
                {
                    // Jupiter will ignore the ranges and send the whole payload if it fetches the payload from S3
                    // Upload to cache (if enabled) and use the whole payload for the remaining ranges

                    const uint64_t Size = RangeBuffers.PayloadBuffer.GetSize();

                    const bool PopulateCache = !ExistsInCache && m_Storage.CacheStorage && m_Options.PopulateCache;

                    // Force the payload onto disk when it must be re-read for
                    // a cache upload or exceeds the in-memory limit.
                    std::filesystem::path BlockPath =
                        TryMoveDownloadedChunk(RangeBuffers.PayloadBuffer,
                                               m_TempBlockFolderPath / BlockDescription.BlockHash.ToHexString(),
                                               /* ForceDiskBased */ PopulateCache || Size > m_Options.MaximumInMemoryPayloadSize);
                    if (!BlockPath.empty())
                    {
                        RangeBuffers.PayloadBuffer = IoBufferBuilder::MakeFromFile(BlockPath);
                        if (!RangeBuffers.PayloadBuffer)
                        {
                            throw std::runtime_error(
                                fmt::format("Failed to read block {} from temporary path '{}'", BlockDescription.BlockHash, BlockPath));
                        }
                        RangeBuffers.PayloadBuffer.SetDeleteOnClose(true);
                    }

                    if (PopulateCache)
                    {
                        m_Storage.CacheStorage->PutBuildBlob(m_BuildId,
                                                             BlockDescription.BlockHash,
                                                             ZenContentType::kCompressedBinary,
                                                             CompositeBuffer(SharedBuffer(RangeBuffers.PayloadBuffer)));
                    }

                    if (m_AbortFlag)
                    {
                        break;
                    }

                    SubRangeCount = Ranges.size() - SubRangeCountComplete;
                    ProcessDownload(BlockDescription,
                                    std::move(RangeBuffers.PayloadBuffer),
                                    SubRangeStartIndex,
                                    RangesSpan.subspan(SubRangeCountComplete, SubRangeCount),
                                    TotalRequestCount,
                                    FilteredDownloadedBytesPerSecond,
                                    OnDownloaded);
                }
                else
                {
                    // Ranged answer from the build store must match exactly.
                    if (RangeBuffers.Ranges.size() != SubRanges.size())
                    {
                        throw std::runtime_error(fmt::format("Fetching {} ranges from {} resulted in {} ranges",
                                                             SubRanges.size(),
                                                             BlockDescription.BlockHash,
                                                             RangeBuffers.Ranges.size()));
                    }
                    ProcessDownload(BlockDescription,
                                    std::move(RangeBuffers.PayloadBuffer),
                                    SubRangeStartIndex,
                                    RangeBuffers.Ranges,
                                    TotalRequestCount,
                                    FilteredDownloadedBytesPerSecond,
                                    OnDownloaded);
                }
            }
            else
            {
                throw std::runtime_error(
                    fmt::format("Block {} is missing when fetching {} ranges", BlockDescription.BlockHash, SubRangeCount));
            }

            SubRangeCountComplete += SubRangeCount;
        }
    }
}
+
// Copies chunk data that already exists locally -- either in the update
// target folder or in one of the scavenge folders -- into cache sequence
// files. Adjacent write ops are coalesced into single reads, and filesystem
// cloning is used for eligible ranges when CloneQuery permits it. Returns the
// sequence index of every performed write op (sorted by sequence then offset;
// duplicates possible when a sequence receives multiple chunks).
std::vector<uint32_t>
BuildsOperationUpdateFolder::WriteLocalChunkToCache(CloneQueryInterface* CloneQuery,
                                                    const CopyChunkData& CopyData,
                                                    const std::vector<ChunkedFolderContent>& ScavengedContents,
                                                    const std::vector<ChunkedContentLookup>& ScavengedLookups,
                                                    const std::vector<std::filesystem::path>& ScavengedPaths,
                                                    BufferedWriteFileCache& WriteCache)
{
    ZEN_TRACE_CPU("WriteLocalChunkToCache");

    std::filesystem::path SourceFilePath;

    // Resolve the source file: ScavengeSourceIndex == -1 means the source
    // sequence lives in the local target folder, otherwise it comes from the
    // indexed scavenge folder.
    if (CopyData.ScavengeSourceIndex == (uint32_t)-1)
    {
        const uint32_t LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex];
        SourceFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred();
    }
    else
    {
        const ChunkedFolderContent& ScavengedContent = ScavengedContents[CopyData.ScavengeSourceIndex];
        const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[CopyData.ScavengeSourceIndex];
        const std::filesystem::path ScavengedPath = ScavengedPaths[CopyData.ScavengeSourceIndex];
        const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex];
        SourceFilePath = (ScavengedPath / ScavengedContent.Paths[ScavengedPathIndex]).make_preferred();
    }
    ZEN_ASSERT_SLOW(IsFile(SourceFilePath));
    ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty());

    uint64_t CacheLocalFileBytesRead = 0;

    size_t TargetStart = 0;
    const std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> AllTargets(CopyData.TargetChunkLocationPtrs);

    // One write op = one chunk copied to one target location. CacheFileOffset
    // is the chunk's offset within the *source* file.
    struct WriteOp
    {
        const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr;
        uint64_t CacheFileOffset = (uint64_t)-1;
        uint32_t ChunkIndex = (uint32_t)-1;
    };

    std::vector<WriteOp> WriteOps;

    if (!m_AbortFlag)
    {
        ZEN_TRACE_CPU("Sort");
        // Flatten the per-chunk target lists into individual write ops, then
        // order them by (sequence, offset) so adjacent writes can be merged.
        WriteOps.reserve(AllTargets.size());
        for (const CopyChunkData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets)
        {
            std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> TargetRange =
                AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount);
            for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange)
            {
                WriteOps.push_back(
                    WriteOp{.Target = Target, .CacheFileOffset = ChunkTarget.CacheFileOffset, .ChunkIndex = ChunkTarget.RemoteChunkIndex});
            }
            TargetStart += ChunkTarget.TargetChunkLocationCount;
        }

        std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) {
            if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex)
            {
                return true;
            }
            else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex)
            {
                return false;
            }
            if (Lhs.Target->Offset < Rhs.Target->Offset)
            {
                return true;
            }
            return false;
        });
    }

    if (!m_AbortFlag)
    {
        ZEN_TRACE_CPU("Write");

        // NOTE(review): this set appears unused in the function body below --
        // candidate for removal (verify against full file history).
        tsl::robin_set<uint32_t> ChunkIndexesWritten;

        BufferedOpenFile SourceFile(SourceFilePath,
                                    m_DiskStats.OpenReadCount,
                                    m_DiskStats.CurrentOpenFileCount,
                                    m_DiskStats.ReadCount,
                                    m_DiskStats.ReadByteCount);

        bool CanCloneSource = CloneQuery && CloneQuery->CanClone(SourceFile.Handle());

        BufferedWriteFileCache::Local LocalWriter(WriteCache);

        for (size_t WriteOpIndex = 0; WriteOpIndex < WriteOps.size();)
        {
            if (m_AbortFlag)
            {
                break;
            }
            const WriteOp& Op = WriteOps[WriteOpIndex];

            const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex;
            const uint32_t RemotePathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex];
            const uint64_t TargetSize = m_RemoteContent.RawSizes[RemotePathIndex];
            const uint64_t ChunkSize = m_RemoteContent.ChunkedContent.ChunkRawSizes[Op.ChunkIndex];

            // Coalesce: extend the run while the next op continues both the
            // source and target contiguously, stays within the same sequence,
            // and the combined read fits in one read block.
            uint64_t ReadLength = ChunkSize;
            size_t WriteCount = 1;
            uint64_t OpSourceEnd = Op.CacheFileOffset + ChunkSize;
            uint64_t OpTargetEnd = Op.Target->Offset + ChunkSize;
            while ((WriteOpIndex + WriteCount) < WriteOps.size())
            {
                const WriteOp& NextOp = WriteOps[WriteOpIndex + WriteCount];
                if (NextOp.Target->SequenceIndex != Op.Target->SequenceIndex)
                {
                    break;
                }
                if (NextOp.Target->Offset != OpTargetEnd)
                {
                    break;
                }
                if (NextOp.CacheFileOffset != OpSourceEnd)
                {
                    break;
                }
                const uint64_t NextChunkLength = m_RemoteContent.ChunkedContent.ChunkRawSizes[NextOp.ChunkIndex];
                if (ReadLength + NextChunkLength > BufferedOpenFile::BlockSize)
                {
                    break;
                }
                ReadLength += NextChunkLength;
                OpSourceEnd += NextChunkLength;
                OpTargetEnd += NextChunkLength;
                WriteCount++;
            }

            {
                bool DidClone = false;

                if (CanCloneSource)
                {
                    // Ask the filesystem which part of the run is clonable;
                    // PreBytes/PostBytes are the unaligned edges that must be
                    // copied the ordinary way.
                    uint64_t PreBytes = 0;
                    uint64_t PostBytes = 0;
                    uint64_t ClonableBytes =
                        CloneQuery->GetClonableRange(Op.CacheFileOffset, Op.Target->Offset, ReadLength, PreBytes, PostBytes);
                    if (ClonableBytes > 0)
                    {
                        // We need to open the file...
                        BufferedWriteFileCache::Local::Writer* Writer = LocalWriter.GetWriter(RemoteSequenceIndex);
                        if (!Writer)
                        {
                            Writer = LocalWriter.PutWriter(RemoteSequenceIndex, std::make_unique<BufferedWriteFileCache::Local::Writer>());

                            Writer->File = std::make_unique<BasicFile>();

                            const std::filesystem::path FileName =
                                GetTempChunkedSequenceFileName(m_CacheFolderPath,
                                                               m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]);
                            Writer->File->Open(FileName, BasicFile::Mode::kWrite);
                            if (m_Options.UseSparseFiles)
                            {
                                PrepareFileForScatteredWrite(Writer->File->Handle(), TargetSize);
                            }
                        }
                        DidClone = CloneQuery->TryClone(SourceFile.Handle(),
                                                        Writer->File->Handle(),
                                                        Op.CacheFileOffset + PreBytes,
                                                        Op.Target->Offset + PreBytes,
                                                        ClonableBytes,
                                                        TargetSize);
                        if (DidClone)
                        {
                            m_DiskStats.WriteCount++;
                            m_DiskStats.WriteByteCount += ClonableBytes;

                            m_DiskStats.CloneCount++;
                            m_DiskStats.CloneByteCount += ClonableBytes;

                            m_WrittenChunkByteCount += ClonableBytes;

                            // Copy the unaligned edges around the cloned range.
                            if (PreBytes > 0)
                            {
                                CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, PreBytes);
                                const uint64_t FileOffset = Op.Target->Offset;

                                WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex);
                            }
                            if (PostBytes > 0)
                            {
                                CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset + ReadLength - PostBytes, PostBytes);
                                const uint64_t FileOffset = Op.Target->Offset + ReadLength - PostBytes;

                                WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex);
                            }
                        }
                    }
                }

                // Fallback: plain buffered read + write of the whole run.
                if (!DidClone)
                {
                    CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ReadLength);

                    const uint64_t FileOffset = Op.Target->Offset;

                    WriteSequenceChunkToCache(LocalWriter, ChunkSource, RemoteSequenceIndex, FileOffset, RemotePathIndex);
                }
            }

            CacheLocalFileBytesRead += ReadLength; // TODO: This should be the sum of unique chunk sizes?

            WriteOpIndex += WriteCount;
        }
    }

    if (m_Options.IsVerbose)
    {
        ZEN_INFO("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), SourceFilePath);
    }

    // One entry per write op, in the sorted order established above.
    std::vector<uint32_t> Result;
    Result.reserve(WriteOps.size());

    for (const WriteOp& Op : WriteOps)
    {
        Result.push_back(Op.Target->SequenceIndex);
    }
    return Result;
}
+
+bool
+BuildsOperationUpdateFolder::WriteCompressedChunkToCache(
+ const IoHash& ChunkHash,
+ const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs,
+ BufferedWriteFileCache& WriteCache,
+ IoBuffer&& CompressedPart)
+{
+ ZEN_TRACE_CPU("WriteCompressedChunkToCache");
+
+ auto ChunkHashToChunkIndexIt = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash);
+ ZEN_ASSERT(ChunkHashToChunkIndexIt != m_RemoteLookup.ChunkHashToChunkIndex.end());
+ if (IsSingleFileChunk(m_RemoteContent, ChunkTargetPtrs))
+ {
+ const std::uint32_t SequenceIndex = ChunkTargetPtrs.front()->SequenceIndex;
+ const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex];
+ StreamDecompress(SequenceRawHash, CompositeBuffer(std::move(CompressedPart)));
+ return false;
+ }
+ else
+ {
+ IoHash RawHash;
+ uint64_t RawSize;
+ CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompositeBuffer(std::move(CompressedPart)), RawHash, RawSize);
+ if (!Compressed)
+ {
+ throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", ChunkHash));
+ }
+ if (RawHash != ChunkHash)
+ {
+ throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, ChunkHash));
+ }
+
+ BufferedWriteFileCache::Local LocalWriter(WriteCache);
+
+ IoHashStream Hash;
+ bool CouldDecompress = Compressed.DecompressToStream(
+ 0,
+ (uint64_t)-1,
+ [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
+ ZEN_UNUSED(SourceOffset);
+ ZEN_TRACE_CPU("Async_StreamDecompress_Write");
+ m_DiskStats.ReadByteCount += SourceSize;
+ if (!m_AbortFlag)
+ {
+ for (const ChunkedContentLookup::ChunkSequenceLocation* TargetPtr : ChunkTargetPtrs)
+ {
+ const auto& Target = *TargetPtr;
+ const uint64_t FileOffset = Target.Offset + Offset;
+ const uint32_t SequenceIndex = Target.SequenceIndex;
+ const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex];
+
+ WriteSequenceChunkToCache(LocalWriter, RangeBuffer, SequenceIndex, FileOffset, PathIndex);
+ }
+
+ return true;
+ }
+ return false;
+ });
+
+ if (m_AbortFlag)
+ {
+ return false;
+ }
+
+ if (!CouldDecompress)
+ {
+ throw std::runtime_error(fmt::format("Failed to decompress large chunk {}", ChunkHash));
+ }
+
+ return true;
+ }
+}
+
+void
+BuildsOperationUpdateFolder::StreamDecompress(const IoHash& SequenceRawHash, CompositeBuffer&& CompressedPart)
+{
+ ZEN_TRACE_CPU("StreamDecompress");
+ const std::filesystem::path TempChunkSequenceFileName = GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash);
+ TemporaryFile DecompressedTemp;
+ std::error_code Ec;
+ DecompressedTemp.CreateTemporary(TempChunkSequenceFileName.parent_path(), Ec);
+ if (Ec)
+ {
+ throw std::runtime_error(fmt::format("Failed creating temporary file for decompressing large blob {}, reason: ({}) {}",
+ SequenceRawHash,
+ Ec.value(),
+ Ec.message()));
+ }
+ IoHash RawHash;
+ uint64_t RawSize;
+ CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompressedPart, RawHash, RawSize);
+ if (!Compressed)
+ {
+ throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", SequenceRawHash));
+ }
+ if (RawHash != SequenceRawHash)
+ {
+ throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, SequenceRawHash));
+ }
+ PrepareFileForScatteredWrite(DecompressedTemp.Handle(), RawSize);
+
+ IoHashStream Hash;
+ bool CouldDecompress =
+ Compressed.DecompressToStream(0,
+ (uint64_t)-1,
+ [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
+ ZEN_UNUSED(SourceOffset);
+ ZEN_TRACE_CPU("StreamDecompress_Write");
+ m_DiskStats.ReadCount++;
+ m_DiskStats.ReadByteCount += SourceSize;
+ if (!m_AbortFlag)
+ {
+ for (const SharedBuffer& Segment : RangeBuffer.GetSegments())
+ {
+ if (m_Options.ValidateCompletedSequences)
+ {
+ Hash.Append(Segment.GetView());
+ m_ValidatedChunkByteCount += Segment.GetSize();
+ }
+ DecompressedTemp.Write(Segment, Offset);
+ Offset += Segment.GetSize();
+ m_DiskStats.WriteByteCount += Segment.GetSize();
+ m_DiskStats.WriteCount++;
+ m_WrittenChunkByteCount += Segment.GetSize();
+ }
+ return true;
+ }
+ return false;
+ });
+
+ if (m_AbortFlag)
+ {
+ return;
+ }
+
+ if (!CouldDecompress)
+ {
+ throw std::runtime_error(fmt::format("Failed to decompress large blob {}", SequenceRawHash));
+ }
+ if (m_Options.ValidateCompletedSequences)
+ {
+ const IoHash VerifyHash = Hash.GetHash();
+ if (VerifyHash != SequenceRawHash)
+ {
+ throw std::runtime_error(
+ fmt::format("Decompressed blob payload hash {} does not match expected hash {}", VerifyHash, SequenceRawHash));
+ }
+ }
+ DecompressedTemp.MoveTemporaryIntoPlace(TempChunkSequenceFileName, Ec);
+ if (Ec)
+ {
+ throw std::runtime_error(fmt::format("Failed moving temporary file for decompressing large blob {}, reason: ({}) {}",
+ SequenceRawHash,
+ Ec.value(),
+ Ec.message()));
+ }
+ // WriteChunkStats.ChunkCountWritten++;
+}
+
+void
+BuildsOperationUpdateFolder::WriteSequenceChunkToCache(BufferedWriteFileCache::Local& LocalWriter,
+ const CompositeBuffer& Chunk,
+ const uint32_t SequenceIndex,
+ const uint64_t FileOffset,
+ const uint32_t PathIndex)
+{
+ ZEN_TRACE_CPU("WriteSequenceChunkToCache");
+
+ const uint64_t SequenceSize = m_RemoteContent.RawSizes[PathIndex];
+
+ auto OpenFile = [&](BasicFile& File) {
+ const std::filesystem::path FileName =
+ GetTempChunkedSequenceFileName(m_CacheFolderPath, m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]);
+ File.Open(FileName, BasicFile::Mode::kWrite);
+ if (m_Options.UseSparseFiles)
+ {
+ PrepareFileForScatteredWrite(File.Handle(), SequenceSize);
+ }
+ };
+
+ const uint64_t ChunkSize = Chunk.GetSize();
+ ZEN_ASSERT(FileOffset + ChunkSize <= SequenceSize);
+ if (ChunkSize == SequenceSize)
+ {
+ BasicFile SingleChunkFile;
+ OpenFile(SingleChunkFile);
+
+ m_DiskStats.CurrentOpenFileCount++;
+ auto _ = MakeGuard([this]() { m_DiskStats.CurrentOpenFileCount--; });
+ SingleChunkFile.Write(Chunk, FileOffset);
+ }
+ else
+ {
+ const uint64_t MaxWriterBufferSize = 256u * 1025u;
+
+ BufferedWriteFileCache::Local::Writer* Writer = LocalWriter.GetWriter(SequenceIndex);
+ if (Writer)
+ {
+ if ((!Writer->Writer) && (ChunkSize < MaxWriterBufferSize))
+ {
+ Writer->Writer = std::make_unique<BasicFileWriter>(*Writer->File, Min(SequenceSize, MaxWriterBufferSize));
+ }
+ Writer->Write(Chunk, FileOffset);
+ }
+ else
+ {
+ Writer = LocalWriter.PutWriter(SequenceIndex, std::make_unique<BufferedWriteFileCache::Local::Writer>());
+
+ Writer->File = std::make_unique<BasicFile>();
+ OpenFile(*Writer->File);
+ if (ChunkSize < MaxWriterBufferSize)
+ {
+ Writer->Writer = std::make_unique<BasicFileWriter>(*Writer->File, Min(SequenceSize, MaxWriterBufferSize));
+ }
+ Writer->Write(Chunk, FileOffset);
+ }
+ }
+ m_DiskStats.WriteCount++;
+ m_DiskStats.WriteByteCount += ChunkSize;
+ m_WrittenChunkByteCount += ChunkSize;
+}
+
// Builds the list of chunk write operations for one downloaded chunk block.
//
// Chunks [FirstIncludedBlockChunkIndex, LastIncludedBlockChunkIndex] are laid
// out back-to-back inside BlockView (lengths from ChunkCompressedLengths). For
// every chunk still required by the remote layout, the compressed payload is
// validated (format, content hash, raw size), decompressed, and one write op
// is recorded per remaining target file location. Ops are sorted by
// (sequence index, file offset) so the writer touches files in order.
//
// Thread-safety: the per-chunk needs-copy flag is claimed with a CAS so only
// one concurrent block decodes a given chunk; the per-sequence counters are
// only read here (via GetRemainingChunkTargets), not decremented.
//
// NOTE(review): always returns true in the current implementation; callers
// treat false as "no ops generated".
bool
BuildsOperationUpdateFolder::GetBlockWriteOps(const IoHash& BlockRawHash,
                                              std::span<const IoHash> ChunkRawHashes,
                                              std::span<const uint32_t> ChunkCompressedLengths,
                                              std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
                                              std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
                                              const MemoryView BlockView,
                                              uint32_t FirstIncludedBlockChunkIndex,
                                              uint32_t LastIncludedBlockChunkIndex,
                                              BlockWriteOps& OutOps)
{
    ZEN_TRACE_CPU("GetBlockWriteOps");

    // Running byte offset of the current chunk within BlockView.
    uint32_t OffsetInBlock = 0;
    for (uint32_t ChunkBlockIndex = FirstIncludedBlockChunkIndex; ChunkBlockIndex <= LastIncludedBlockChunkIndex; ChunkBlockIndex++)
    {
        const uint32_t ChunkCompressedSize = ChunkCompressedLengths[ChunkBlockIndex];
        const IoHash& ChunkHash = ChunkRawHashes[ChunkBlockIndex];
        // Only chunks referenced by the remote content are of interest; blocks
        // may contain chunks we do not need.
        if (auto It = m_RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != m_RemoteLookup.ChunkHashToChunkIndex.end())
        {
            const uint32_t ChunkIndex = It->second;
            std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs =
                GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, ChunkIndex);

            if (!ChunkTargetPtrs.empty())
            {
                // Claim this chunk: the CAS flips the flag true -> false, so at
                // most one caller decodes and writes any given chunk.
                bool NeedsWrite = true;
                if (RemoteChunkIndexNeedsCopyFromSourceFlags[ChunkIndex].compare_exchange_strong(NeedsWrite, false))
                {
                    MemoryView ChunkMemoryView = BlockView.Mid(OffsetInBlock, ChunkCompressedSize);
                    IoHash VerifyChunkHash;
                    uint64_t VerifyChunkSize;
                    // Parse the compressed-buffer envelope; this also yields the
                    // self-declared content hash and raw size for verification.
                    CompressedBuffer CompressedChunk =
                        CompressedBuffer::FromCompressed(SharedBuffer::MakeView(ChunkMemoryView), VerifyChunkHash, VerifyChunkSize);
                    if (!CompressedChunk)
                    {
                        throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} is not a valid compressed buffer",
                                                             ChunkHash,
                                                             OffsetInBlock,
                                                             ChunkCompressedSize,
                                                             BlockRawHash));
                    }
                    if (VerifyChunkHash != ChunkHash)
                    {
                        throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} has a mismatching content hash {}",
                                                             ChunkHash,
                                                             OffsetInBlock,
                                                             ChunkCompressedSize,
                                                             BlockRawHash,
                                                             VerifyChunkHash));
                    }
                    if (VerifyChunkSize != m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex])
                    {
                        throw std::runtime_error(
                            fmt::format("Chunk {} at {}, size {} in block {} has a mismatching raw size {}, expected {}",
                                        ChunkHash,
                                        OffsetInBlock,
                                        ChunkCompressedSize,
                                        BlockRawHash,
                                        VerifyChunkSize,
                                        m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]));
                    }

                    OodleCompressor ChunkCompressor;
                    OodleCompressionLevel ChunkCompressionLevel;
                    uint64_t ChunkBlockSize;

                    bool GetCompressParametersSuccess =
                        CompressedChunk.TryGetCompressParameters(ChunkCompressor, ChunkCompressionLevel, ChunkBlockSize);
                    ZEN_ASSERT(GetCompressParametersSuccess);

                    IoBuffer Decompressed;
                    if (ChunkCompressionLevel == OodleCompressionLevel::None)
                    {
                        // Uncompressed payload: wrap the raw bytes in place to
                        // avoid a copy; the returned buffer aliases BlockView.
                        MemoryView ChunkDecompressedMemoryView = ChunkMemoryView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder());
                        Decompressed =
                            IoBuffer(IoBuffer::Wrap, ChunkDecompressedMemoryView.GetData(), ChunkDecompressedMemoryView.GetSize());
                    }
                    else
                    {
                        Decompressed = CompressedChunk.Decompress().AsIoBuffer();
                    }

                    if (Decompressed.GetSize() != m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex])
                    {
                        throw std::runtime_error(fmt::format("Chunk {} at {}, size {} in block {} decompressed to size {}, expected {}",
                                                             ChunkHash,
                                                             OffsetInBlock,
                                                             ChunkCompressedSize,
                                                             BlockRawHash,
                                                             Decompressed.GetSize(),
                                                             m_RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]));
                    }

                    ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed));
                    // One decompressed buffer can feed multiple target file
                    // locations; all ops share the same ChunkBufferIndex.
                    for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs)
                    {
                        OutOps.WriteOps.push_back(
                            BlockWriteOps::WriteOpData{.Target = Target, .ChunkBufferIndex = OutOps.ChunkBuffers.size()});
                    }
                    OutOps.ChunkBuffers.emplace_back(std::move(Decompressed));
                }
            }
        }

        OffsetInBlock += ChunkCompressedSize;
    }
    {
        ZEN_TRACE_CPU("Sort");
        // Sort by (sequence, offset) so the subsequent writes are sequential
        // per file, which keeps the buffered writer effective.
        std::sort(OutOps.WriteOps.begin(),
                  OutOps.WriteOps.end(),
                  [](const BlockWriteOps::WriteOpData& Lhs, const BlockWriteOps::WriteOpData& Rhs) {
                      if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex)
                      {
                          return true;
                      }
                      if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex)
                      {
                          return false;
                      }
                      return Lhs.Target->Offset < Rhs.Target->Offset;
                  });
    }
    return true;
}
+
// Executes a prepared set of block write ops against the write cache, then
// updates the per-sequence completion tracking.
//
// Two strictly ordered phases:
//  1. Write every chunk buffer to its target (sequence, offset) while a
//     BufferedWriteFileCache::Local scope holds the files open.
//  2. Only after that scope has closed the files, decrement the per-sequence
//     counters, close the completed sequences in the cache, and kick off
//     async verification/finalization. The ordering matters: finalization
//     renames the file, which must not happen while phase 1 still has it open.
void
BuildsOperationUpdateFolder::WriteBlockChunkOpsToCache(std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
                                                       const BlockWriteOps& Ops,
                                                       BufferedWriteFileCache& WriteCache,
                                                       ParallelWork& Work)
{
    ZEN_TRACE_CPU("WriteBlockChunkOpsToCache");

    {
        // Scope keeps local file writers alive only for the duration of the
        // writes; its destructor releases the open file handles.
        BufferedWriteFileCache::Local LocalWriter(WriteCache);
        for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps)
        {
            if (Work.IsAborted())
            {
                break;
            }
            const CompositeBuffer& Chunk = Ops.ChunkBuffers[WriteOp.ChunkBufferIndex];
            const uint32_t SequenceIndex = WriteOp.Target->SequenceIndex;
            ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() <=
                       m_RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]);
            ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() > 0);
            const uint64_t FileOffset = WriteOp.Target->Offset;
            const uint32_t PathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex];

            WriteSequenceChunkToCache(LocalWriter, Chunk, SequenceIndex, FileOffset, PathIndex);
        }
    }
    if (!Work.IsAborted())
    {
        // Write tracking, updating this must be done without any files open (BufferedWriteFileCache::Local)
        std::vector<uint32_t> CompletedChunkSequences;
        for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps)
        {
            const uint32_t RemoteSequenceIndex = WriteOp.Target->SequenceIndex;
            if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters))
            {
                CompletedChunkSequences.push_back(RemoteSequenceIndex);
            }
        }
        WriteCache.Close(CompletedChunkSequences);
        VerifyAndCompleteChunkSequencesAsync(CompletedChunkSequences, Work);
    }
}
+
+bool
+BuildsOperationUpdateFolder::WriteChunksBlockToCache(const ChunkBlockDescription& BlockDescription,
+ std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
+ ParallelWork& Work,
+ CompositeBuffer&& BlockBuffer,
+ std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
+ BufferedWriteFileCache& WriteCache)
+{
+ ZEN_TRACE_CPU("WriteChunksBlockToCache");
+
+ IoBuffer BlockMemoryBuffer = MakeBufferMemoryBased(BlockBuffer);
+ const MemoryView BlockView = BlockMemoryBuffer.GetView();
+
+ BlockWriteOps Ops;
+ if ((BlockDescription.HeaderSize == 0) || BlockDescription.ChunkCompressedLengths.empty())
+ {
+ ZEN_TRACE_CPU("WriteChunksBlockToCache_Legacy");
+
+ uint64_t HeaderSize;
+ const std::vector<uint32_t> ChunkCompressedLengths =
+ ReadChunkBlockHeader(BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder()), HeaderSize);
+
+ if (GetBlockWriteOps(BlockDescription.BlockHash,
+ BlockDescription.ChunkRawHashes,
+ ChunkCompressedLengths,
+ SequenceIndexChunksLeftToWriteCounters,
+ RemoteChunkIndexNeedsCopyFromSourceFlags,
+ BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + HeaderSize),
+ 0,
+ gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1),
+ Ops))
+ {
+ WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work);
+ return true;
+ }
+ return false;
+ }
+
+ if (GetBlockWriteOps(BlockDescription.BlockHash,
+ BlockDescription.ChunkRawHashes,
+ BlockDescription.ChunkCompressedLengths,
+ SequenceIndexChunksLeftToWriteCounters,
+ RemoteChunkIndexNeedsCopyFromSourceFlags,
+ BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize),
+ 0,
+ gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1),
+ Ops))
+ {
+ WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work);
+ return true;
+ }
+ return false;
+}
+
+bool
+BuildsOperationUpdateFolder::WritePartialBlockChunksToCache(const ChunkBlockDescription& BlockDescription,
+ std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
+ ParallelWork& Work,
+ CompositeBuffer&& PartialBlockBuffer,
+ uint32_t FirstIncludedBlockChunkIndex,
+ uint32_t LastIncludedBlockChunkIndex,
+ std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
+ BufferedWriteFileCache& WriteCache)
+{
+ ZEN_TRACE_CPU("WritePartialBlockChunksToCache");
+
+ IoBuffer BlockMemoryBuffer = MakeBufferMemoryBased(PartialBlockBuffer);
+ const MemoryView BlockView = BlockMemoryBuffer.GetView();
+
+ BlockWriteOps Ops;
+ if (GetBlockWriteOps(BlockDescription.BlockHash,
+ BlockDescription.ChunkRawHashes,
+ BlockDescription.ChunkCompressedLengths,
+ SequenceIndexChunksLeftToWriteCounters,
+ RemoteChunkIndexNeedsCopyFromSourceFlags,
+ BlockView,
+ FirstIncludedBlockChunkIndex,
+ LastIncludedBlockChunkIndex,
+ Ops))
+ {
+ WriteBlockChunkOpsToCache(SequenceIndexChunksLeftToWriteCounters, Ops, WriteCache, Work);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+void
+BuildsOperationUpdateFolder::AsyncWriteDownloadedChunk(uint32_t RemoteChunkIndex,
+ const BlobsExistsResult& ExistsResult,
+ std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs,
+ BufferedWriteFileCache& WriteCache,
+ ParallelWork& Work,
+ IoBuffer&& Payload,
+ std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
+ std::atomic<uint64_t>& WritePartsComplete,
+ const uint64_t TotalPartWriteCount,
+ FilteredRate& FilteredWrittenBytesPerSecond)
+{
+ ZEN_TRACE_CPU("AsyncWriteDownloadedChunk");
+
+ const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
+
+ const uint64_t Size = Payload.GetSize();
+
+ const bool ExistsInCache = m_Storage.CacheStorage && ExistsResult.ExistingBlobs.contains(ChunkHash);
+
+ const bool PopulateCache = !ExistsInCache && m_Storage.CacheStorage && m_Options.PopulateCache;
+
+ std::filesystem::path CompressedChunkPath =
+ TryMoveDownloadedChunk(Payload,
+ m_TempDownloadFolderPath / ChunkHash.ToHexString(),
+ /* ForceDiskBased */ PopulateCache || Size > m_Options.MaximumInMemoryPayloadSize);
+ if (PopulateCache)
+ {
+ IoBuffer CacheBlob = IoBufferBuilder::MakeFromFile(CompressedChunkPath);
+ if (CacheBlob)
+ {
+ m_Storage.CacheStorage->PutBuildBlob(m_BuildId,
+ ChunkHash,
+ ZenContentType::kCompressedBinary,
+ CompositeBuffer(SharedBuffer(CacheBlob)));
+ }
+ }
+
+ IoBufferFileReference FileRef;
+ bool EnableBacklog = !CompressedChunkPath.empty() || Payload.GetFileReference(FileRef);
+
+ Work.ScheduleWork(
+ m_IOWorkerPool,
+ [this,
+ SequenceIndexChunksLeftToWriteCounters,
+ &Work,
+ CompressedChunkPath,
+ RemoteChunkIndex,
+ TotalPartWriteCount,
+ &WriteCache,
+ &WritePartsComplete,
+ &FilteredWrittenBytesPerSecond,
+ ChunkTargetPtrs = std::move(ChunkTargetPtrs),
+ CompressedPart = IoBuffer(std::move(Payload))](std::atomic<bool>&) mutable {
+ if (!m_AbortFlag)
+ {
+ ZEN_TRACE_CPU("Async_WriteChunk");
+
+ FilteredWrittenBytesPerSecond.Start();
+
+ const IoHash& ChunkHash = m_RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex];
+ if (CompressedChunkPath.empty())
+ {
+ ZEN_ASSERT(CompressedPart);
+ }
+ else
+ {
+ ZEN_ASSERT(!CompressedPart);
+ CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath);
+ if (!CompressedPart)
+ {
+ throw std::runtime_error(
+ fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath));
+ }
+ }
+
+ bool NeedHashVerify = WriteCompressedChunkToCache(ChunkHash, ChunkTargetPtrs, WriteCache, std::move(CompressedPart));
+ if (!m_AbortFlag)
+ {
+ if (WritePartsComplete.fetch_add(1) + 1 == TotalPartWriteCount)
+ {
+ FilteredWrittenBytesPerSecond.Stop();
+ }
+
+ if (!CompressedChunkPath.empty())
+ {
+ std::error_code Ec = TryRemoveFile(CompressedChunkPath);
+ if (Ec)
+ {
+ ZEN_DEBUG("Failed removing file '{}', reason: ({}) {}", CompressedChunkPath, Ec.value(), Ec.message());
+ }
+ }
+
+ std::vector<uint32_t> CompletedSequences =
+ CompleteChunkTargets(ChunkTargetPtrs, SequenceIndexChunksLeftToWriteCounters);
+ WriteCache.Close(CompletedSequences);
+ if (NeedHashVerify)
+ {
+ VerifyAndCompleteChunkSequencesAsync(CompletedSequences, Work);
+ }
+ else
+ {
+ FinalizeChunkSequences(CompletedSequences);
+ }
+ }
+ }
+ },
+ EnableBacklog ? WorkerThreadPool::EMode::EnableBacklog : WorkerThreadPool::EMode::DisableBacklog);
+}
+
+void
+BuildsOperationUpdateFolder::VerifyAndCompleteChunkSequencesAsync(std::span<const uint32_t> RemoteSequenceIndexes, ParallelWork& Work)
+{
+ if (RemoteSequenceIndexes.empty())
+ {
+ return;
+ }
+ ZEN_TRACE_CPU("VerifyAndCompleteChunkSequence");
+ if (m_Options.ValidateCompletedSequences)
+ {
+ for (uint32_t RemoteSequenceIndexOffset = 1; RemoteSequenceIndexOffset < RemoteSequenceIndexes.size(); RemoteSequenceIndexOffset++)
+ {
+ const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[RemoteSequenceIndexOffset];
+ Work.ScheduleWork(m_IOWorkerPool, [this, RemoteSequenceIndex](std::atomic<bool>&) {
+ if (!m_AbortFlag)
+ {
+ ZEN_TRACE_CPU("Async_VerifyAndFinalizeSequence");
+
+ VerifySequence(RemoteSequenceIndex);
+ if (!m_AbortFlag)
+ {
+ const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
+ FinalizeChunkSequence(SequenceRawHash);
+ }
+ }
+ });
+ }
+ const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[0];
+
+ VerifySequence(RemoteSequenceIndex);
+ const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
+ FinalizeChunkSequence(SequenceRawHash);
+ }
+ else
+ {
+ for (uint32_t RemoteSequenceIndexOffset = 0; RemoteSequenceIndexOffset < RemoteSequenceIndexes.size(); RemoteSequenceIndexOffset++)
+ {
+ const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[RemoteSequenceIndexOffset];
+ const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
+ FinalizeChunkSequence(SequenceRawHash);
+ }
+ }
+}
+
+bool
+BuildsOperationUpdateFolder::CompleteSequenceChunk(uint32_t RemoteSequenceIndex,
+ std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters)
+{
+ uint32_t PreviousValue = SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1);
+ ZEN_ASSERT(PreviousValue >= 1);
+ ZEN_ASSERT(PreviousValue != (uint32_t)-1);
+ return PreviousValue == 1;
+}
+
+std::vector<uint32_t>
+BuildsOperationUpdateFolder::CompleteChunkTargets(const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs,
+ std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters)
+{
+ ZEN_TRACE_CPU("CompleteChunkTargets");
+
+ std::vector<uint32_t> CompletedSequenceIndexes;
+ for (const ChunkedContentLookup::ChunkSequenceLocation* Location : ChunkTargetPtrs)
+ {
+ const uint32_t RemoteSequenceIndex = Location->SequenceIndex;
+ if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters))
+ {
+ CompletedSequenceIndexes.push_back(RemoteSequenceIndex);
+ }
+ }
+ return CompletedSequenceIndexes;
+}
+
+void
+BuildsOperationUpdateFolder::FinalizeChunkSequence(const IoHash& SequenceRawHash)
+{
+ ZEN_TRACE_CPU("FinalizeChunkSequence");
+
+ ZEN_ASSERT_SLOW(!IsFile(GetFinalChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash)));
+ std::error_code Ec;
+ RenameFile(GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash),
+ GetFinalChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash),
+ Ec);
+ if (Ec)
+ {
+ throw std::system_error(Ec);
+ }
+}
+
+void
+BuildsOperationUpdateFolder::FinalizeChunkSequences(std::span<const uint32_t> RemoteSequenceIndexes)
+{
+ ZEN_TRACE_CPU("FinalizeChunkSequences");
+
+ for (uint32_t SequenceIndex : RemoteSequenceIndexes)
+ {
+ FinalizeChunkSequence(m_RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]);
+ }
+}
+
+void
+BuildsOperationUpdateFolder::VerifySequence(uint32_t RemoteSequenceIndex)
+{
+ ZEN_TRACE_CPU("VerifySequence");
+
+ ZEN_ASSERT(m_Options.ValidateCompletedSequences);
+
+ const IoHash& SequenceRawHash = m_RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex];
+ {
+ ZEN_TRACE_CPU("HashSequence");
+ const std::uint32_t RemotePathIndex = m_RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex];
+ const uint64_t ExpectedSize = m_RemoteContent.RawSizes[RemotePathIndex];
+ IoBuffer VerifyBuffer = IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(m_CacheFolderPath, SequenceRawHash));
+ const uint64_t VerifySize = VerifyBuffer.GetSize();
+ if (VerifySize != ExpectedSize)
+ {
+ throw std::runtime_error(fmt::format("Written chunk sequence {} size {} does not match expected size {}",
+ SequenceRawHash,
+ VerifySize,
+ ExpectedSize));
+ }
+
+ const IoHash VerifyChunkHash = IoHash::HashBuffer(std::move(VerifyBuffer), &m_ValidatedChunkByteCount);
+ if (VerifyChunkHash != SequenceRawHash)
+ {
+ throw std::runtime_error(
+ fmt::format("Written chunk sequence {} hash does not match expected hash {}", VerifyChunkHash, SequenceRawHash));
+ }
+ }
+}
+
// Verifies that the on-disk folder at Path matches the expected chunked
// content: every tracked file must exist with exactly the expected size, and
// (when VerifyFileHash is set) the expected content hash. Files under any of
// ExcludeFolders are skipped. Verification is parallelized over the IO worker
// pool; all failures are collected and reported at the end, and the function
// throws a runtime_error summarizing the error count if any check failed.
//
// NOTE(review): VerifyFolderStats.FilesVerified is incremented for excluded
// paths too (it drives the progress bar's remaining count), so it counts
// "paths processed", not "files actually checked" — confirm this is intended.
void
VerifyFolder(ProgressBase& Progress,
             std::atomic<bool>& AbortFlag,
             std::atomic<bool>& PauseFlag,
             TransferThreadWorkers& Workers,
             const ChunkedFolderContent& Content,
             const ChunkedContentLookup& Lookup,
             const std::filesystem::path& Path,
             const std::vector<std::string>& ExcludeFolders,
             bool VerifyFileHash,
             VerifyFolderStatistics& VerifyFolderStats)
{
    ZEN_TRACE_CPU("VerifyFolder");

    Stopwatch Timer;

    std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = Progress.CreateProgressBar("Verify Files");

    WorkerThreadPool& VerifyPool = Workers.GetIOWorkerPool();

    ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);

    const uint32_t PathCount = gsl::narrow<uint32_t>(Content.Paths.size());

    // Errors are appended from many worker threads; guarded by ErrorLock.
    RwLock ErrorLock;
    std::vector<std::string> Errors;

    // True when RelativePath is not inside (or equal to) any excluded folder.
    // The '/' check prevents "Foo" from excluding "FooBar/...".
    auto IsAcceptedFolder = [ExcludeFolders = ExcludeFolders](const std::string_view& RelativePath) -> bool {
        for (const std::string& ExcludeFolder : ExcludeFolders)
        {
            if (RelativePath.starts_with(ExcludeFolder))
            {
                if (RelativePath.length() == ExcludeFolder.length())
                {
                    return false;
                }
                else if (RelativePath[ExcludeFolder.length()] == '/')
                {
                    return false;
                }
            }
        }
        return true;
    };

    for (uint32_t PathIndex = 0; PathIndex < PathCount; PathIndex++)
    {
        if (Work.IsAborted())
        {
            break;
        }

        Work.ScheduleWork(
            VerifyPool,
            [&Path, &Content, &Lookup, &ErrorLock, &Errors, &VerifyFolderStats, VerifyFileHash, &IsAcceptedFolder, PathIndex, &AbortFlag](
                std::atomic<bool>&) {
                if (!AbortFlag)
                {
                    ZEN_TRACE_CPU("VerifyFile_work");

                    // TODO: Convert ScheduleWork body to function

                    const std::filesystem::path TargetPath = (Path / Content.Paths[PathIndex]).make_preferred();
                    if (IsAcceptedFolder(TargetPath.parent_path().generic_string()))
                    {
                        const uint64_t ExpectedSize = Content.RawSizes[PathIndex];
                        if (!IsFile(TargetPath))
                        {
                            ErrorLock.WithExclusiveLock([&]() {
                                Errors.push_back(fmt::format("File {} with expected size {} does not exist", TargetPath, ExpectedSize));
                            });
                            VerifyFolderStats.FilesFailed++;
                        }
                        else
                        {
                            // Size checks come first; they are cheap and catch
                            // most corruption without reading file contents.
                            std::error_code Ec;
                            uint64_t SizeOnDisk = gsl::narrow<uint64_t>(FileSizeFromPath(TargetPath, Ec));
                            if (Ec)
                            {
                                ErrorLock.WithExclusiveLock([&]() {
                                    Errors.push_back(
                                        fmt::format("Failed to get size of file {}: {} ({})", TargetPath, Ec.message(), Ec.value()));
                                });
                                VerifyFolderStats.FilesFailed++;
                            }
                            else if (SizeOnDisk < ExpectedSize)
                            {
                                ErrorLock.WithExclusiveLock([&]() {
                                    Errors.push_back(fmt::format("Size of file {} is smaller than expected. Expected: {}, Found: {}",
                                                                 TargetPath,
                                                                 ExpectedSize,
                                                                 SizeOnDisk));
                                });
                                VerifyFolderStats.FilesFailed++;
                            }
                            else if (SizeOnDisk > ExpectedSize)
                            {
                                ErrorLock.WithExclusiveLock([&]() {
                                    Errors.push_back(fmt::format("Size of file {} is bigger than expected. Expected: {}, Found: {}",
                                                                 TargetPath,
                                                                 ExpectedSize,
                                                                 SizeOnDisk));
                                });
                                VerifyFolderStats.FilesFailed++;
                            }
                            else if (SizeOnDisk > 0 && VerifyFileHash)
                            {
                                const IoHash& ExpectedRawHash = Content.RawHashes[PathIndex];
                                IoBuffer Buffer = IoBufferBuilder::MakeFromFile(TargetPath);
                                IoHash RawHash = IoHash::HashBuffer(Buffer);
                                if (RawHash != ExpectedRawHash)
                                {
                                    // Whole-file hash mismatch: walk the file's
                                    // chunk layout to localize the first
                                    // mismatching chunk for the error report.
                                    uint64_t FileOffset = 0;
                                    const uint32_t SequenceIndex = Lookup.RawHashToSequenceIndex.at(ExpectedRawHash);
                                    const uint32_t OrderOffset = Lookup.SequenceIndexChunkOrderOffset[SequenceIndex];
                                    for (uint32_t OrderIndex = OrderOffset;
                                         OrderIndex < OrderOffset + Content.ChunkedContent.ChunkCounts[SequenceIndex];
                                         OrderIndex++)
                                    {
                                        uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex];
                                        uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
                                        IoHash ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex];
                                        IoBuffer FileChunk = IoBuffer(Buffer, FileOffset, ChunkSize);
                                        if (IoHash::HashBuffer(FileChunk) != ChunkHash)
                                        {
                                            ErrorLock.WithExclusiveLock([&]() {
                                                Errors.push_back(fmt::format(
                                                    "WARNING: Hash of file {} does not match expected hash. Expected: {}, Found: {}. "
                                                    "Mismatch at chunk {}",
                                                    TargetPath,
                                                    ExpectedRawHash,
                                                    RawHash,
                                                    OrderIndex - OrderOffset));
                                            });
                                            break;
                                        }
                                        FileOffset += ChunkSize;
                                    }
                                    VerifyFolderStats.FilesFailed++;
                                }
                                VerifyFolderStats.ReadBytes += SizeOnDisk;
                            }
                        }
                    }
                    VerifyFolderStats.FilesVerified++;
                }
            },
            // Exception handler: any throw from the verify body above is
            // recorded as a failed file rather than aborting the whole run.
            [&, PathIndex](std::exception_ptr Ex, std::atomic<bool>&) {
                std::string Description;
                try
                {
                    std::rethrow_exception(Ex);
                }
                catch (const std::exception& Ex)
                {
                    Description = Ex.what();
                }
                ErrorLock.WithExclusiveLock([&]() {
                    Errors.push_back(fmt::format("Failed verifying file '{}'. Reason: {}",
                                                 (Path / Content.Paths[PathIndex]).make_preferred(),
                                                 Description));
                });
                VerifyFolderStats.FilesFailed++;
            });
    }

    // Block until all scheduled verifications finish, updating the progress
    // bar on each poll interval.
    Work.Wait(Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
        ZEN_UNUSED(PendingWork);
        std::string Details = fmt::format("Verified {}/{} ({}). Failed files: {}",
                                          VerifyFolderStats.FilesVerified.load(),
                                          PathCount,
                                          NiceBytes(VerifyFolderStats.ReadBytes.load()),
                                          VerifyFolderStats.FilesFailed.load());
        ProgressBar->UpdateState({.Task = "Verifying files ",
                                  .Details = Details,
                                  .TotalCount = gsl::narrow<uint64_t>(PathCount),
                                  .RemainingCount = gsl::narrow<uint64_t>(PathCount - VerifyFolderStats.FilesVerified.load()),
                                  .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
                                 false);
    });
    VerifyFolderStats.VerifyElapsedWallTimeUs = Timer.GetElapsedTimeUs();

    ProgressBar->Finish();
    if (AbortFlag)
    {
        return;
    }

    for (const std::string& Error : Errors)
    {
        ZEN_CONSOLE_ERROR("{}", Error);
    }
    if (!Errors.empty())
    {
        throw std::runtime_error(fmt::format("Verify failed with {} errors", Errors.size()));
    }
}
+
+std::vector<std::filesystem::path>
+GetNewPaths(const std::span<const std::filesystem::path> KnownPaths, const std::span<const std::filesystem::path> Paths)
+{
+ tsl::robin_set<std::string> KnownPathsSet;
+ KnownPathsSet.reserve(KnownPaths.size());
+ for (const std::filesystem::path& LocalPath : KnownPaths)
+ {
+ KnownPathsSet.insert(LocalPath.generic_string());
+ }
+
+ std::vector<std::filesystem::path> NewPaths;
+ for (const std::filesystem::path& UntrackedPath : Paths)
+ {
+ if (!KnownPathsSet.contains(UntrackedPath.generic_string()))
+ {
+ NewPaths.push_back(UntrackedPath);
+ }
+ }
+ return NewPaths;
+}
+
+BuildSaveState
+GetLocalStateFromPaths(ProgressBase& Progress,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ TransferThreadWorkers& Workers,
+ GetFolderContentStatistics& LocalFolderScanStats,
+ ChunkingStatistics& ChunkingStats,
+ const std::filesystem::path& Path,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
+ std::span<const std::filesystem::path> PathsToCheck)
+{
+ FolderContent FolderState =
+ CheckFolderFiles(Progress, AbortFlag, PauseFlag, "Check Files", Workers, LocalFolderScanStats, Path, PathsToCheck);
+
+ ChunkedFolderContent ChunkedContent;
+ if (FolderState.Paths.size() > 0)
+ {
+ ChunkedContent = ScanFolderFiles(Progress,
+ AbortFlag,
+ PauseFlag,
+ "Scan Files",
+ Workers,
+ Path,
+ FolderState,
+ ChunkController,
+ ChunkCache,
+ ChunkingStats);
+ }
+
+ return BuildSaveState{.State = BuildState{.ChunkedContent = std::move(ChunkedContent)}, .FolderState = FolderState, .LocalPath = Path};
+}
+
+BuildSaveState
+GetLocalContent(ProgressBase& Progress,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ bool IsQuiet,
+ TransferThreadWorkers& Workers,
+ GetFolderContentStatistics& LocalFolderScanStats,
+ ChunkingStatistics& ChunkingStats,
+ const std::filesystem::path& Path,
+ const std::filesystem::path& StateFilePath,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache)
+{
+ Stopwatch ReadStateTimer;
+ bool FileExists = IsFile(StateFilePath);
+ if (!FileExists)
+ {
+ ZEN_CONSOLE("No known local state file in {}, falling back to scanning", Path);
+ return {};
+ }
+
+ BuildSaveState SavedLocalState;
+ try
+ {
+ SavedLocalState = ReadBuildSaveStateFile(StateFilePath);
+ if (!IsQuiet)
+ {
+ ZEN_CONSOLE("Read local state file {} in {}", StateFilePath, NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs()));
+ }
+ }
+ catch (const std::exception& Ex)
+ {
+ ZEN_CONSOLE_WARN("Failed reading state file {}, falling back to scannning. Reason: {}", StateFilePath, Ex.what());
+ return {};
+ }
+
+ FolderContent CurrentLocalFolderState = CheckFolderFiles(Progress,
+ AbortFlag,
+ PauseFlag,
+ "Check Known Files",
+ Workers,
+ LocalFolderScanStats,
+ Path,
+ SavedLocalState.FolderState.Paths);
+ if (AbortFlag)
+ {
+ return {};
+ }
+
+ if (!SavedLocalState.FolderState.AreKnownFilesEqual(CurrentLocalFolderState))
+ {
+ const size_t LocalStatePathCount = SavedLocalState.FolderState.Paths.size();
+ std::vector<std::filesystem::path> DeletedPaths;
+ FolderContent UpdatedContent = GetUpdatedContent(SavedLocalState.FolderState, CurrentLocalFolderState, DeletedPaths);
+ if (!DeletedPaths.empty())
+ {
+ SavedLocalState.State.ChunkedContent = DeletePathsFromChunkedContent(SavedLocalState.State.ChunkedContent, DeletedPaths);
+ }
+
+ if (!IsQuiet)
+ {
+ ZEN_CONSOLE("Updating state, {} local files deleted and {} local files updated out of {}",
+ DeletedPaths.size(),
+ UpdatedContent.Paths.size(),
+ LocalStatePathCount);
+ }
+ if (UpdatedContent.Paths.size() > 0)
+ {
+ ChunkedFolderContent UpdatedLocalContent = ScanFolderFiles(Progress,
+ AbortFlag,
+ PauseFlag,
+ "Scan Known Files",
+ Workers,
+ Path,
+ UpdatedContent,
+ ChunkController,
+ ChunkCache,
+ ChunkingStats);
+ if (AbortFlag)
+ {
+ return {};
+ }
+ SavedLocalState.State.ChunkedContent =
+ MergeChunkedFolderContents(SavedLocalState.State.ChunkedContent, {{UpdatedLocalContent}});
+ }
+ }
+ else
+ {
+ // Remove files from LocalContent no longer in LocalFolderState
+ tsl::robin_set<std::string> LocalFolderPaths;
+ LocalFolderPaths.reserve(SavedLocalState.FolderState.Paths.size());
+ for (const std::filesystem::path& LocalFolderPath : SavedLocalState.FolderState.Paths)
+ {
+ LocalFolderPaths.insert(LocalFolderPath.generic_string());
+ }
+ std::vector<std::filesystem::path> DeletedPaths;
+ for (const std::filesystem::path& LocalContentPath : SavedLocalState.State.ChunkedContent.Paths)
+ {
+ if (!LocalFolderPaths.contains(LocalContentPath.generic_string()))
+ {
+ DeletedPaths.push_back(LocalContentPath);
+ }
+ }
+ if (!DeletedPaths.empty())
+ {
+ SavedLocalState.State.ChunkedContent = DeletePathsFromChunkedContent(SavedLocalState.State.ChunkedContent, DeletedPaths);
+ }
+ }
+
+ SavedLocalState.FolderState = CurrentLocalFolderState;
+
+ return SavedLocalState;
+}
+
+void
+DownloadFolder(LoggerRef InLog,
+ ProgressBase& Progress,
+ TransferThreadWorkers& Workers,
+ StorageInstance& Storage,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ const BuildStorageCache::Statistics& StorageCacheStats,
+ const Oid& BuildId,
+ const std::vector<Oid>& BuildPartIds,
+ std::span<const std::string> BuildPartNames,
+ const std::filesystem::path& DownloadSpecPath,
+ const std::filesystem::path& Path,
+ const DownloadOptions& Options)
+{
+ ZEN_TRACE_CPU("DownloadFolder");
+ ZEN_SCOPED_LOG(InLog);
+
+ Progress.SetLogOperationName("Download Folder");
+
+ enum TaskSteps : uint32_t
+ {
+ CheckState,
+ CompareState,
+ Download,
+ Verify,
+ Cleanup,
+ StepCount
+ };
+
+ auto EndProgress = MakeGuard([&]() { Progress.SetLogOperationProgress(TaskSteps::StepCount, TaskSteps::StepCount); });
+
+ Stopwatch DownloadTimer;
+
+ Progress.SetLogOperationProgress(TaskSteps::CheckState, TaskSteps::StepCount);
+
+ const std::filesystem::path ZenTempFolder = ZenTempFolderPath(Options.ZenFolderPath);
+ CreateDirectories(ZenTempFolder);
+
+ std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;
+
+ CbObject BuildObject = GetBuild(*Storage.BuildStorage, BuildId, Options.IsQuiet);
+
+ std::vector<std::pair<Oid, std::string>> AllBuildParts =
+ ResolveBuildPartNames(BuildObject, BuildId, BuildPartIds, BuildPartNames, PreferredMultipartChunkSize);
+
+ BuildManifest Manifest;
+ if (!DownloadSpecPath.empty())
+ {
+ const std::filesystem::path AbsoluteDownloadSpecPath =
+ DownloadSpecPath.is_relative() ? MakeSafeAbsolutePath(Path / DownloadSpecPath) : MakeSafeAbsolutePath(DownloadSpecPath);
+ Manifest = ParseBuildManifest(AbsoluteDownloadSpecPath);
+ }
+
+ std::vector<ChunkedFolderContent> PartContents;
+
+ std::unique_ptr<ChunkingController> ChunkController;
+
+ std::vector<ChunkBlockDescription> BlockDescriptions;
+ std::vector<IoHash> LooseChunkHashes;
+
+ Progress.SetLogOperationProgress(TaskSteps::CompareState, TaskSteps::StepCount);
+
+ ChunkedFolderContent RemoteContent = GetRemoteContent(InLog,
+ Storage,
+ BuildId,
+ AllBuildParts,
+ Manifest,
+ Options.IncludeWildcards,
+ Options.ExcludeWildcards,
+ ChunkController,
+ PartContents,
+ BlockDescriptions,
+ LooseChunkHashes,
+ Options.IsQuiet,
+ Options.IsVerbose,
+ Options.DoExtraContentVerify);
+
+ const std::uint64_t LargeAttachmentSize = Options.AllowMultiparts ? PreferredMultipartChunkSize * 4u : (std::uint64_t)-1;
+ GetFolderContentStatistics LocalFolderScanStats;
+ ChunkingStatistics ChunkingStats;
+
+ BuildSaveState LocalState;
+
+ if (IsDir(Path))
+ {
+ if (!ChunkController && !Options.IsQuiet)
+ {
+ ZEN_CONSOLE_INFO("Unspecified chunking algorithm, using default");
+ ChunkController = CreateStandardChunkingController(StandardChunkingControllerSettings{});
+ }
+ std::unique_ptr<ChunkingCache> ChunkCache(CreateNullChunkingCache());
+
+ LocalState = GetLocalContent(Progress,
+ AbortFlag,
+ PauseFlag,
+ Options.IsQuiet,
+ Workers,
+ LocalFolderScanStats,
+ ChunkingStats,
+ Path,
+ ZenStateFilePath(Path / ZenFolderName),
+ *ChunkController,
+ *ChunkCache);
+
+ std::vector<std::filesystem::path> UntrackedPaths = GetNewPaths(LocalState.State.ChunkedContent.Paths, RemoteContent.Paths);
+
+ BuildSaveState UntrackedLocalContent = GetLocalStateFromPaths(Progress,
+ AbortFlag,
+ PauseFlag,
+ Workers,
+ LocalFolderScanStats,
+ ChunkingStats,
+ Path,
+ *ChunkController,
+ *ChunkCache,
+ UntrackedPaths);
+
+ if (!UntrackedLocalContent.State.ChunkedContent.Paths.empty())
+ {
+ LocalState.State.ChunkedContent =
+ MergeChunkedFolderContents(LocalState.State.ChunkedContent,
+ std::vector<ChunkedFolderContent>{UntrackedLocalContent.State.ChunkedContent});
+
+ // TODO: Helper
+ LocalState.FolderState.Paths.insert(LocalState.FolderState.Paths.begin(),
+ UntrackedLocalContent.FolderState.Paths.begin(),
+ UntrackedLocalContent.FolderState.Paths.end());
+ LocalState.FolderState.RawSizes.insert(LocalState.FolderState.RawSizes.begin(),
+ UntrackedLocalContent.FolderState.RawSizes.begin(),
+ UntrackedLocalContent.FolderState.RawSizes.end());
+ LocalState.FolderState.Attributes.insert(LocalState.FolderState.Attributes.begin(),
+ UntrackedLocalContent.FolderState.Attributes.begin(),
+ UntrackedLocalContent.FolderState.Attributes.end());
+ LocalState.FolderState.ModificationTicks.insert(LocalState.FolderState.ModificationTicks.begin(),
+ UntrackedLocalContent.FolderState.ModificationTicks.begin(),
+ UntrackedLocalContent.FolderState.ModificationTicks.end());
+ }
+
+ if (Options.AppendNewContent)
+ {
+ RemoteContent = ApplyChunkedContentOverlay(LocalState.State.ChunkedContent,
+ RemoteContent,
+ Options.IncludeWildcards,
+ Options.ExcludeWildcards);
+ }
+#if ZEN_BUILD_DEBUG
+ ValidateChunkedFolderContent(RemoteContent,
+ BlockDescriptions,
+ LooseChunkHashes,
+ Options.IncludeWildcards,
+ Options.ExcludeWildcards);
+#endif // ZEN_BUILD_DEBUG
+ }
+ else
+ {
+ CreateDirectories(Path);
+ }
+ if (AbortFlag)
+ {
+ return;
+ }
+
+ LocalState.LocalPath = Path;
+
+ {
+ BuildsSelection::Build RemoteBuildState = {.Id = BuildId,
+ .IncludeWildcards = Options.IncludeWildcards,
+ .ExcludeWildcards = Options.ExcludeWildcards};
+ RemoteBuildState.Parts.reserve(BuildPartIds.size());
+ for (size_t PartIndex = 0; PartIndex < BuildPartIds.size(); PartIndex++)
+ {
+ RemoteBuildState.Parts.push_back(
+ {BuildsSelection::BuildPart{.Id = BuildPartIds[PartIndex],
+ .Name = PartIndex < BuildPartNames.size() ? BuildPartNames[PartIndex] : ""}});
+ }
+
+ if (Options.AppendNewContent)
+ {
+ LocalState.State.Selection.Builds.emplace_back(std::move(RemoteBuildState));
+ }
+ else
+ {
+ LocalState.State.Selection.Builds = std::vector<BuildsSelection::Build>{std::move(RemoteBuildState)};
+ }
+ }
+
+ if ((Options.EnableTargetFolderScavenging || Options.AppendNewContent) && !Options.CleanTargetFolder &&
+ CompareChunkedContent(RemoteContent, LocalState.State.ChunkedContent))
+ {
+ if (!Options.IsQuiet)
+ {
+ ZEN_CONSOLE("Local state is identical to build to download. All done. Completed in {}.",
+ NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs()));
+ }
+
+ Stopwatch WriteStateTimer;
+
+ CbObject StateObject = CreateBuildSaveStateObject(LocalState);
+ CreateDirectories(ZenStateFilePath(Options.ZenFolderPath).parent_path());
+ TemporaryFile::SafeWriteFile(ZenStateFilePath(Options.ZenFolderPath), StateObject.GetView());
+ if (!Options.IsQuiet)
+ {
+ ZEN_CONSOLE("Wrote local state in {}", NiceTimeSpanMs(WriteStateTimer.GetElapsedTimeMs()));
+ }
+
+ AddDownloadedPath(Options.SystemRootDir,
+ BuildsDownloadInfo{.Selection = LocalState.State.Selection,
+ .LocalPath = Path,
+ .StateFilePath = ZenStateFilePath(Options.ZenFolderPath),
+ .Iso8601Date = DateTime::Now().ToIso8601()});
+ }
+ else
+ {
+ ExtendableStringBuilder<128> BuildPartString;
+ for (const std::pair<Oid, std::string>& BuildPart : AllBuildParts)
+ {
+ BuildPartString.Append(fmt::format(" {} ({})", BuildPart.second, BuildPart.first));
+ }
+
+ uint64_t RawSize = std::accumulate(RemoteContent.RawSizes.begin(), RemoteContent.RawSizes.end(), std::uint64_t(0));
+
+ if (!Options.IsQuiet)
+ {
+ ZEN_CONSOLE("Downloading build {}, parts:{} to '{}' ({})", BuildId, BuildPartString.ToView(), Path, NiceBytes(RawSize));
+ }
+
+ Stopwatch IndexTimer;
+
+ const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalState.State.ChunkedContent);
+ const ChunkedContentLookup RemoteLookup = BuildChunkedContentLookup(RemoteContent);
+
+ if (!Options.IsQuiet)
+ {
+ ZEN_INFO("Indexed local and remote content in {}", NiceTimeSpanMs(IndexTimer.GetElapsedTimeMs()));
+ }
+
+ Progress.SetLogOperationProgress(TaskSteps::Download, TaskSteps::StepCount);
+
+ BuildsOperationUpdateFolder Updater(
+ InLog,
+ Progress,
+ Storage,
+ AbortFlag,
+ PauseFlag,
+ Workers.GetIOWorkerPool(),
+ Workers.GetNetworkPool(),
+ BuildId,
+ Path,
+ LocalState.State.ChunkedContent,
+ LocalLookup,
+ RemoteContent,
+ RemoteLookup,
+ BlockDescriptions,
+ LooseChunkHashes,
+ BuildsOperationUpdateFolder::Options{
+ .IsQuiet = Options.IsQuiet,
+ .IsVerbose = Options.IsVerbose,
+ .AllowFileClone = Options.AllowFileClone,
+ .UseSparseFiles = Options.UseSparseFiles,
+ .SystemRootDir = Options.SystemRootDir,
+ .ZenFolderPath = Options.ZenFolderPath,
+ .LargeAttachmentSize = LargeAttachmentSize,
+ .PreferredMultipartChunkSize = PreferredMultipartChunkSize,
+ .PartialBlockRequestMode = Options.PartialBlockRequestMode,
+ .WipeTargetFolder = Options.CleanTargetFolder,
+ .EnableOtherDownloadsScavenging = Options.EnableOtherDownloadsScavenging,
+ .EnableTargetFolderScavenging = Options.EnableTargetFolderScavenging || Options.AppendNewContent,
+ .ValidateCompletedSequences = Options.PostDownloadVerify,
+ .ExcludeFolders = Options.ExcludeFolders,
+ .MaximumInMemoryPayloadSize = Options.MaximumInMemoryPayloadSize,
+ .PopulateCache = Options.PopulateCache});
+ {
+ Progress.PushLogOperation("Download");
+ auto _ = MakeGuard([&Progress]() { Progress.PopLogOperation(); });
+ FolderContent UpdatedLocalFolderState;
+ Updater.Execute(UpdatedLocalFolderState);
+
+ LocalState.State.ChunkedContent = RemoteContent;
+ LocalState.FolderState = std::move(UpdatedLocalFolderState);
+ }
+
+ VerifyFolderStatistics VerifyFolderStats;
+ if (!AbortFlag)
+ {
+ AddDownloadedPath(Options.SystemRootDir,
+ BuildsDownloadInfo{.Selection = LocalState.State.Selection,
+ .LocalPath = Path,
+ .StateFilePath = ZenStateFilePath(Options.ZenFolderPath),
+ .Iso8601Date = DateTime::Now().ToIso8601()});
+
+ Progress.SetLogOperationProgress(TaskSteps::Verify, TaskSteps::StepCount);
+
+ VerifyFolder(Progress,
+ AbortFlag,
+ PauseFlag,
+ Workers,
+ RemoteContent,
+ RemoteLookup,
+ Path,
+ Options.ExcludeFolders,
+ Options.PostDownloadVerify,
+ VerifyFolderStats);
+
+ Stopwatch WriteStateTimer;
+ CbObject StateObject = CreateBuildSaveStateObject(LocalState);
+
+ CreateDirectories(ZenStateFilePath(Options.ZenFolderPath).parent_path());
+ TemporaryFile::SafeWriteFile(ZenStateFilePath(Options.ZenFolderPath), StateObject.GetView());
+ if (!Options.IsQuiet)
+ {
+ ZEN_CONSOLE("Wrote local state in {}", NiceTimeSpanMs(WriteStateTimer.GetElapsedTimeMs()));
+ }
+
+#if 0
+ ExtendableStringBuilder<1024> SB;
+ CompactBinaryToJson(StateObject, SB);
+ WriteFile(ZenStateFileJsonPath(Options.ZenFolderPath), IoBuffer(IoBuffer::Wrap, SB.Data(), SB.Size()));
+#endif // 0
+ const uint64_t DownloadCount = Updater.m_DownloadStats.DownloadedChunkCount.load() +
+ Updater.m_DownloadStats.DownloadedBlockCount.load() +
+ Updater.m_DownloadStats.DownloadedPartialBlockCount.load();
+ const uint64_t DownloadByteCount = Updater.m_DownloadStats.DownloadedChunkByteCount.load() +
+ Updater.m_DownloadStats.DownloadedBlockByteCount.load() +
+ Updater.m_DownloadStats.DownloadedPartialBlockByteCount.load();
+ const uint64_t DownloadTimeMs = DownloadTimer.GetElapsedTimeMs();
+
+ if (!Options.IsQuiet)
+ {
+ std::string CloneInfo;
+ if (Updater.m_DiskStats.CloneByteCount > 0)
+ {
+ CloneInfo = fmt::format(" ({} cloned)", NiceBytes(Updater.m_DiskStats.CloneByteCount.load()));
+ }
+
+ std::string DownloadDetails;
+ {
+ ExtendableStringBuilder<128> SB;
+ BuildStorageBase::ExtendedStatistics ExtendedDownloadStats;
+ if (Storage.BuildStorage->GetExtendedStatistics(ExtendedDownloadStats))
+ {
+ if (!ExtendedDownloadStats.ReceivedBytesPerSource.empty())
+ {
+ for (auto& It : ExtendedDownloadStats.ReceivedBytesPerSource)
+ {
+ if (SB.Size() > 0)
+ {
+ SB.Append(", "sv);
+ }
+ SB.Append(It.first);
+ SB.Append(": "sv);
+ SB.Append(NiceBytes(It.second));
+ }
+ }
+ }
+ if (Storage.CacheStorage)
+ {
+ if (SB.Size() > 0)
+ {
+ SB.Append(", "sv);
+ }
+ SB.Append("Cache: ");
+ SB.Append(NiceBytes(StorageCacheStats.TotalBytesRead.load()));
+ }
+ if (SB.Size() > 0)
+ {
+ DownloadDetails = fmt::format(" ({})", SB.ToView());
+ }
+ }
+
+ ZEN_CONSOLE(
+ "Downloaded build {}, parts:{} in {}\n"
+ " Scavenge: {} (Target: {}, Cache: {}, Others: {})\n"
+ " Download: {} ({}) {}bits/s{}\n"
+ " Write: {} ({}) {}B/s{}\n"
+ " Clean: {}\n"
+ " Finalize: {}\n"
+ " Verify: {}",
+ BuildId,
+ BuildPartString.ToView(),
+ NiceTimeSpanMs(DownloadTimeMs),
+
+ NiceTimeSpanMs((Updater.m_CacheMappingStats.CacheScanElapsedWallTimeUs +
+ Updater.m_CacheMappingStats.LocalScanElapsedWallTimeUs +
+ Updater.m_CacheMappingStats.ScavengeElapsedWallTimeUs) /
+ 1000),
+ NiceTimeSpanMs(Updater.m_CacheMappingStats.LocalScanElapsedWallTimeUs / 1000),
+ NiceTimeSpanMs(Updater.m_CacheMappingStats.CacheScanElapsedWallTimeUs / 1000),
+ NiceTimeSpanMs(Updater.m_CacheMappingStats.ScavengeElapsedWallTimeUs / 1000),
+
+ DownloadCount,
+ NiceBytes(DownloadByteCount),
+ NiceNum(GetBytesPerSecond(Updater.m_WriteChunkStats.DownloadTimeUs, DownloadByteCount * 8)),
+ DownloadDetails,
+
+ Updater.m_DiskStats.WriteCount.load(),
+ NiceBytes(Updater.m_WrittenChunkByteCount.load()),
+ NiceNum(GetBytesPerSecond(Updater.m_WriteChunkStats.WriteTimeUs, Updater.m_DiskStats.WriteByteCount.load())),
+ CloneInfo,
+
+ NiceTimeSpanMs(Updater.m_RebuildFolderStateStats.CleanFolderElapsedWallTimeUs / 1000),
+
+ NiceTimeSpanMs(Updater.m_RebuildFolderStateStats.FinalizeTreeElapsedWallTimeUs / 1000),
+
+ NiceTimeSpanMs(VerifyFolderStats.VerifyElapsedWallTimeUs / 1000));
+ }
+ }
+ }
+
+ Progress.SetLogOperationProgress(TaskSteps::Cleanup, TaskSteps::StepCount);
+
+ CleanAndRemoveDirectory(Workers.GetIOWorkerPool(), AbortFlag, PauseFlag, ZenTempFolder);
+}
+} // namespace zen
diff --git a/src/zenremotestore/builds/builduploadfolder.cpp b/src/zenremotestore/builds/builduploadfolder.cpp
new file mode 100644
index 000000000..b536ae464
--- /dev/null
+++ b/src/zenremotestore/builds/builduploadfolder.cpp
@@ -0,0 +1,2634 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenremotestore/builds/builduploadfolder.h>
+
+#include <zencore/basicfile.h>
+#include <zencore/compactbinarybuilder.h>
+#include <zencore/fmtutils.h>
+#include <zencore/parallelwork.h>
+#include <zencore/scopeguard.h>
+#include <zencore/trace.h>
+#include <zenremotestore/builds/buildcontent.h>
+#include <zenremotestore/builds/buildmanifest.h>
+#include <zenremotestore/builds/buildstoragecache.h>
+#include <zenremotestore/chunking/chunkingcache.h>
+#include <zenremotestore/chunking/chunkingcontroller.h>
+#include <zenremotestore/transferthreadworkers.h>
+#include <zenutil/filesystemutils.h>
+#include <zenutil/filteredrate.h>
+#include <zenutil/progress.h>
+
+#include <numeric>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_set.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+namespace zen {
+
+using namespace std::literals;
+
+namespace {
+    // True when the extension hash is not on the caller-supplied set of
+    // non-compressable extension hashes.
+    bool IsExtensionHashCompressable(const tsl::robin_set<uint32_t>& NonCompressableExtensionHashes, const uint32_t PathHash)
+    {
+        const bool IsDenied = NonCompressableExtensionHashes.contains(PathHash);
+        return !IsDenied;
+    }
+
+    // Decides compressability for a chunk from the extension hash of the first
+    // path of the chunk's first sequence location. A chunk with no sequence
+    // locations is treated as non-compressable.
+    bool IsChunkCompressable(const tsl::robin_set<uint32_t>& NonCompressableExtensionHashes,
+                             const ChunkedContentLookup& Lookup,
+                             uint32_t ChunkIndex)
+    {
+        if (Lookup.ChunkSequenceLocationCounts[ChunkIndex] == 0)
+        {
+            return false;
+        }
+        const size_t FirstLocationOffset = Lookup.ChunkSequenceLocationOffset[ChunkIndex];
+        const uint32_t FirstSequenceIndex = Lookup.ChunkSequenceLocations[FirstLocationOffset].SequenceIndex;
+        const uint32_t FirstPathIndex = Lookup.SequenceIndexFirstPathIndex[FirstSequenceIndex];
+        return IsExtensionHashCompressable(NonCompressableExtensionHashes, Lookup.PathExtensionHash[FirstPathIndex]);
+    }
+
+    // Concatenates every item in Items into one string, writing Prefix before
+    // each item.
+    template<typename T>
+    std::string FormatArray(std::span<const T> Items, std::string_view Prefix)
+    {
+        ExtendableStringBuilder<512> SB;
+        for (const T& Entry : Items)
+        {
+            SB.Append(fmt::format("{}{}", Prefix, Entry));
+        }
+        return SB.ToString();
+    }
+} // namespace
+
+// Small MRU cache of open files used while reading chunk payloads back from a
+// local folder. Keeps at most MaxOpenFileCount files open; the most recently
+// used entry is kept at the front of m_OpenFiles and the least recently used
+// entry (the back) is evicted when the cache is full.
+class ReadFileCache
+{
+public:
+    // A buffered file reader that provides CompositeBuffer where the buffers are owned and the memory never overwritten
+    //
+    // The atomic counters are owned by the caller and are updated by the
+    // BufferedOpenFile instances this cache creates, so upload statistics are
+    // aggregated across all cache instances.
+    ReadFileCache(std::atomic<uint64_t>& OpenReadCount,
+                  std::atomic<uint64_t>& CurrentOpenFileCount,
+                  std::atomic<uint64_t>& ReadCount,
+                  std::atomic<uint64_t>& ReadByteCount,
+                  const std::filesystem::path& Path,
+                  const ChunkedFolderContent& LocalContent,
+                  const ChunkedContentLookup& LocalLookup,
+                  size_t MaxOpenFileCount)
+    : m_Path(Path)
+    , m_LocalContent(LocalContent)
+    , m_LocalLookup(LocalLookup)
+    , m_OpenReadCount(OpenReadCount)
+    , m_CurrentOpenFileCount(CurrentOpenFileCount)
+    , m_ReadCount(ReadCount)
+    , m_ReadByteCount(ReadByteCount)
+    {
+        // The capacity doubles as the cache size limit: GetRange evicts before
+        // inserting whenever size reaches capacity, so m_OpenFiles never grows
+        // beyond MaxOpenFileCount entries.
+        m_OpenFiles.reserve(MaxOpenFileCount);
+    }
+    ~ReadFileCache() { m_OpenFiles.clear(); }
+
+    // Returns Size bytes at Offset from the file backing the given sequence.
+    // NOTE: not thread-safe; callers are expected to serialize access.
+    CompositeBuffer GetRange(uint32_t SequenceIndex, uint64_t Offset, uint64_t Size)
+    {
+        ZEN_TRACE_CPU("ReadFileCache::GetRange");
+
+        // Cache hit: move the entry to the front (MRU position) and read from it.
+        auto CacheIt =
+            std::find_if(m_OpenFiles.begin(), m_OpenFiles.end(), [SequenceIndex](const auto& Lhs) { return Lhs.first == SequenceIndex; });
+        if (CacheIt != m_OpenFiles.end())
+        {
+            if (CacheIt != m_OpenFiles.begin())
+            {
+                auto CachedFile(std::move(CacheIt->second));
+                m_OpenFiles.erase(CacheIt);
+                m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(SequenceIndex, std::move(CachedFile)));
+            }
+            CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size);
+            return Result;
+        }
+        // Cache miss: resolve the sequence to its first source path on disk.
+        const uint32_t LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[SequenceIndex];
+        const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred();
+        if (Size == m_LocalContent.RawSizes[LocalPathIndex])
+        {
+            // Whole-file request: read it in one shot without caching an open handle.
+            IoBuffer Result = IoBufferBuilder::MakeFromFile(LocalFilePath);
+            return CompositeBuffer(SharedBuffer(Result));
+        }
+        // Evict the LRU entry (the back) so the insert below cannot exceed the
+        // reserved capacity.
+        if (m_OpenFiles.size() == m_OpenFiles.capacity())
+        {
+            m_OpenFiles.pop_back();
+        }
+        m_OpenFiles.insert(
+            m_OpenFiles.begin(),
+            std::make_pair(
+                SequenceIndex,
+                std::make_unique<BufferedOpenFile>(LocalFilePath, m_OpenReadCount, m_CurrentOpenFileCount, m_ReadCount, m_ReadByteCount)));
+        CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size);
+        return Result;
+    }
+
+private:
+    const std::filesystem::path m_Path;
+    // Borrowed references; must outlive this cache.
+    const ChunkedFolderContent& m_LocalContent;
+    const ChunkedContentLookup& m_LocalLookup;
+    // MRU-ordered: front = most recently used, back = eviction candidate.
+    std::vector<std::pair<uint32_t, std::unique_ptr<BufferedOpenFile>>> m_OpenFiles;
+    std::atomic<uint64_t>& m_OpenReadCount;
+    std::atomic<uint64_t>& m_CurrentOpenFileCount;
+    std::atomic<uint64_t>& m_ReadCount;
+    std::atomic<uint64_t>& m_ReadByteCount;
+};
+
+// Constructs the upload operation. All references (logger, progress, storage,
+// flags, worker pools) are borrowed and must outlive the operation. No work is
+// performed here beyond pre-hashing the non-compressable extension list.
+BuildsOperationUploadFolder::BuildsOperationUploadFolder(LoggerRef Log,
+                                                         ProgressBase& Progress,
+                                                         StorageInstance& Storage,
+                                                         std::atomic<bool>& AbortFlag,
+                                                         std::atomic<bool>& PauseFlag,
+                                                         WorkerThreadPool& IOWorkerPool,
+                                                         WorkerThreadPool& NetworkPool,
+                                                         const Oid& BuildId,
+                                                         const std::filesystem::path& Path,
+                                                         bool CreateBuild,
+                                                         const CbObject& MetaData,
+                                                         const Options& Options)
+: m_Log(Log)
+, m_Progress(Progress)
+, m_Storage(Storage)
+, m_AbortFlag(AbortFlag)
+, m_PauseFlag(PauseFlag)
+, m_IOWorkerPool(IOWorkerPool)
+, m_NetworkPool(NetworkPool)
+, m_BuildId(BuildId)
+, m_Path(Path)
+, m_CreateBuild(CreateBuild)
+, m_MetaData(MetaData)
+, m_Options(Options)
+{
+    // Pre-compute hashes of the non-compressable extensions so that per-chunk
+    // compressability checks reduce to a set lookup (see IsChunkCompressable).
+    m_NonCompressableExtensionHashes.reserve(Options.NonCompressableExtensions.size());
+    for (const std::string& Extension : Options.NonCompressableExtensions)
+    {
+        m_NonCompressableExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+    }
+}
+
+// Prepares the remote build for upload: either creates the build record
+// (PutBuild) or fetches the existing one (GetBuild), adopts the multipart
+// chunk size advertised by the server when present, and optionally queries
+// the server for already-known blocks so redundant uploads can be skipped.
+// Returns timings and the (possibly adjusted) preferred multipart chunk size.
+BuildsOperationUploadFolder::PrepareBuildResult
+BuildsOperationUploadFolder::PrepareBuild()
+{
+    ZEN_TRACE_CPU("PrepareBuild");
+
+    PrepareBuildResult Result;
+    Result.PreferredMultipartChunkSize = m_Options.PreferredMultipartChunkSize;
+    Stopwatch Timer;
+    if (m_CreateBuild)
+    {
+        ZEN_TRACE_CPU("CreateBuild");
+
+        Stopwatch PutBuildTimer;
+        CbObject PutBuildResult = m_Storage.BuildStorage->PutBuild(m_BuildId, m_MetaData);
+        Result.PrepareBuildTimeMs = PutBuildTimer.GetElapsedTimeMs();
+        // A zero "chunkSize" means the server expressed no preference.
+        if (auto ChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(); ChunkSize != 0)
+        {
+            Result.PreferredMultipartChunkSize = ChunkSize;
+        }
+        Result.PayloadSize = m_MetaData.GetSize();
+    }
+    else
+    {
+        // Build already exists on the server: fetch it to learn its settings.
+        // BUGFIX: this trace scope was mislabeled "PutBuild" (copy-paste) even
+        // though the branch performs a GetBuild.
+        ZEN_TRACE_CPU("GetBuild");
+        Stopwatch GetBuildTimer;
+        CbObject Build = m_Storage.BuildStorage->GetBuild(m_BuildId);
+        Result.PrepareBuildTimeMs = GetBuildTimer.GetElapsedTimeMs();
+        Result.PayloadSize = Build.GetSize();
+        if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0)
+        {
+            Result.PreferredMultipartChunkSize = ChunkSize;
+        }
+        else if (m_Options.AllowMultiparts)
+        {
+            ZEN_WARN("PreferredMultipartChunkSize is unknown. Defaulting to '{}'", NiceBytes(Result.PreferredMultipartChunkSize));
+        }
+    }
+
+    // Optionally ask the server which blocks it already has so upload of those
+    // can be skipped later.
+    if (!m_Options.IgnoreExistingBlocks)
+    {
+        ZEN_TRACE_CPU("FindBlocks");
+        Stopwatch KnownBlocksTimer;
+        CbObject BlockDescriptionList = m_Storage.BuildStorage->FindBlocks(m_BuildId, m_Options.FindBlockMaxCount);
+        if (BlockDescriptionList)
+        {
+            Result.KnownBlocks = ParseChunkBlockDescriptionList(BlockDescriptionList);
+        }
+        Result.FindBlocksTimeMs = KnownBlocksTimer.GetElapsedTimeMs();
+    }
+    Result.ElapsedTimeMs = Timer.GetElapsedTimeMs();
+    return Result;
+}
+
+// Scans m_Path and builds a single UploadPart covering the whole folder.
+// Files named in the optional exclude manifest, files rejected by
+// IsAcceptedFile and folders rejected by IsAcceptedFolder are skipped.
+std::vector<BuildsOperationUploadFolder::UploadPart>
+BuildsOperationUploadFolder::ReadFolder()
+{
+    // Collect the (generic-form) relative paths listed in the exclude
+    // manifest, when one exists next to the upload root.
+    tsl::robin_set<std::string> ExcludeAssetPaths;
+    std::filesystem::path ExcludeManifestPath = m_Path / m_Options.ZenExcludeManifestName;
+    if (IsFile(ExcludeManifestPath))
+    {
+        std::filesystem::path AbsoluteExcludeManifestPath =
+            MakeSafeAbsolutePath(ExcludeManifestPath.is_absolute() ? ExcludeManifestPath : m_Path / ExcludeManifestPath);
+        BuildManifest ExcludeManifest = ParseBuildManifest(AbsoluteExcludeManifestPath);
+        const std::vector<std::filesystem::path>& ExcludedFiles = ExcludeManifest.Parts.front().Files;
+        ExcludeAssetPaths.reserve(ExcludedFiles.size());
+        for (const std::filesystem::path& ExcludedFile : ExcludedFiles)
+        {
+            ExcludeAssetPaths.insert(ExcludedFile.generic_string());
+        }
+    }
+
+    // A plain folder upload always produces exactly one part.
+    std::vector<UploadPart> UploadParts(1);
+    UploadPart& Part = UploadParts.front();
+    GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats;
+
+    Part.Content = GetFolderContent(
+        Part.LocalFolderScanStats,
+        m_Path,
+        [this](const std::string_view& RelativePath) { return IsAcceptedFolder(RelativePath); },
+        [this, &ExcludeAssetPaths](const std::string_view& RelativePath, uint64_t Size, uint32_t Attributes) -> bool {
+            ZEN_UNUSED(Size, Attributes);
+            // A file is included only when it passes the extension filter and
+            // is not listed in the exclude manifest.
+            return IsAcceptedFile(RelativePath) &&
+                   !ExcludeAssetPaths.contains(std::filesystem::path(RelativePath).generic_string());
+        },
+        m_IOWorkerPool,
+        m_Progress.GetProgressUpdateDelayMS(),
+        [&](bool, std::ptrdiff_t) { ZEN_INFO("Found {} files in '{}'...", LocalFolderScanStats.AcceptedFileCount.load(), m_Path); },
+        m_AbortFlag);
+    Part.TotalRawSize = std::accumulate(Part.Content.RawSizes.begin(), Part.Content.RawSizes.end(), std::uint64_t(0));
+
+    return UploadParts;
+}
+
+// Parses the given build manifest and produces one UploadPart per manifest
+// part, validating that every referenced file exists on disk under m_Path.
+// A relative manifest path is appended to each part's file list so the
+// manifest itself is uploaded with the build.
+// Throws std::runtime_error when the manifest is empty/invalid or references
+// missing files.
+std::vector<BuildsOperationUploadFolder::UploadPart>
+BuildsOperationUploadFolder::ReadManifestParts(const std::filesystem::path& ManifestPath)
+{
+    std::vector<UploadPart> UploadParts;
+    Stopwatch ManifestParseTimer;
+    std::filesystem::path AbsoluteManifestPath = MakeSafeAbsolutePath(ManifestPath.is_absolute() ? ManifestPath : m_Path / ManifestPath);
+    BuildManifest Manifest = ParseBuildManifest(AbsoluteManifestPath);
+    if (Manifest.Parts.empty())
+    {
+        throw std::runtime_error(fmt::format("Manifest file at '{}' is invalid", ManifestPath));
+    }
+
+    UploadParts.resize(Manifest.Parts.size());
+    for (size_t PartIndex = 0; PartIndex < Manifest.Parts.size(); PartIndex++)
+    {
+        BuildManifest::Part& PartManifest = Manifest.Parts[PartIndex];
+        // Include the manifest file itself in the upload when it lives inside
+        // the uploaded folder (i.e. was given as a relative path).
+        if (ManifestPath.is_relative())
+        {
+            PartManifest.Files.push_back(ManifestPath);
+        }
+
+        UploadPart& Part = UploadParts[PartIndex];
+        FolderContent& Content = Part.Content;
+
+        GetFolderContentStatistics& LocalFolderScanStats = Part.LocalFolderScanStats;
+
+        const std::vector<std::filesystem::path>& AssetPaths = PartManifest.Files;
+        Content = GetValidFolderContent(
+            m_IOWorkerPool,
+            LocalFolderScanStats,
+            m_Path,
+            AssetPaths,
+            [](uint64_t PathCount, uint64_t CompletedPathCount) { ZEN_UNUSED(PathCount, CompletedPathCount); },
+            1000,
+            m_AbortFlag,
+            m_PauseFlag);
+
+        // Fewer found paths than listed paths means some manifest entries are
+        // missing on disk; report exactly which ones.
+        if (Content.Paths.size() != AssetPaths.size())
+        {
+            const tsl::robin_set<std::filesystem::path> FoundPaths(Content.Paths.begin(), Content.Paths.end());
+            ExtendableStringBuilder<1024> SB;
+            for (const std::filesystem::path& AssetPath : AssetPaths)
+            {
+                if (!FoundPaths.contains(AssetPath))
+                {
+                    SB << "\n  " << AssetPath.generic_string();
+                }
+            }
+            // BUGFIX: error message grammar ("files that does not exist" ->
+            // "files that do not exist").
+            throw std::runtime_error(
+                fmt::format("Manifest file at '{}' references files that do not exist{}", ManifestPath, SB.ToView()));
+        }
+
+        Part.PartId = PartManifest.PartId;
+        Part.PartName = PartManifest.PartName;
+        Part.TotalRawSize = std::accumulate(Part.Content.RawSizes.begin(), Part.Content.RawSizes.end(), std::uint64_t(0));
+    }
+
+    return UploadParts;
+}
+
+// Top-level entry point for the upload operation.
+// Reads parts (from a manifest when ManifestPath is set, else the whole
+// folder), assigns missing part ids/names, kicks PrepareBuild off on the
+// network pool, uploads each part, finalizes the build when this operation
+// created it, and returns the (id, name) pair of every uploaded part.
+// On any exception the abort flag is raised before rethrowing so in-flight
+// workers stop promptly.
+std::vector<std::pair<Oid, std::string>>
+BuildsOperationUploadFolder::Execute(const Oid& BuildPartId,
+                                     const std::string_view BuildPartName,
+                                     const std::filesystem::path& ManifestPath,
+                                     ChunkingController& ChunkController,
+                                     ChunkingCache& ChunkCache)
+{
+    ZEN_TRACE_CPU("BuildsOperationUploadFolder::Execute");
+    try
+    {
+        Stopwatch ReadPartsTimer;
+        std::vector<UploadPart> UploadParts = ManifestPath.empty() ? ReadFolder() : ReadManifestParts(ManifestPath);
+
+        // Fill in part ids/names that the manifest (or folder scan) left
+        // unset. Only a single-part upload may rely on the caller-supplied
+        // BuildPartId / BuildPartName.
+        for (UploadPart& Part : UploadParts)
+        {
+            if (Part.PartId == Oid::Zero)
+            {
+                if (UploadParts.size() != 1)
+                {
+                    // BUGFIX: message grammar ("must contains" -> "must contain").
+                    throw std::runtime_error(fmt::format("Multi part upload manifest '{}' must contain build part id", ManifestPath));
+                }
+
+                if (BuildPartId == Oid::Zero)
+                {
+                    Part.PartId = Oid::NewOid();
+                }
+                else
+                {
+                    Part.PartId = BuildPartId;
+                }
+            }
+            if (Part.PartName.empty())
+            {
+                if (UploadParts.size() != 1)
+                {
+                    // BUGFIX: message grammar ("must contains" -> "must contain").
+                    throw std::runtime_error(fmt::format("Multi part upload manifest '{}' must contain build part name", ManifestPath));
+                }
+                if (BuildPartName.empty())
+                {
+                    throw std::runtime_error("Build part name must be set");
+                }
+                Part.PartName = std::string(BuildPartName);
+            }
+        }
+
+        if (!m_Options.IsQuiet)
+        {
+            ZEN_INFO("Reading {} parts took {}", UploadParts.size(), NiceTimeSpanMs(ReadPartsTimer.GetElapsedTimeMs()));
+        }
+
+        // Progress steps: prepare, then a fixed number of steps per part,
+        // then finalize and cleanup.
+        const uint32_t PartsUploadStepCount = gsl::narrow<uint32_t>(uint32_t(PartTaskSteps::StepCount) * UploadParts.size());
+
+        const uint32_t PrepareBuildStep = 0;
+        const uint32_t UploadPartsStep = 1;
+        const uint32_t FinalizeBuildStep = UploadPartsStep + PartsUploadStepCount;
+        const uint32_t CleanupStep = FinalizeBuildStep + 1;
+        const uint32_t StepCount = CleanupStep + 1;
+
+        auto EndProgress = MakeGuard([&]() { m_Progress.SetLogOperationProgress(StepCount, StepCount); });
+
+        Stopwatch ProcessTimer;
+
+        // Start from a clean temp dir and guarantee its removal on exit.
+        CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir);
+        CreateDirectories(m_Options.TempDir);
+        auto _ = MakeGuard([&]() { CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, m_Options.TempDir); });
+
+        m_Progress.SetLogOperationProgress(PrepareBuildStep, StepCount);
+
+        // PrepareBuild talks to the server; overlap it with local work by
+        // running it on the network pool.
+        m_PrepBuildResultFuture = m_NetworkPool.EnqueueTask(std::packaged_task<PrepareBuildResult()>{[this] { return PrepareBuild(); }},
+                                                            WorkerThreadPool::EMode::EnableBacklog);
+
+        for (uint32_t PartIndex = 0; PartIndex < UploadParts.size(); PartIndex++)
+        {
+            const uint32_t PartStepOffset = UploadPartsStep + (PartIndex * uint32_t(PartTaskSteps::StepCount));
+
+            const UploadPart& Part = UploadParts[PartIndex];
+            UploadBuildPart(ChunkController, ChunkCache, PartIndex, Part, PartStepOffset, StepCount);
+            if (m_AbortFlag)
+            {
+                return {};
+            }
+        }
+
+        m_Progress.SetLogOperationProgress(FinalizeBuildStep, StepCount);
+
+        // Only the operation that created the build finalizes it.
+        if (m_CreateBuild && !m_AbortFlag)
+        {
+            Stopwatch FinalizeBuildTimer;
+            m_Storage.BuildStorage->FinalizeBuild(m_BuildId);
+            if (!m_Options.IsQuiet)
+            {
+                ZEN_INFO("FinalizeBuild took {}", NiceTimeSpanMs(FinalizeBuildTimer.GetElapsedTimeMs()));
+            }
+        }
+
+        m_Progress.SetLogOperationProgress(CleanupStep, StepCount);
+
+        std::vector<std::pair<Oid, std::string>> Result;
+        Result.reserve(UploadParts.size());
+        for (UploadPart& Part : UploadParts)
+        {
+            Result.push_back(std::make_pair(Part.PartId, Part.PartName));
+        }
+        return Result;
+    }
+    catch (const std::exception&)
+    {
+        // Raise the abort flag so any in-flight background work stops before
+        // the exception propagates.
+        m_AbortFlag = true;
+        throw;
+    }
+}
+
+// Returns false when RelativePath is one of the configured exclude folders or
+// is nested beneath one ('/'-separated relative paths); true otherwise.
+bool
+BuildsOperationUploadFolder::IsAcceptedFolder(const std::string_view& RelativePath) const
+{
+    for (const std::string& ExcludeFolder : m_Options.ExcludeFolders)
+    {
+        if (!RelativePath.starts_with(ExcludeFolder))
+        {
+            continue;
+        }
+        // Exact match, or the prefix ends at a path-separator boundary
+        // (so "FooBar" is not excluded by exclude folder "Foo").
+        const bool IsExactMatch = RelativePath.length() == ExcludeFolder.length();
+        if (IsExactMatch || RelativePath[ExcludeFolder.length()] == '/')
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Returns false for the exclude manifest itself and for any file whose path
+// ends with one of the configured excluded extensions; true otherwise.
+bool
+BuildsOperationUploadFolder::IsAcceptedFile(const std::string_view& RelativePath) const
+{
+    // The exclude manifest is metadata for the upload, never part of it.
+    const bool IsExcludeManifest = RelativePath == m_Options.ZenExcludeManifestName;
+    if (IsExcludeManifest)
+    {
+        return false;
+    }
+    for (const std::string& ExcludeExtension : m_Options.ExcludeExtensions)
+    {
+        const bool HasExcludedExtension = RelativePath.ends_with(ExcludeExtension);
+        if (HasExcludedExtension)
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Groups ChunkIndexes into upload blocks, respecting MaxBlockSize and
+// MaxChunksPerBlock. Chunks are first sorted by their first on-disk location
+// (sequence, then offset) so blocks contain data that is contiguous in the
+// source files. When a block fills up, a backward scan tries to split it at a
+// source-file boundary within the top ~1/16 of MaxBlockSize so blocks tend not
+// to straddle files.
+// NOTE: ChunkIndexes is reordered in place; OutBlocks is appended to.
+void
+BuildsOperationUploadFolder::ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content,
+                                                     const ChunkedContentLookup& Lookup,
+                                                     std::vector<uint32_t>& ChunkIndexes,
+                                                     std::vector<std::vector<uint32_t>>& OutBlocks)
+{
+    ZEN_TRACE_CPU("ArrangeChunksIntoBlocks");
+    // Sort chunks by (sequence index, offset) of their first occurrence so the
+    // packing below walks the source data in on-disk order.
+    std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&Content, &Lookup](uint32_t Lhs, uint32_t Rhs) {
+        const ChunkedContentLookup::ChunkSequenceLocation& LhsLocation = GetChunkSequenceLocations(Lookup, Lhs)[0];
+        const ChunkedContentLookup::ChunkSequenceLocation& RhsLocation = GetChunkSequenceLocations(Lookup, Rhs)[0];
+        if (LhsLocation.SequenceIndex < RhsLocation.SequenceIndex)
+        {
+            return true;
+        }
+        else if (LhsLocation.SequenceIndex > RhsLocation.SequenceIndex)
+        {
+            return false;
+        }
+        return LhsLocation.Offset < RhsLocation.Offset;
+    });
+
+    // Lower bound of the "acceptable" final block size: MaxBlockSize minus
+    // 1/16th. The split scan below will not shrink a block past this.
+    uint64_t MaxBlockSizeLowThreshold = m_Options.BlockParameters.MaxBlockSize - (m_Options.BlockParameters.MaxBlockSize / 16);
+
+    uint64_t BlockSize = 0;
+
+    // [ChunkIndexStart, ChunkIndexOffset) is the current in-progress block.
+    uint32_t ChunkIndexStart = 0;
+    for (uint32_t ChunkIndexOffset = 0; ChunkIndexOffset < ChunkIndexes.size();)
+    {
+        const uint32_t ChunkIndex = ChunkIndexes[ChunkIndexOffset];
+        const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
+
+        // Close the current block when adding this chunk would overflow the
+        // size limit, or the per-block chunk-count limit is exceeded.
+        if (((BlockSize + ChunkSize) > m_Options.BlockParameters.MaxBlockSize) ||
+            (ChunkIndexOffset - ChunkIndexStart) > m_Options.BlockParameters.MaxChunksPerBlock)
+        {
+            // Within the span of MaxBlockSizeLowThreshold and MaxBlockSize, see if there is a break
+            // between source paths for chunks. Break the block at the last such break if any.
+            ZEN_ASSERT(ChunkIndexOffset > ChunkIndexStart);
+
+            const uint32_t ChunkSequenceIndex = Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[ChunkIndex]].SequenceIndex;
+
+            uint64_t ScanBlockSize = BlockSize;
+
+            // Walk backwards from the last chunk in the block, looking for a
+            // chunk that belongs to a different sequence (i.e. a file break).
+            // Stops early once removing more chunks would drop the block
+            // below the low threshold, and never scans past the first two
+            // chunks of the block.
+            uint32_t ScanChunkIndexOffset = ChunkIndexOffset - 1;
+            while (ScanChunkIndexOffset > (ChunkIndexStart + 2))
+            {
+                const uint32_t TestChunkIndex = ChunkIndexes[ScanChunkIndexOffset];
+                const uint64_t TestChunkSize = Content.ChunkedContent.ChunkRawSizes[TestChunkIndex];
+                if ((ScanBlockSize - TestChunkSize) < MaxBlockSizeLowThreshold)
+                {
+                    break;
+                }
+
+                const uint32_t TestSequenceIndex =
+                    Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[TestChunkIndex]].SequenceIndex;
+                if (ChunkSequenceIndex != TestSequenceIndex)
+                {
+                    // Found a file break: end the block just after it.
+                    ChunkIndexOffset = ScanChunkIndexOffset + 1;
+                    break;
+                }
+
+                ScanBlockSize -= TestChunkSize;
+                ScanChunkIndexOffset--;
+            }
+
+            // Emit the block [ChunkIndexStart, ChunkIndexOffset) and restart
+            // packing from ChunkIndexOffset (the loop counter is NOT advanced
+            // here; the chunk that triggered the close starts the next block).
+            std::vector<uint32_t> ChunksInBlock;
+            ChunksInBlock.reserve(ChunkIndexOffset - ChunkIndexStart);
+            for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexOffset; AddIndexOffset++)
+            {
+                const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset];
+                ChunksInBlock.push_back(AddChunkIndex);
+            }
+            OutBlocks.emplace_back(std::move(ChunksInBlock));
+            BlockSize = 0;
+            ChunkIndexStart = ChunkIndexOffset;
+        }
+        else
+        {
+            ChunkIndexOffset++;
+            BlockSize += ChunkSize;
+        }
+    }
+    // Flush the final (possibly undersized) block.
+    if (ChunkIndexStart < ChunkIndexes.size())
+    {
+        std::vector<uint32_t> ChunksInBlock;
+        ChunksInBlock.reserve(ChunkIndexes.size() - ChunkIndexStart);
+        for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexes.size(); AddIndexOffset++)
+        {
+            const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset];
+            ChunksInBlock.push_back(AddChunkIndex);
+        }
+        OutBlocks.emplace_back(std::move(ChunksInBlock));
+    }
+}
+
+// Generates compressed blocks for the given chunk groups and uploads them as
+// they become ready. Block generation runs on the IO worker pool and uploads
+// on the network pool (see ScheduleBlockGeneration); this function sets up the
+// shared context, drives the progress bar while waiting, and records elapsed
+// wall times into the statistics structs.
+void
+BuildsOperationUploadFolder::GenerateBuildBlocks(const ChunkedFolderContent& Content,
+                                                 const ChunkedContentLookup& Lookup,
+                                                 const std::vector<std::vector<uint32_t>>& NewBlockChunks,
+                                                 GeneratedBlocks& OutBlocks,
+                                                 GenerateBlocksStatistics& GenerateBlocksStats,
+                                                 UploadStatistics& UploadStats)
+{
+    ZEN_TRACE_CPU("GenerateBuildBlocks");
+    const std::size_t NewBlockCount = NewBlockChunks.size();
+    if (NewBlockCount == 0)
+    {
+        return;
+    }
+
+    std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Generate Blocks");
+
+    // Pre-size all per-block output arrays so worker tasks can write to their
+    // BlockIndex slot without synchronization.
+    OutBlocks.BlockDescriptions.resize(NewBlockCount);
+    OutBlocks.BlockSizes.resize(NewBlockCount);
+    OutBlocks.BlockMetaDatas.resize(NewBlockCount);
+    OutBlocks.BlockHeaders.resize(NewBlockCount);
+    OutBlocks.MetaDataHasBeenUploaded.resize(NewBlockCount, 0);
+    OutBlocks.BlockHashToBlockIndex.reserve(NewBlockCount);
+
+    // Lock guards BlockHashToBlockIndex, the one output that workers mutate
+    // concurrently (the per-index arrays above are written disjointly).
+    RwLock Lock;
+    FilteredRate FilteredGeneratedBytesPerSecond;
+    FilteredRate FilteredUploadedBytesPerSecond;
+    ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+    // Backpressure counter: limits how many generated blocks may be queued
+    // for upload at once (checked in ScheduleBlockGeneration).
+    std::atomic<uint64_t> QueuedPendingBlocksForUpload = 0;
+
+    GenerateBuildBlocksContext Context{.Work = Work,
+                                       .GenerateBlobsPool = m_IOWorkerPool,
+                                       .UploadBlocksPool = m_NetworkPool,
+                                       .FilteredGeneratedBytesPerSecond = FilteredGeneratedBytesPerSecond,
+                                       .FilteredUploadedBytesPerSecond = FilteredUploadedBytesPerSecond,
+                                       .QueuedPendingBlocksForUpload = QueuedPendingBlocksForUpload,
+                                       .Lock = Lock,
+                                       .OutBlocks = OutBlocks,
+                                       .GenerateBlocksStats = GenerateBlocksStats,
+                                       .UploadStats = UploadStats,
+                                       .NewBlockCount = NewBlockCount};
+
+    ScheduleBlockGeneration(Context, Content, Lookup, NewBlockChunks);
+
+    // Block until all generation/upload tasks finish, refreshing the progress
+    // bar with generated/uploaded counts and filtered throughput.
+    Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+        ZEN_UNUSED(PendingWork);
+
+        FilteredGeneratedBytesPerSecond.Update(GenerateBlocksStats.GeneratedBlockByteCount.load());
+        FilteredUploadedBytesPerSecond.Update(UploadStats.BlocksBytes.load());
+
+        std::string Details = fmt::format("Generated {}/{} ({}, {}B/s). Uploaded {}/{} ({}, {}bits/s)",
+                                          GenerateBlocksStats.GeneratedBlockCount.load(),
+                                          NewBlockCount,
+                                          NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()),
+                                          NiceNum(FilteredGeneratedBytesPerSecond.GetCurrent()),
+                                          UploadStats.BlockCount.load(),
+                                          NewBlockCount,
+                                          NiceBytes(UploadStats.BlocksBytes.load()),
+                                          NiceNum(FilteredUploadedBytesPerSecond.GetCurrent() * 8));
+
+        ProgressBar->UpdateState({.Task = "Generating blocks",
+                                  .Details = Details,
+                                  .TotalCount = gsl::narrow<uint64_t>(NewBlockCount),
+                                  .RemainingCount = gsl::narrow<uint64_t>(NewBlockCount - GenerateBlocksStats.GeneratedBlockCount.load()),
+                                  .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+                                 false);
+    });
+
+    // All queued uploads must have drained unless we were aborted.
+    ZEN_ASSERT(m_AbortFlag || QueuedPendingBlocksForUpload.load() == 0);
+
+    ProgressBar->Finish();
+
+    GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTimeUS();
+    UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS();
+}
+
+// Schedules one generation task per new block on the generate pool. Each task
+// compresses its chunks into a block, records its description/size/metadata/
+// header in Context.OutBlocks, and — unless the upload queue is saturated
+// (backpressure at >16 queued) — hands the compressed payload to the upload
+// pool via UploadGeneratedBlock.
+void
+BuildsOperationUploadFolder::ScheduleBlockGeneration(GenerateBuildBlocksContext& Context,
+                                                     const ChunkedFolderContent& Content,
+                                                     const ChunkedContentLookup& Lookup,
+                                                     const std::vector<std::vector<uint32_t>>& NewBlockChunks)
+{
+    for (size_t BlockIndex = 0; BlockIndex < Context.NewBlockCount; BlockIndex++)
+    {
+        if (Context.Work.IsAborted())
+        {
+            break;
+        }
+        // NOTE: ChunksInBlock is captured by value — the lambda outlives this
+        // loop iteration.
+        const std::vector<uint32_t>& ChunksInBlock = NewBlockChunks[BlockIndex];
+        Context.Work.ScheduleWork(
+            Context.GenerateBlobsPool,
+            [this, &Context, &Content, &Lookup, ChunksInBlock, BlockIndex](std::atomic<bool>&) {
+                if (!m_AbortFlag)
+                {
+                    ZEN_TRACE_CPU("GenerateBuildBlocks_Generate");
+
+                    Context.FilteredGeneratedBytesPerSecond.Start();
+
+                    Stopwatch GenerateTimer;
+                    CompressedBuffer CompressedBlock =
+                        GenerateBlock(Content, Lookup, ChunksInBlock, Context.OutBlocks.BlockDescriptions[BlockIndex]);
+                    if (m_Options.IsVerbose)
+                    {
+                        ZEN_INFO("Generated block {} ({}) containing {} chunks in {}",
+                                 Context.OutBlocks.BlockDescriptions[BlockIndex].BlockHash,
+                                 NiceBytes(CompressedBlock.GetCompressedSize()),
+                                 Context.OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size(),
+                                 NiceTimeSpanMs(GenerateTimer.GetElapsedTimeMs()));
+                    }
+
+                    Context.OutBlocks.BlockSizes[BlockIndex] = CompressedBlock.GetCompressedSize();
+                    {
+                        CbObjectWriter Writer;
+                        Writer.AddString("createdBy", "zen");
+                        Context.OutBlocks.BlockMetaDatas[BlockIndex] = Writer.Save();
+                    }
+                    Context.GenerateBlocksStats.GeneratedBlockByteCount += Context.OutBlocks.BlockSizes[BlockIndex];
+                    Context.GenerateBlocksStats.GeneratedBlockCount++;
+
+                    // BlockHashToBlockIndex is shared across tasks; guard it.
+                    Context.Lock.WithExclusiveLock([&]() {
+                        Context.OutBlocks.BlockHashToBlockIndex.insert_or_assign(Context.OutBlocks.BlockDescriptions[BlockIndex].BlockHash,
+                                                                                 BlockIndex);
+                    });
+
+                    // Keep the first two compressed segments (the block header)
+                    // so later stages can reference it without the payload.
+                    {
+                        std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments();
+                        ZEN_ASSERT(Segments.size() >= 2);
+                        Context.OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]);
+                    }
+
+                    if (Context.GenerateBlocksStats.GeneratedBlockCount == Context.NewBlockCount)
+                    {
+                        Context.FilteredGeneratedBytesPerSecond.Stop();
+                    }
+
+                    // Backpressure: if too many blocks already await upload,
+                    // skip scheduling this one for immediate upload.
+                    // NOTE(review): this branch re-assigns BlockHeaders[BlockIndex]
+                    // with the same value already stored unconditionally above —
+                    // the re-assignment appears redundant; confirm whether the
+                    // earlier assignment was meant to be conditional instead.
+                    if (Context.QueuedPendingBlocksForUpload.load() > 16)
+                    {
+                        std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments();
+                        ZEN_ASSERT(Segments.size() >= 2);
+                        Context.OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]);
+                    }
+                    else
+                    {
+                        if (!m_AbortFlag)
+                        {
+                            // Move the compressed payload into the upload task;
+                            // the counter is decremented by UploadGeneratedBlock.
+                            Context.QueuedPendingBlocksForUpload++;
+                            Context.Work.ScheduleWork(
+                                Context.UploadBlocksPool,
+                                [this, &Context, BlockIndex, Payload = std::move(CompressedBlock)](std::atomic<bool>&) mutable {
+                                    UploadGeneratedBlock(Context, BlockIndex, std::move(Payload));
+                                });
+                        }
+                    }
+                }
+            });
+    }
+}
+
+// Uploads one freshly generated block (payload + metadata) to the build
+// storage, optionally mirroring both into the cache storage. Runs on the
+// UploadBlocksPool. HTTP exceptions are swallowed while an abort is in
+// progress so an abort does not surface as a failure. Updates UploadStats and
+// marks the block's metadata as uploaded on success.
+void
+BuildsOperationUploadFolder::UploadGeneratedBlock(GenerateBuildBlocksContext& Context, size_t BlockIndex, CompressedBuffer Payload)
+{
+    // Always release our slot in the pending-upload queue, whatever path we
+    // take out of this function.
+    auto _ = MakeGuard([&Context] { Context.QueuedPendingBlocksForUpload--; });
+    if (m_AbortFlag)
+    {
+        return;
+    }
+
+    // NOTE(review): once all blocks have been generated this early-out keeps
+    // only the header and returns WITHOUT uploading the payload, mirroring
+    // the BlockCount check at the end of this function. Presumably such
+    // blocks are then sent via the attachment path - confirm this is intended
+    // and not a mistaken condition.
+    if (Context.GenerateBlocksStats.GeneratedBlockCount == Context.NewBlockCount)
+    {
+        ZEN_TRACE_CPU("GenerateBuildBlocks_Save");
+
+        Context.FilteredUploadedBytesPerSecond.Stop();
+        std::span<const SharedBuffer> Segments = Payload.GetCompressed().GetSegments();
+        ZEN_ASSERT(Segments.size() >= 2);
+        Context.OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]);
+        return;
+    }
+
+    ZEN_TRACE_CPU("GenerateBuildBlocks_Upload");
+
+    Context.FilteredUploadedBytesPerSecond.Start();
+
+    const CbObject BlockMetaData =
+        BuildChunkBlockDescription(Context.OutBlocks.BlockDescriptions[BlockIndex], Context.OutBlocks.BlockMetaDatas[BlockIndex]);
+
+    const IoHash&  BlockHash = Context.OutBlocks.BlockDescriptions[BlockIndex].BlockHash;
+    const uint64_t CompressedBlockSize = Payload.GetCompressedSize();
+
+    // Optionally populate the local cache before uploading to the remote.
+    if (m_Storage.CacheStorage && m_Options.PopulateCache)
+    {
+        m_Storage.CacheStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload.GetCompressed());
+    }
+
+    try
+    {
+        m_Storage.BuildStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, std::move(Payload).GetCompressed());
+    }
+    catch (const std::exception&)
+    {
+        // Silence http errors due to abort
+        if (!m_AbortFlag)
+        {
+            throw;
+        }
+    }
+
+    if (m_AbortFlag)
+    {
+        return;
+    }
+
+    Context.UploadStats.BlocksBytes += CompressedBlockSize;
+
+    if (m_Options.IsVerbose)
+    {
+        ZEN_INFO("Uploaded block {} ({}) containing {} chunks",
+                 BlockHash,
+                 NiceBytes(CompressedBlockSize),
+                 Context.OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size());
+    }
+
+    if (m_Storage.CacheStorage && m_Options.PopulateCache)
+    {
+        m_Storage.CacheStorage->PutBlobMetadatas(m_BuildId, std::vector<IoHash>({BlockHash}), std::vector<CbObject>({BlockMetaData}));
+    }
+
+    // Metadata upload is best-effort here; failures leave
+    // MetaDataHasBeenUploaded[BlockIndex] false so UploadMissingBlockMetadata
+    // can retry later.
+    bool MetadataSucceeded = false;
+    try
+    {
+        MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData);
+    }
+    catch (const std::exception&)
+    {
+        // Silence http errors due to abort
+        if (!m_AbortFlag)
+        {
+            throw;
+        }
+    }
+
+    if (m_AbortFlag)
+    {
+        return;
+    }
+
+    if (MetadataSucceeded)
+    {
+        if (m_Options.IsVerbose)
+        {
+            ZEN_INFO("Uploaded block {} metadata ({})", BlockHash, NiceBytes(BlockMetaData.GetSize()));
+        }
+
+        Context.OutBlocks.MetaDataHasBeenUploaded[BlockIndex] = true;
+        Context.UploadStats.BlocksBytes += BlockMetaData.GetSize();
+    }
+
+    // Stop the upload rate tracker once the last block finishes uploading.
+    Context.UploadStats.BlockCount++;
+    if (Context.UploadStats.BlockCount == Context.NewBlockCount)
+    {
+        Context.FilteredUploadedBytesPerSecond.Stop();
+    }
+}
+
+// Rebases the local chunk order onto the "absolute" chunk index space used by
+// the part manifest: loose chunks come first (in LooseChunkIndexes order),
+// followed by every chunk of every block in block order. Returns, for each
+// entry of LocalChunkOrder, the corresponding absolute chunk index. When
+// DoExtraContentValidation is on, a parallel hash table is built and the
+// mapping is asserted to be hash-consistent.
+std::vector<uint32_t>
+BuildsOperationUploadFolder::CalculateAbsoluteChunkOrders(
+    const std::span<const IoHash>                           LocalChunkHashes,
+    const std::span<const uint32_t>                         LocalChunkOrder,
+    const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToLocalChunkIndex,
+    const std::span<const uint32_t>&                        LooseChunkIndexes,
+    const std::span<const ChunkBlockDescription>&           BlockDescriptions)
+{
+    ZEN_TRACE_CPU("CalculateAbsoluteChunkOrders");
+
+    // Only filled when extra validation is requested; mirrors the absolute
+    // index space so mappings can be asserted below.
+    std::vector<IoHash> TmpAbsoluteChunkHashes;
+    if (m_Options.DoExtraContentValidation)
+    {
+        TmpAbsoluteChunkHashes.reserve(LocalChunkHashes.size());
+    }
+    // Maps local chunk index -> absolute chunk index; (uint32_t)-1 marks
+    // "not assigned" (a local chunk not present in any loose/block set).
+    std::vector<uint32_t> LocalChunkIndexToAbsoluteChunkIndex;
+    LocalChunkIndexToAbsoluteChunkIndex.resize(LocalChunkHashes.size(), (uint32_t)-1);
+    std::uint32_t AbsoluteChunkCount = 0;
+    // Loose chunks occupy the first absolute indexes.
+    for (uint32_t ChunkIndex : LooseChunkIndexes)
+    {
+        LocalChunkIndexToAbsoluteChunkIndex[ChunkIndex] = AbsoluteChunkCount;
+        if (m_Options.DoExtraContentValidation)
+        {
+            TmpAbsoluteChunkHashes.push_back(LocalChunkHashes[ChunkIndex]);
+        }
+        AbsoluteChunkCount++;
+    }
+    // Then every chunk of every block, in block order. Block chunks that are
+    // not part of the local content still consume an absolute index.
+    for (const ChunkBlockDescription& Block : BlockDescriptions)
+    {
+        for (const IoHash& ChunkHash : Block.ChunkRawHashes)
+        {
+            if (auto It = ChunkHashToLocalChunkIndex.find(ChunkHash); It != ChunkHashToLocalChunkIndex.end())
+            {
+                const uint32_t LocalChunkIndex = It->second;
+                ZEN_ASSERT_SLOW(LocalChunkHashes[LocalChunkIndex] == ChunkHash);
+                LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex] = AbsoluteChunkCount;
+            }
+            if (m_Options.DoExtraContentValidation)
+            {
+                TmpAbsoluteChunkHashes.push_back(ChunkHash);
+            }
+            AbsoluteChunkCount++;
+        }
+    }
+    // Translate the local order array into absolute indexes.
+    std::vector<uint32_t> AbsoluteChunkOrder;
+    AbsoluteChunkOrder.reserve(LocalChunkHashes.size());
+    for (const uint32_t LocalChunkIndex : LocalChunkOrder)
+    {
+        const uint32_t AbsoluteChunkIndex = LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex];
+        if (m_Options.DoExtraContentValidation)
+        {
+            ZEN_ASSERT(LocalChunkHashes[LocalChunkIndex] == TmpAbsoluteChunkHashes[AbsoluteChunkIndex]);
+        }
+        AbsoluteChunkOrder.push_back(AbsoluteChunkIndex);
+    }
+    // Final validation pass: every order entry must resolve to the same hash
+    // in both index spaces.
+    if (m_Options.DoExtraContentValidation)
+    {
+        uint32_t OrderIndex = 0;
+        while (OrderIndex < LocalChunkOrder.size())
+        {
+            const uint32_t LocalChunkIndex    = LocalChunkOrder[OrderIndex];
+            const IoHash&  LocalChunkHash     = LocalChunkHashes[LocalChunkIndex];
+            const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrder[OrderIndex];
+            const IoHash&  AbsoluteChunkHash  = TmpAbsoluteChunkHashes[AbsoluteChunkIndex];
+            ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash);
+            OrderIndex++;
+        }
+    }
+    return AbsoluteChunkOrder;
+}
+
+// Reads the raw bytes of a single chunk from disk via the open-file cache.
+// Resolves the chunk hash to its first on-disk location through the lookup,
+// throws on read failure, and (in slow-assert builds) verifies the bytes hash
+// back to ChunkHash.
+CompositeBuffer
+BuildsOperationUploadFolder::FetchChunk(const ChunkedFolderContent& Content,
+                                        const ChunkedContentLookup& Lookup,
+                                        const IoHash&               ChunkHash,
+                                        ReadFileCache&              OpenFileCache)
+{
+    ZEN_TRACE_CPU("FetchChunk");
+    auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash);
+    ZEN_ASSERT(It != Lookup.ChunkHashToChunkIndex.end());
+    uint32_t ChunkIndex = It->second;
+    // A chunk may occur in several places; any copy is byte-identical, so the
+    // first location is used.
+    std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex);
+    ZEN_ASSERT(!ChunkLocations.empty());
+    CompositeBuffer Chunk =
+        OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, ChunkLocations[0].Offset, Content.ChunkedContent.ChunkRawSizes[ChunkIndex]);
+    if (!Chunk)
+    {
+        throw std::runtime_error(fmt::format("Unable to read chunk at {}, size {} from '{}'",
+                                             ChunkLocations[0].Offset,
+                                             Content.ChunkedContent.ChunkRawSizes[ChunkIndex],
+                                             Content.Paths[Lookup.SequenceIndexFirstPathIndex[ChunkLocations[0].SequenceIndex]]));
+    }
+    ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == ChunkHash);
+    return Chunk;
+};
+
+// Builds one compressed chunk block from the given chunk indexes. For each
+// chunk a lazy fetch callback is prepared that reads the chunk from disk and
+// compresses it (Oodle Mermaid/VeryFast) when it is large enough and its file
+// extension is not on the non-compressable list; otherwise it is stored
+// uncompressed. The actual block assembly is delegated to GenerateChunkBlock,
+// which also fills OutBlockDescription.
+CompressedBuffer
+BuildsOperationUploadFolder::GenerateBlock(const ChunkedFolderContent&  Content,
+                                           const ChunkedContentLookup&  Lookup,
+                                           const std::vector<uint32_t>& ChunksInBlock,
+                                           ChunkBlockDescription&       OutBlockDescription)
+{
+    ZEN_TRACE_CPU("GenerateBlock");
+    // Per-call read cache (up to 4 open files) feeding disk I/O stats.
+    ReadFileCache OpenFileCache(m_DiskStats.OpenReadCount,
+                                m_DiskStats.CurrentOpenFileCount,
+                                m_DiskStats.ReadCount,
+                                m_DiskStats.ReadByteCount,
+                                m_Path,
+                                Content,
+                                Lookup,
+                                4);
+
+    std::vector<std::pair<IoHash, FetchChunkFunc>> BlockContent;
+    BlockContent.reserve(ChunksInBlock.size());
+    for (uint32_t ChunkIndex : ChunksInBlock)
+    {
+        // The callback captures OpenFileCache by reference; it must only be
+        // invoked within this call (inside GenerateChunkBlock).
+        BlockContent.emplace_back(std::make_pair(
+            Content.ChunkedContent.ChunkHashes[ChunkIndex],
+            [this, &Content, &Lookup, &OpenFileCache, ChunkIndex](const IoHash& ChunkHash) -> std::pair<uint64_t, CompositeBuffer> {
+                CompositeBuffer Chunk = FetchChunk(Content, Lookup, ChunkHash, OpenFileCache);
+                ZEN_ASSERT(Chunk);
+                uint64_t RawSize = Chunk.GetSize();
+
+                const bool ShouldCompressChunk = RawSize >= m_Options.MinimumSizeForCompressInBlock &&
+                                                 IsChunkCompressable(m_NonCompressableExtensionHashes, Lookup, ChunkIndex);
+
+                const OodleCompressionLevel CompressionLevel =
+                    ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None;
+                return {RawSize, CompressedBuffer::Compress(Chunk, OodleCompressor::Mermaid, CompressionLevel).GetCompressed()};
+            }));
+    }
+
+    return GenerateChunkBlock(std::move(BlockContent), OutBlockDescription);
+};
+
+// Reconstructs a previously generated block from its retained header buffer
+// plus the chunk data re-read from disk and re-compressed with the same
+// settings as GenerateBlock. Used when a block was generated but its payload
+// was not kept in memory. The result is wrapped without validation, so the
+// recompression must be deterministic for the block hash to still match.
+CompressedBuffer
+BuildsOperationUploadFolder::RebuildBlock(const ChunkedFolderContent&  Content,
+                                          const ChunkedContentLookup&  Lookup,
+                                          CompositeBuffer&&            HeaderBuffer,
+                                          const std::vector<uint32_t>& ChunksInBlock)
+{
+    ZEN_TRACE_CPU("RebuildBlock");
+    // Per-call read cache (up to 4 open files) feeding disk I/O stats.
+    ReadFileCache OpenFileCache(m_DiskStats.OpenReadCount,
+                                m_DiskStats.CurrentOpenFileCount,
+                                m_DiskStats.ReadCount,
+                                m_DiskStats.ReadByteCount,
+                                m_Path,
+                                Content,
+                                Lookup,
+                                4);
+
+    // Result layout: header segments first, then each chunk's compressed
+    // segments in block order.
+    std::vector<SharedBuffer> ResultBuffers;
+    ResultBuffers.reserve(HeaderBuffer.GetSegments().size() + ChunksInBlock.size());
+    ResultBuffers.insert(ResultBuffers.end(), HeaderBuffer.GetSegments().begin(), HeaderBuffer.GetSegments().end());
+    for (uint32_t ChunkIndex : ChunksInBlock)
+    {
+        std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex);
+        ZEN_ASSERT(!ChunkLocations.empty());
+        CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex,
+                                                       ChunkLocations[0].Offset,
+                                                       Content.ChunkedContent.ChunkRawSizes[ChunkIndex]);
+        ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == Content.ChunkedContent.ChunkHashes[ChunkIndex]);
+
+        // Same compression policy as GenerateBlock: compress only if the
+        // chunk is large enough and its extension allows it.
+        const uint64_t RawSize = Chunk.GetSize();
+        const bool     ShouldCompressChunk =
+            RawSize >= m_Options.MinimumSizeForCompressInBlock && IsChunkCompressable(m_NonCompressableExtensionHashes, Lookup, ChunkIndex);
+
+        const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None;
+
+        CompositeBuffer CompressedChunk =
+            CompressedBuffer::Compress(std::move(Chunk), OodleCompressor::Mermaid, CompressionLevel).GetCompressed();
+        ResultBuffers.insert(ResultBuffers.end(), CompressedChunk.GetSegments().begin(), CompressedChunk.GetSegments().end());
+    }
+    return CompressedBuffer::FromCompressedNoValidate(CompositeBuffer(std::move(ResultBuffers)));
+};
+
+// Uploads one build part end to end. Steps (each reported via progress):
+//   1. chunk the part's folder content (ScanPartContent)
+//   2. classify chunks as loose / new-block / reuse-block and arrange new
+//      blocks (CalculateDelta)
+//   3. generate and upload new blocks (GenerateBuildBlocks)
+//   4. build + upload the part manifest (PutBuildPart)
+//   5. upload whatever attachments the server reports missing
+//   6. finalize the part with retries and upload missing block metadata
+//   7. record part statistics and fold local stats into the member totals.
+// Returns early without error when an abort has been requested.
+void
+BuildsOperationUploadFolder::UploadBuildPart(ChunkingController& ChunkController,
+                                             ChunkingCache&      ChunkCache,
+                                             uint32_t            PartIndex,
+                                             const UploadPart&   Part,
+                                             uint32_t            PartStepOffset,
+                                             uint32_t            StepCount)
+{
+    Stopwatch UploadTimer;
+
+    // Part-local statistics; merged into the m_* members at the end so a
+    // failed/aborted part does not half-update the totals mid-flight.
+    ChunkingStatistics       ChunkingStats;
+    FindBlocksStatistics     FindBlocksStats;
+    ReuseBlocksStatistics    ReuseBlocksStats;
+    UploadStatistics         UploadStats;
+    GenerateBlocksStatistics GenerateBlocksStats;
+    LooseChunksStatistics    LooseChunksStats;
+
+    m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::ChunkPartContent, StepCount);
+
+    ChunkedFolderContent LocalContent = ScanPartContent(Part, ChunkController, ChunkCache, ChunkingStats);
+    if (m_AbortFlag)
+    {
+        return;
+    }
+
+    const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent);
+
+    // The prepare-build future is consumed exactly once, on the first part;
+    // it supplies m_PreferredMultipartChunkSize/m_LargeAttachmentSize and
+    // m_KnownBlocks used below.
+    if (PartIndex == 0)
+    {
+        ConsumePrepareBuildResult();
+    }
+
+    ZEN_ASSERT(m_PreferredMultipartChunkSize != 0);
+    ZEN_ASSERT(m_LargeAttachmentSize != 0);
+
+    m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::CalculateDelta, StepCount);
+
+    Stopwatch BlockArrangeTimer;
+
+    std::vector<uint32_t> LooseChunkIndexes;
+    std::vector<uint32_t> NewBlockChunkIndexes;
+    std::vector<size_t>   ReuseBlockIndexes;
+    ClassifyChunksByBlockEligibility(LocalContent,
+                                     LooseChunkIndexes,
+                                     NewBlockChunkIndexes,
+                                     ReuseBlockIndexes,
+                                     LooseChunksStats,
+                                     FindBlocksStats,
+                                     ReuseBlocksStats);
+
+    std::vector<std::vector<uint32_t>> NewBlockChunks;
+    ArrangeChunksIntoBlocks(LocalContent, LocalLookup, NewBlockChunkIndexes, NewBlockChunks);
+
+    FindBlocksStats.NewBlocksCount += NewBlockChunks.size();
+    for (uint32_t ChunkIndex : NewBlockChunkIndexes)
+    {
+        FindBlocksStats.NewBlocksChunkByteCount += LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
+    }
+    FindBlocksStats.NewBlocksChunkCount += NewBlockChunkIndexes.size();
+
+    // Percentages are guarded against division by zero when nothing was
+    // eligible/accepted.
+    const double AcceptedByteCountPercent = FindBlocksStats.PotentialChunkByteCount > 0
+                                                ? (100.0 * ReuseBlocksStats.AcceptedRawByteCount / FindBlocksStats.PotentialChunkByteCount)
+                                                : 0.0;
+
+    const double AcceptedReduntantByteCountPercent =
+        ReuseBlocksStats.AcceptedByteCount > 0 ? (100.0 * ReuseBlocksStats.AcceptedReduntantByteCount) /
+                                                     (ReuseBlocksStats.AcceptedByteCount + ReuseBlocksStats.AcceptedReduntantByteCount)
+                                               : 0.0;
+    if (!m_Options.IsQuiet)
+    {
+        ZEN_INFO(
+            "Found {} chunks in {} ({}) blocks eligible for reuse in {}\n"
+            "  Reusing {} ({}) matching chunks in {} blocks ({:.1f}%)\n"
+            "  Accepting {} ({}) redundant chunks ({:.1f}%)\n"
+            "  Rejected {} ({}) chunks in {} blocks\n"
+            "  Arranged {} ({}) chunks in {} new blocks\n"
+            "  Keeping {} ({}) chunks as loose chunks\n"
+            "  Discovery completed in {}",
+            FindBlocksStats.FoundBlockChunkCount,
+            FindBlocksStats.FoundBlockCount,
+            NiceBytes(FindBlocksStats.FoundBlockByteCount),
+            NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS),
+
+            ReuseBlocksStats.AcceptedChunkCount,
+            NiceBytes(ReuseBlocksStats.AcceptedRawByteCount),
+            FindBlocksStats.AcceptedBlockCount,
+            AcceptedByteCountPercent,
+
+            ReuseBlocksStats.AcceptedReduntantChunkCount,
+            NiceBytes(ReuseBlocksStats.AcceptedReduntantByteCount),
+            AcceptedReduntantByteCountPercent,
+
+            ReuseBlocksStats.RejectedChunkCount,
+            NiceBytes(ReuseBlocksStats.RejectedByteCount),
+            ReuseBlocksStats.RejectedBlockCount,
+
+            FindBlocksStats.NewBlocksChunkCount,
+            NiceBytes(FindBlocksStats.NewBlocksChunkByteCount),
+            FindBlocksStats.NewBlocksCount,
+
+            LooseChunksStats.ChunkCount,
+            NiceBytes(LooseChunksStats.ChunkByteCount),
+
+            NiceTimeSpanMs(BlockArrangeTimer.GetElapsedTimeMs()));
+    }
+
+    m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::GenerateBlocks, StepCount);
+    GeneratedBlocks NewBlocks;
+
+    if (!NewBlockChunks.empty())
+    {
+        Stopwatch GenerateBuildBlocksTimer;
+        // Summary log runs on scope exit so it also fires on early exit paths
+        // out of GenerateBuildBlocks.
+        auto      __ = MakeGuard([&]() {
+            uint64_t BlockGenerateTimeUs = GenerateBuildBlocksTimer.GetElapsedTimeUs();
+            if (!m_Options.IsQuiet)
+            {
+                ZEN_INFO("Generated {} ({}) and uploaded {} ({}) blocks in {}. Generate speed: {}B/sec. Transfer speed {}bits/sec.",
+                         GenerateBlocksStats.GeneratedBlockCount.load(),
+                         NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount),
+                         UploadStats.BlockCount.load(),
+                         NiceBytes(UploadStats.BlocksBytes.load()),
+                         NiceTimeSpanMs(BlockGenerateTimeUs / 1000),
+                         NiceNum(GetBytesPerSecond(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS,
+                                                   GenerateBlocksStats.GeneratedBlockByteCount)),
+                         NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.BlocksBytes * 8)));
+            }
+        });
+        GenerateBuildBlocks(LocalContent, LocalLookup, NewBlockChunks, NewBlocks, GenerateBlocksStats, UploadStats);
+    }
+
+    m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::BuildPartManifest, StepCount);
+
+    BuiltPartManifest Manifest =
+        BuildPartManifestObject(LocalContent, LocalLookup, ChunkController, ReuseBlockIndexes, NewBlocks, LooseChunkIndexes);
+
+    m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::UploadBuildPart, StepCount);
+
+    // PutBuildPart returns the part hash plus the attachment hashes the
+    // server does not already have.
+    Stopwatch                              PutBuildPartResultTimer;
+    std::pair<IoHash, std::vector<IoHash>> PutBuildPartResult =
+        m_Storage.BuildStorage->PutBuildPart(m_BuildId, Part.PartId, Part.PartName, Manifest.PartManifest);
+    if (!m_Options.IsQuiet)
+    {
+        ZEN_INFO("PutBuildPart took {}, payload size {}. {} attachments are needed.",
+                 NiceTimeSpanMs(PutBuildPartResultTimer.GetElapsedTimeMs()),
+                 NiceBytes(Manifest.PartManifest.GetSize()),
+                 PutBuildPartResult.second.size());
+    }
+    IoHash PartHash = PutBuildPartResult.first;
+
+    m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::UploadAttachments, StepCount);
+
+    std::vector<IoHash> UnknownChunks;
+    if (m_Options.IgnoreExistingBlocks)
+    {
+        // Ignoring server state: force-upload all loose chunks plus every
+        // block that was not already uploaded during generation.
+        if (m_Options.IsVerbose)
+        {
+            ZEN_INFO("PutBuildPart uploading all attachments, needs are: {}", FormatArray<IoHash>(PutBuildPartResult.second, "\n    "sv));
+        }
+
+        std::vector<IoHash> ForceUploadChunkHashes;
+        ForceUploadChunkHashes.reserve(LooseChunkIndexes.size());
+
+        for (uint32_t ChunkIndex : LooseChunkIndexes)
+        {
+            ForceUploadChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]);
+        }
+
+        for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockHeaders.size(); BlockIndex++)
+        {
+            if (NewBlocks.BlockHeaders[BlockIndex])
+            {
+                // Block was not uploaded during generation
+                ForceUploadChunkHashes.push_back(NewBlocks.BlockDescriptions[BlockIndex].BlockHash);
+            }
+        }
+        UploadAttachmentBatch(ForceUploadChunkHashes,
+                              UnknownChunks,
+                              LocalContent,
+                              LocalLookup,
+                              NewBlockChunks,
+                              NewBlocks,
+                              LooseChunkIndexes,
+                              UploadStats,
+                              LooseChunksStats);
+    }
+    else if (!PutBuildPartResult.second.empty())
+    {
+        // Normal path: upload only what the server reported as missing.
+        if (m_Options.IsVerbose)
+        {
+            ZEN_INFO("PutBuildPart needs attachments: {}", FormatArray<IoHash>(PutBuildPartResult.second, "\n    "sv));
+        }
+        UploadAttachmentBatch(PutBuildPartResult.second,
+                              UnknownChunks,
+                              LocalContent,
+                              LocalLookup,
+                              NewBlockChunks,
+                              NewBlocks,
+                              LooseChunkIndexes,
+                              UploadStats,
+                              LooseChunksStats);
+    }
+
+    FinalizeBuildPartWithRetries(Part,
+                                 PartHash,
+                                 UnknownChunks,
+                                 LocalContent,
+                                 LocalLookup,
+                                 NewBlockChunks,
+                                 NewBlocks,
+                                 LooseChunkIndexes,
+                                 UploadStats,
+                                 LooseChunksStats);
+
+    if (!NewBlocks.BlockDescriptions.empty() && !m_AbortFlag)
+    {
+        UploadMissingBlockMetadata(NewBlocks, UploadStats);
+        // The newly generated blocks are now known blocks so the next part upload can use those blocks as well
+        m_KnownBlocks.insert(m_KnownBlocks.end(), NewBlocks.BlockDescriptions.begin(), NewBlocks.BlockDescriptions.end());
+    }
+
+    m_Progress.SetLogOperationProgress(PartStepOffset + (uint32_t)PartTaskSteps::PutBuildPartStats, StepCount);
+
+    m_Storage.BuildStorage->PutBuildPartStats(
+        m_BuildId,
+        Part.PartId,
+        {{"totalSize", double(Part.LocalFolderScanStats.FoundFileByteCount.load())},
+         {"reusedRatio", AcceptedByteCountPercent / 100.0},
+         {"reusedBlockCount", double(FindBlocksStats.AcceptedBlockCount)},
+         {"reusedBlockByteCount", double(ReuseBlocksStats.AcceptedRawByteCount)},
+         {"newBlockCount", double(FindBlocksStats.NewBlocksCount)},
+         {"newBlockByteCount", double(FindBlocksStats.NewBlocksChunkByteCount)},
+         {"uploadedCount", double(UploadStats.BlockCount.load() + UploadStats.ChunkCount.load())},
+         {"uploadedByteCount", double(UploadStats.BlocksBytes.load() + UploadStats.ChunksBytes.load())},
+         {"uploadedBytesPerSec",
+          double(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.ChunksBytes + UploadStats.BlocksBytes))},
+         {"elapsedTimeSec", double(UploadTimer.GetElapsedTimeMs() / 1000.0)}});
+
+    // Fold the part-local stats into the operation totals.
+    m_LocalFolderScanStats += Part.LocalFolderScanStats;
+    m_ChunkingStats += ChunkingStats;
+    m_FindBlocksStats += FindBlocksStats;
+    m_ReuseBlocksStats += ReuseBlocksStats;
+    m_UploadStats += UploadStats;
+    m_GenerateBlocksStats += GenerateBlocksStats;
+    m_LooseChunksStats += LooseChunksStats;
+}
+
+// Chunks the part's folder content on the I/O worker pool, driving a progress
+// bar with files-processed / bytes-hashed / hash-rate details. Honors the
+// abort and pause flags; on abort the (partial) content is returned without
+// the summary log.
+ChunkedFolderContent
+BuildsOperationUploadFolder::ScanPartContent(const UploadPart&   Part,
+                                             ChunkingController& ChunkController,
+                                             ChunkingCache&      ChunkCache,
+                                             ChunkingStatistics& ChunkingStats)
+{
+    Stopwatch ScanTimer;
+
+    std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Scan Folder");
+
+    // Smoothed bytes/sec rate shown in the progress details.
+    FilteredRate FilteredBytesHashed;
+    FilteredBytesHashed.Start();
+    ChunkedFolderContent LocalContent = ChunkFolderContent(
+        ChunkingStats,
+        m_IOWorkerPool,
+        m_Path,
+        Part.Content,
+        ChunkController,
+        ChunkCache,
+        m_Progress.GetProgressUpdateDelayMS(),
+        [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) {
+            // Periodic progress callback; reads the atomics in ChunkingStats.
+            FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load());
+            std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found",
+                                              ChunkingStats.FilesProcessed.load(),
+                                              Part.Content.Paths.size(),
+                                              NiceBytes(ChunkingStats.BytesHashed.load()),
+                                              NiceBytes(Part.TotalRawSize),
+                                              NiceNum(FilteredBytesHashed.GetCurrent()),
+                                              ChunkingStats.UniqueChunksFound.load(),
+                                              NiceBytes(ChunkingStats.UniqueBytesFound.load()));
+            ProgressBar->UpdateState({.Task = "Scanning files    ",
+                                      .Details = Details,
+                                      .TotalCount = Part.TotalRawSize,
+                                      .RemainingCount = Part.TotalRawSize - ChunkingStats.BytesHashed.load(),
+                                      .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+                                     false);
+        },
+        m_AbortFlag,
+        m_PauseFlag);
+    FilteredBytesHashed.Stop();
+    ProgressBar->Finish();
+    if (m_AbortFlag)
+    {
+        return LocalContent;
+    }
+
+    if (!m_Options.IsQuiet)
+    {
+        ZEN_INFO("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec",
+                 Part.Content.Paths.size(),
+                 NiceBytes(Part.TotalRawSize),
+                 ChunkingStats.UniqueChunksFound.load(),
+                 NiceBytes(ChunkingStats.UniqueBytesFound.load()),
+                 m_Path,
+                 NiceTimeSpanMs(ScanTimer.GetElapsedTimeMs()),
+                 NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed)));
+    }
+
+    return LocalContent;
+}
+
+// Blocks on the asynchronous prepare-build work and publishes its results:
+// find-blocks statistics, the preferred multipart chunk size, the large
+// attachment threshold, and the set of known reusable blocks.
+// Must be called exactly once (the future can only be consumed once);
+// UploadBuildPart calls it for part index 0.
+void
+BuildsOperationUploadFolder::ConsumePrepareBuildResult()
+{
+    const PrepareBuildResult PrepBuildResult = m_PrepBuildResultFuture.get();
+
+    m_FindBlocksStats.FindBlockTimeMS = PrepBuildResult.ElapsedTimeMs;
+    m_FindBlocksStats.FoundBlockCount = PrepBuildResult.KnownBlocks.size();
+
+    if (!m_Options.IsQuiet)
+    {
+        ZEN_INFO("Build prepare took {}. {} took {}, payload size {}{}",
+                 NiceTimeSpanMs(PrepBuildResult.ElapsedTimeMs),
+                 m_CreateBuild ? "PutBuild" : "GetBuild",
+                 NiceTimeSpanMs(PrepBuildResult.PrepareBuildTimeMs),
+                 NiceBytes(PrepBuildResult.PayloadSize),
+                 m_Options.IgnoreExistingBlocks ? ""
+                                                : fmt::format(". Found {} blocks in {}",
+                                                              PrepBuildResult.KnownBlocks.size(),
+                                                              NiceTimeSpanMs(PrepBuildResult.FindBlocksTimeMs)));
+    }
+
+    m_PreferredMultipartChunkSize = PrepBuildResult.PreferredMultipartChunkSize;
+    // Multipart uploads kick in at 4x the preferred multipart chunk size;
+    // disabled entirely (max value) when multiparts are not allowed.
+    m_LargeAttachmentSize = m_Options.AllowMultiparts ? m_PreferredMultipartChunkSize * 4u : (std::uint64_t)-1;
+    m_KnownBlocks = std::move(PrepBuildResult.KnownBlocks);
+}
+
+// Splits the part's chunks into three groups: loose chunks (empty or larger
+// than MaxChunkEmbedSize, uploaded individually), chunks that can be reused
+// from known blocks, and chunks that need to go into new blocks. When
+// IgnoreExistingBlocks is set, all block-eligible chunks become new-block
+// chunks and no reuse matching is done. Fills the corresponding statistics.
+void
+BuildsOperationUploadFolder::ClassifyChunksByBlockEligibility(const ChunkedFolderContent& LocalContent,
+                                                              std::vector<uint32_t>&      OutLooseChunkIndexes,
+                                                              std::vector<uint32_t>&      OutNewBlockChunkIndexes,
+                                                              std::vector<size_t>&        OutReuseBlockIndexes,
+                                                              LooseChunksStatistics&      LooseChunksStats,
+                                                              FindBlocksStatistics&       FindBlocksStats,
+                                                              ReuseBlocksStatistics&      ReuseBlocksStats)
+{
+    // NOTE(review): blocks are unconditionally enabled here; this constant
+    // reads like a leftover feature toggle.
+    const bool                 EnableBlocks = true;
+    std::vector<std::uint32_t> BlockChunkIndexes;
+    for (uint32_t ChunkIndex = 0; ChunkIndex < LocalContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++)
+    {
+        const uint64_t ChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex];
+        if (!EnableBlocks || ChunkRawSize == 0 || ChunkRawSize > m_Options.BlockParameters.MaxChunkEmbedSize)
+        {
+            // Too big (or empty) to embed in a block: keep it loose.
+            OutLooseChunkIndexes.push_back(ChunkIndex);
+            LooseChunksStats.ChunkByteCount += ChunkRawSize;
+        }
+        else
+        {
+            BlockChunkIndexes.push_back(ChunkIndex);
+            FindBlocksStats.PotentialChunkByteCount += ChunkRawSize;
+        }
+    }
+    FindBlocksStats.PotentialChunkCount += BlockChunkIndexes.size();
+    LooseChunksStats.ChunkCount = OutLooseChunkIndexes.size();
+
+    if (m_Options.IgnoreExistingBlocks)
+    {
+        if (!m_Options.IsQuiet)
+        {
+            ZEN_INFO("Ignoring any existing blocks in store");
+        }
+        OutNewBlockChunkIndexes = std::move(BlockChunkIndexes);
+        return;
+    }
+
+    // Match block-eligible chunks against the known blocks; chunks with no
+    // good-enough block match come back in OutNewBlockChunkIndexes.
+    OutReuseBlockIndexes = FindReuseBlocks(Log(),
+                                           m_Options.BlockReuseMinPercentLimit,
+                                           m_Options.IsVerbose,
+                                           ReuseBlocksStats,
+                                           m_KnownBlocks,
+                                           LocalContent.ChunkedContent.ChunkHashes,
+                                           BlockChunkIndexes,
+                                           OutNewBlockChunkIndexes);
+    FindBlocksStats.AcceptedBlockCount += OutReuseBlockIndexes.size();
+
+    // Totals over all known blocks (not just the accepted ones) for the
+    // discovery summary log.
+    for (const ChunkBlockDescription& Description : m_KnownBlocks)
+    {
+        for (uint32_t ChunkRawLength : Description.ChunkRawLengths)
+        {
+            FindBlocksStats.FoundBlockByteCount += ChunkRawLength;
+        }
+        FindBlocksStats.FoundBlockChunkCount += Description.ChunkRawHashes.size();
+    }
+}
+
+// Serializes the part manifest (compact binary): chunker name/parameters,
+// folder content, the absolute chunk order, loose chunk indexes and the full
+// list of block hashes (reused blocks first, then newly generated blocks).
+// With DoExtraContentValidation the mapping between local and absolute chunk
+// index spaces is asserted, and the saved manifest is read back and compared
+// field by field against the input content.
+BuildsOperationUploadFolder::BuiltPartManifest
+BuildsOperationUploadFolder::BuildPartManifestObject(const ChunkedFolderContent& LocalContent,
+                                                     const ChunkedContentLookup& LocalLookup,
+                                                     ChunkingController&         ChunkController,
+                                                     std::span<const size_t>     ReuseBlockIndexes,
+                                                     const GeneratedBlocks&      NewBlocks,
+                                                     std::span<const uint32_t>   LooseChunkIndexes)
+{
+    BuiltPartManifest Result;
+
+    CbObjectWriter PartManifestWriter;
+    Stopwatch      ManifestGenerationTimer;
+    // Timing log fires on scope exit, including validation-assert unwinds.
+    auto           __ = MakeGuard([&]() {
+        if (!m_Options.IsQuiet)
+        {
+            ZEN_INFO("Generated build part manifest in {} ({})",
+                     NiceTimeSpanMs(ManifestGenerationTimer.GetElapsedTimeMs()),
+                     NiceBytes(PartManifestWriter.GetSaveSize()));
+        }
+    });
+
+    // Record which chunker produced this content so readers can re-chunk
+    // compatibly.
+    PartManifestWriter.BeginObject("chunker"sv);
+    {
+        PartManifestWriter.AddString("name"sv, ChunkController.GetName());
+        PartManifestWriter.AddObject("parameters"sv, ChunkController.GetParameters());
+    }
+    PartManifestWriter.EndObject();    // chunker
+
+    // Block order in the manifest: reused blocks first, then new blocks.
+    // This order defines the absolute chunk index space used below.
+    Result.AllChunkBlockHashes.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size());
+    Result.AllChunkBlockDescriptions.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size());
+    for (size_t ReuseBlockIndex : ReuseBlockIndexes)
+    {
+        Result.AllChunkBlockDescriptions.push_back(m_KnownBlocks[ReuseBlockIndex]);
+        Result.AllChunkBlockHashes.push_back(m_KnownBlocks[ReuseBlockIndex].BlockHash);
+    }
+    Result.AllChunkBlockDescriptions.insert(Result.AllChunkBlockDescriptions.end(),
+                                            NewBlocks.BlockDescriptions.begin(),
+                                            NewBlocks.BlockDescriptions.end());
+    for (const ChunkBlockDescription& BlockDescription : NewBlocks.BlockDescriptions)
+    {
+        Result.AllChunkBlockHashes.push_back(BlockDescription.BlockHash);
+    }
+
+    // Optional pre-validation: rebuild the absolute hash list and assert both
+    // lookup directions are consistent for every local chunk.
+    std::vector<IoHash> AbsoluteChunkHashes;
+    if (m_Options.DoExtraContentValidation)
+    {
+        tsl::robin_map<IoHash, size_t, IoHash::Hasher> ChunkHashToAbsoluteChunkIndex;
+        AbsoluteChunkHashes.reserve(LocalContent.ChunkedContent.ChunkHashes.size());
+        for (uint32_t ChunkIndex : LooseChunkIndexes)
+        {
+            ChunkHashToAbsoluteChunkIndex.insert({LocalContent.ChunkedContent.ChunkHashes[ChunkIndex], AbsoluteChunkHashes.size()});
+            AbsoluteChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]);
+        }
+        for (const ChunkBlockDescription& Block : Result.AllChunkBlockDescriptions)
+        {
+            for (const IoHash& ChunkHash : Block.ChunkRawHashes)
+            {
+                ChunkHashToAbsoluteChunkIndex.insert({ChunkHash, AbsoluteChunkHashes.size()});
+                AbsoluteChunkHashes.push_back(ChunkHash);
+            }
+        }
+        for (const IoHash& ChunkHash : LocalContent.ChunkedContent.ChunkHashes)
+        {
+            ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(ChunkHash)] == ChunkHash);
+            ZEN_ASSERT(LocalContent.ChunkedContent.ChunkHashes[LocalLookup.ChunkHashToChunkIndex.at(ChunkHash)] == ChunkHash);
+        }
+        for (const uint32_t ChunkIndex : LocalContent.ChunkedContent.ChunkOrders)
+        {
+            ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex])] ==
+                       LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]);
+            ZEN_ASSERT(LocalLookup.ChunkHashToChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]) == ChunkIndex);
+        }
+    }
+
+    std::vector<uint32_t> AbsoluteChunkOrders = CalculateAbsoluteChunkOrders(LocalContent.ChunkedContent.ChunkHashes,
+                                                                             LocalContent.ChunkedContent.ChunkOrders,
+                                                                             LocalLookup.ChunkHashToChunkIndex,
+                                                                             LooseChunkIndexes,
+                                                                             Result.AllChunkBlockDescriptions);
+
+    if (m_Options.DoExtraContentValidation)
+    {
+        // Cross-check the rebased order against the validation hash list.
+        for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); ChunkOrderIndex++)
+        {
+            uint32_t      LocalChunkIndex    = LocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndex];
+            uint32_t      AbsoluteChunkIndex = AbsoluteChunkOrders[ChunkOrderIndex];
+            const IoHash& LocalChunkHash     = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex];
+            const IoHash& AbsoluteChunkHash  = AbsoluteChunkHashes[AbsoluteChunkIndex];
+            ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash);
+        }
+    }
+
+    WriteBuildContentToCompactBinary(PartManifestWriter,
+                                     LocalContent.Platform,
+                                     LocalContent.Paths,
+                                     LocalContent.RawHashes,
+                                     LocalContent.RawSizes,
+                                     LocalContent.Attributes,
+                                     LocalContent.ChunkedContent.SequenceRawHashes,
+                                     LocalContent.ChunkedContent.ChunkCounts,
+                                     LocalContent.ChunkedContent.ChunkHashes,
+                                     LocalContent.ChunkedContent.ChunkRawSizes,
+                                     AbsoluteChunkOrders,
+                                     LooseChunkIndexes,
+                                     Result.AllChunkBlockHashes);
+
+    if (m_Options.DoExtraContentValidation)
+    {
+        // Round-trip check: read the manifest back and verify every field
+        // matches the content that was written.
+        ChunkedFolderContent VerifyFolderContent;
+
+        std::vector<uint32_t> OutAbsoluteChunkOrders;
+        std::vector<IoHash>   OutLooseChunkHashes;
+        std::vector<uint64_t> OutLooseChunkRawSizes;
+        std::vector<IoHash>   OutBlockRawHashes;
+        ReadBuildContentFromCompactBinary(PartManifestWriter.Save(),
+                                          VerifyFolderContent.Platform,
+                                          VerifyFolderContent.Paths,
+                                          VerifyFolderContent.RawHashes,
+                                          VerifyFolderContent.RawSizes,
+                                          VerifyFolderContent.Attributes,
+                                          VerifyFolderContent.ChunkedContent.SequenceRawHashes,
+                                          VerifyFolderContent.ChunkedContent.ChunkCounts,
+                                          OutAbsoluteChunkOrders,
+                                          OutLooseChunkHashes,
+                                          OutLooseChunkRawSizes,
+                                          OutBlockRawHashes);
+        ZEN_ASSERT(OutBlockRawHashes == Result.AllChunkBlockHashes);
+
+        for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++)
+        {
+            uint32_t     LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex];
+            const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex];
+
+            uint32_t     VerifyChunkIndex = OutAbsoluteChunkOrders[OrderIndex];
+            const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex];
+
+            ZEN_ASSERT(LocalChunkHash == VerifyChunkHash);
+        }
+
+        // Rebuild local orders from the round-tripped data and compare the
+        // reconstructed content against the original.
+        CalculateLocalChunkOrders(OutAbsoluteChunkOrders,
+                                  OutLooseChunkHashes,
+                                  OutLooseChunkRawSizes,
+                                  Result.AllChunkBlockDescriptions,
+                                  VerifyFolderContent.ChunkedContent.ChunkHashes,
+                                  VerifyFolderContent.ChunkedContent.ChunkRawSizes,
+                                  VerifyFolderContent.ChunkedContent.ChunkOrders,
+                                  m_Options.DoExtraContentValidation);
+
+        ZEN_ASSERT(LocalContent.Paths == VerifyFolderContent.Paths);
+        ZEN_ASSERT(LocalContent.RawHashes == VerifyFolderContent.RawHashes);
+        ZEN_ASSERT(LocalContent.RawSizes == VerifyFolderContent.RawSizes);
+        ZEN_ASSERT(LocalContent.Attributes == VerifyFolderContent.Attributes);
+        ZEN_ASSERT(LocalContent.ChunkedContent.SequenceRawHashes == VerifyFolderContent.ChunkedContent.SequenceRawHashes);
+        ZEN_ASSERT(LocalContent.ChunkedContent.ChunkCounts == VerifyFolderContent.ChunkedContent.ChunkCounts);
+
+        for (uint32_t OrderIndex = 0; OrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); OrderIndex++)
+        {
+            uint32_t     LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex];
+            const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex];
+            uint64_t     LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex];
+
+            uint32_t     VerifyChunkIndex = VerifyFolderContent.ChunkedContent.ChunkOrders[OrderIndex];
+            const IoHash VerifyChunkHash = VerifyFolderContent.ChunkedContent.ChunkHashes[VerifyChunkIndex];
+            uint64_t     VerifyChunkRawSize = VerifyFolderContent.ChunkedContent.ChunkRawSizes[VerifyChunkIndex];
+
+            ZEN_ASSERT(LocalChunkHash == VerifyChunkHash);
+            ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize);
+        }
+    }
+
+    Result.PartManifest = PartManifestWriter.Save();
+    return Result;
+}
+
+// Uploads one batch of attachments (blocks and loose chunks identified by
+// RawHashes) via UploadPartBlobs, collecting hashes the server still does not
+// recognize into OutUnknownChunks for the finalize-with-retries step.
+// Statistics are accumulated into temporaries and logged on scope exit, then
+// merged into the caller's stats. No-op when an abort is pending.
+void
+BuildsOperationUploadFolder::UploadAttachmentBatch(std::span<IoHash>                         RawHashes,
+                                                   std::vector<IoHash>&                      OutUnknownChunks,
+                                                   const ChunkedFolderContent&               LocalContent,
+                                                   const ChunkedContentLookup&               LocalLookup,
+                                                   const std::vector<std::vector<uint32_t>>& NewBlockChunks,
+                                                   GeneratedBlocks&                          NewBlocks,
+                                                   std::span<const uint32_t>                 LooseChunkIndexes,
+                                                   UploadStatistics&                         UploadStats,
+                                                   LooseChunksStatistics&                    LooseChunksStats)
+{
+    if (m_AbortFlag)
+    {
+        return;
+    }
+
+    // Batch-local stats so the summary below reflects only this batch.
+    UploadStatistics      TempUploadStats;
+    LooseChunksStatistics TempLooseChunksStats;
+
+    Stopwatch TempUploadTimer;
+    auto      __ = MakeGuard([&]() {
+        if (!m_Options.IsQuiet)
+        {
+            uint64_t TempChunkUploadTimeUs = TempUploadTimer.GetElapsedTimeUs();
+            ZEN_INFO(
+                "Uploaded {} ({}) blocks. "
+                "Compressed {} ({} {}B/s) and uploaded {} ({}) chunks. "
+                "Transferred {} ({}bits/s) in {}",
+                TempUploadStats.BlockCount.load(),
+                NiceBytes(TempUploadStats.BlocksBytes),
+
+                TempLooseChunksStats.CompressedChunkCount.load(),
+                NiceBytes(TempLooseChunksStats.CompressedChunkBytes.load()),
+                NiceNum(GetBytesPerSecond(TempLooseChunksStats.CompressChunksElapsedWallTimeUS, TempLooseChunksStats.ChunkByteCount)),
+                TempUploadStats.ChunkCount.load(),
+                NiceBytes(TempUploadStats.ChunksBytes),
+
+                NiceBytes(TempUploadStats.BlocksBytes + TempUploadStats.ChunksBytes),
+                NiceNum(GetBytesPerSecond(TempUploadStats.ElapsedWallTimeUS, TempUploadStats.ChunksBytes * 8)),
+                NiceTimeSpanMs(TempChunkUploadTimeUs / 1000));
+        }
+    });
+    UploadPartBlobs(LocalContent,
+                    LocalLookup,
+                    RawHashes,
+                    NewBlockChunks,
+                    NewBlocks,
+                    LooseChunkIndexes,
+                    m_LargeAttachmentSize,
+                    TempUploadStats,
+                    TempLooseChunksStats,
+                    OutUnknownChunks);
+    UploadStats += TempUploadStats;
+    LooseChunksStats += TempLooseChunksStats;
+}
+
+// Finalizes a build part on the server and retries while the server keeps
+// reporting missing attachments. Each round re-uploads whatever the server
+// says it still needs, up to 5 attempts (or until aborted).
+// InOutUnknownChunks carries blob hashes the server requested that could
+// not be mapped to any local block or chunk; if that set is still
+// non-empty after the loop, a runtime_error is thrown.
+void
+BuildsOperationUploadFolder::FinalizeBuildPartWithRetries(const UploadPart& Part,
+                                                          const IoHash& PartHash,
+                                                          std::vector<IoHash>& InOutUnknownChunks,
+                                                          const ChunkedFolderContent& LocalContent,
+                                                          const ChunkedContentLookup& LocalLookup,
+                                                          const std::vector<std::vector<uint32_t>>& NewBlockChunks,
+                                                          GeneratedBlocks& NewBlocks,
+                                                          std::span<const uint32_t> LooseChunkIndexes,
+                                                          UploadStatistics& UploadStats,
+                                                          LooseChunksStatistics& LooseChunksStats)
+{
+    // Builds the warning/error text for unknown-blob situations; WillRetry
+    // selects the "transient inconsistency, will retry" phrasing.
+    auto BuildUnkownChunksResponse = [](const std::vector<IoHash>& UnknownChunks, bool WillRetry) {
+        return fmt::format(
+            "The following build blobs was reported as needed for upload but was reported as existing at the start of the "
+            "operation.{}{}",
+            WillRetry ? " Treating this as a transient inconsistency issue and will attempt to retry finalization."sv : ""sv,
+            FormatArray<IoHash>(UnknownChunks, "\n "sv));
+    };
+
+    if (!InOutUnknownChunks.empty())
+    {
+        ZEN_WARN("{}", BuildUnkownChunksResponse(InOutUnknownChunks, /*WillRetry*/ true));
+    }
+
+    // Attempt finalization up to 5 times; each pass asks the server which
+    // attachments are still missing and re-uploads exactly those.
+    uint32_t FinalizeBuildPartRetryCount = 5;
+    while (!m_AbortFlag && (FinalizeBuildPartRetryCount--) > 0)
+    {
+        Stopwatch FinalizeBuildPartTimer;
+        std::vector<IoHash> Needs = m_Storage.BuildStorage->FinalizeBuildPart(m_BuildId, Part.PartId, PartHash);
+        if (!m_Options.IsQuiet)
+        {
+            ZEN_INFO("FinalizeBuildPart took {}. {} attachments are missing.",
+                     NiceTimeSpanMs(FinalizeBuildPartTimer.GetElapsedTimeMs()),
+                     Needs.size());
+        }
+        // Nothing missing: the part is finalized.
+        if (Needs.empty())
+        {
+            break;
+        }
+        if (m_Options.IsVerbose)
+        {
+            ZEN_INFO("FinalizeBuildPart needs attachments: {}", FormatArray<IoHash>(Needs, "\n "sv));
+        }
+
+        std::vector<IoHash> RetryUnknownChunks;
+        UploadAttachmentBatch(Needs,
+                              RetryUnknownChunks,
+                              LocalContent,
+                              LocalLookup,
+                              NewBlockChunks,
+                              NewBlocks,
+                              LooseChunkIndexes,
+                              UploadStats,
+                              LooseChunksStats);
+        // An unchanged unknown set is treated as a transient server-side
+        // inconsistency: back off and retry. A changed set is recorded and
+        // re-warned (note FinalizeBuildPartRetryCount was already
+        // decremented in the loop condition, so 0 here means last attempt).
+        if (RetryUnknownChunks == InOutUnknownChunks)
+        {
+            if (FinalizeBuildPartRetryCount > 0)
+            {
+                // Back off a bit
+                Sleep(1000);
+            }
+        }
+        else
+        {
+            InOutUnknownChunks = RetryUnknownChunks;
+            ZEN_WARN("{}", BuildUnkownChunksResponse(InOutUnknownChunks, /*WillRetry*/ FinalizeBuildPartRetryCount != 0));
+        }
+    }
+
+    // Out of retries (or aborted) with blobs still unaccounted for: fail.
+    if (!InOutUnknownChunks.empty())
+    {
+        throw std::runtime_error(BuildUnkownChunksResponse(InOutUnknownChunks, /*WillRetry*/ false));
+    }
+}
+
+// Uploads the chunk-block description metadata for every block that has
+// not yet had its metadata stored, retrying failures up to 3 passes.
+// Successfully uploaded blocks are marked in NewBlocks and counted into
+// UploadStats; a summary is logged unless quiet.
+void
+BuildsOperationUploadFolder::UploadMissingBlockMetadata(GeneratedBlocks& NewBlocks, UploadStatistics& UploadStats)
+{
+    uint64_t UploadedMetadataCount = 0;
+    Stopwatch MetadataTimer;
+
+    uint32_t PendingFailureCount = 1; // non-zero so the loop runs at least once
+    int32_t RetriesLeft = 3;
+    while ((RetriesLeft-- > 0) && (PendingFailureCount > 0))
+    {
+        PendingFailureCount = 0;
+        for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockDescriptions.size(); BlockIndex++)
+        {
+            if (m_AbortFlag)
+            {
+                break;
+            }
+            // Skip blocks whose metadata already made it to storage.
+            if (NewBlocks.MetaDataHasBeenUploaded[BlockIndex])
+            {
+                continue;
+            }
+            const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash;
+            const CbObject BlockMetaData =
+                BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]);
+            // Mirror the metadata into the cache when cache population is on.
+            if (m_Storage.CacheStorage && m_Options.PopulateCache)
+            {
+                m_Storage.CacheStorage->PutBlobMetadatas(m_BuildId,
+                                                         std::vector<IoHash>({BlockHash}),
+                                                         std::vector<CbObject>({BlockMetaData}));
+            }
+            if (m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData))
+            {
+                UploadStats.BlocksBytes += BlockMetaData.GetSize();
+                NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true;
+                UploadedMetadataCount++;
+            }
+            else
+            {
+                // Leave it unmarked; the next pass (if any) retries it.
+                PendingFailureCount++;
+            }
+        }
+    }
+    if (UploadedMetadataCount > 0)
+    {
+        uint64_t ElapsedUS = MetadataTimer.GetElapsedTimeUs();
+        UploadStats.ElapsedWallTimeUS += ElapsedUS;
+        if (!m_Options.IsQuiet)
+        {
+            ZEN_INFO("Uploaded metadata for {} blocks in {}", UploadedMetadataCount, NiceTimeSpanMs(ElapsedUS / 1000));
+        }
+    }
+}
+
+// Uploads the blobs identified by RawHashes for the current part. The
+// hashes are first classified into newly generated blocks and loose
+// chunks; block generation / chunk compression is then scheduled on the
+// read pool and uploads on the network pool, while this thread pumps a
+// progress bar until the work queue drains. Hashes matching neither a
+// block nor a known chunk are appended to OutUnknownChunks.
+void
+BuildsOperationUploadFolder::UploadPartBlobs(const ChunkedFolderContent& Content,
+                                             const ChunkedContentLookup& Lookup,
+                                             std::span<IoHash> RawHashes,
+                                             const std::vector<std::vector<uint32_t>>& NewBlockChunks,
+                                             GeneratedBlocks& NewBlocks,
+                                             std::span<const uint32_t> LooseChunkIndexes,
+                                             const std::uint64_t LargeAttachmentSize,
+                                             UploadStatistics& TempUploadStats,
+                                             LooseChunksStatistics& TempLooseChunksStats,
+                                             std::vector<IoHash>& OutUnknownChunks)
+{
+    ZEN_TRACE_CPU("UploadPartBlobs");
+
+    UploadPartClassification Classification =
+        ClassifyUploadRawHashes(RawHashes, Content, Lookup, NewBlocks, LooseChunkIndexes, OutUnknownChunks);
+
+    // Nothing to upload (OutUnknownChunks may still have been populated).
+    if (Classification.BlockIndexes.empty() && Classification.LooseChunkOrderIndexes.empty())
+    {
+        return;
+    }
+
+    std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Upload Blobs");
+
+    FilteredRate FilteredGenerateBlockBytesPerSecond;
+    FilteredRate FilteredCompressedBytesPerSecond;
+    FilteredRate FilteredUploadedBytesPerSecond;
+
+    ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+
+    // Shared counters updated by the worker lambdas scheduled below.
+    std::atomic<size_t> UploadedBlockSize = 0;
+    std::atomic<size_t> UploadedBlockCount = 0;
+    std::atomic<size_t> UploadedRawChunkSize = 0;
+    std::atomic<size_t> UploadedCompressedChunkSize = 0;
+    std::atomic<uint32_t> UploadedChunkCount = 0;
+    std::atomic<uint64_t> GeneratedBlockCount = 0;
+    std::atomic<uint64_t> GeneratedBlockByteCount = 0;
+    std::atomic<uint64_t> QueuedPendingInMemoryBlocksForUpload = 0;
+
+    const size_t UploadBlockCount = Classification.BlockIndexes.size();
+    const uint32_t UploadChunkCount = gsl::narrow<uint32_t>(Classification.LooseChunkOrderIndexes.size());
+    const uint64_t TotalRawSize = Classification.TotalLooseChunksSize + Classification.TotalBlocksSize;
+
+    // The context holds references to the locals above for use by the
+    // scheduled lambdas; Work.Wait below keeps this frame alive until all
+    // scheduled work has completed.
+    UploadPartBlobsContext Context{.Work = Work,
+                                   .ReadChunkPool = m_IOWorkerPool,
+                                   .UploadChunkPool = m_NetworkPool,
+                                   .FilteredGenerateBlockBytesPerSecond = FilteredGenerateBlockBytesPerSecond,
+                                   .FilteredCompressedBytesPerSecond = FilteredCompressedBytesPerSecond,
+                                   .FilteredUploadedBytesPerSecond = FilteredUploadedBytesPerSecond,
+                                   .UploadedBlockSize = UploadedBlockSize,
+                                   .UploadedBlockCount = UploadedBlockCount,
+                                   .UploadedRawChunkSize = UploadedRawChunkSize,
+                                   .UploadedCompressedChunkSize = UploadedCompressedChunkSize,
+                                   .UploadedChunkCount = UploadedChunkCount,
+                                   .GeneratedBlockCount = GeneratedBlockCount,
+                                   .GeneratedBlockByteCount = GeneratedBlockByteCount,
+                                   .QueuedPendingInMemoryBlocksForUpload = QueuedPendingInMemoryBlocksForUpload,
+                                   .UploadBlockCount = UploadBlockCount,
+                                   .UploadChunkCount = UploadChunkCount,
+                                   .LargeAttachmentSize = LargeAttachmentSize,
+                                   .NewBlocks = NewBlocks,
+                                   .Content = Content,
+                                   .Lookup = Lookup,
+                                   .NewBlockChunks = NewBlockChunks,
+                                   .LooseChunkIndexes = LooseChunkIndexes,
+                                   .TempUploadStats = TempUploadStats,
+                                   .TempLooseChunksStats = TempLooseChunksStats};
+
+    ScheduleBlockGenerationAndUpload(Context, Classification.BlockIndexes);
+    ScheduleLooseChunkCompressionAndUpload(Context, Classification.LooseChunkOrderIndexes);
+
+    // Pump progress updates until all scheduled jobs have drained.
+    Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+        ZEN_UNUSED(PendingWork);
+        FilteredCompressedBytesPerSecond.Update(TempLooseChunksStats.CompressedChunkRawBytes.load());
+        FilteredGenerateBlockBytesPerSecond.Update(GeneratedBlockByteCount.load());
+        FilteredUploadedBytesPerSecond.Update(UploadedCompressedChunkSize.load() + UploadedBlockSize.load());
+        uint64_t UploadedRawSize = UploadedRawChunkSize.load() + UploadedBlockSize.load();
+        uint64_t UploadedCompressedSize = UploadedCompressedChunkSize.load() + UploadedBlockSize.load();
+
+        // Rate suffixes are omitted once the corresponding work is done.
+        std::string Details = fmt::format(
+            "Compressed {}/{} ({}/{}{}) chunks. "
+            "Uploaded {}/{} ({}/{}) blobs "
+            "({}{})",
+            TempLooseChunksStats.CompressedChunkCount.load(),
+            Classification.LooseChunkOrderIndexes.size(),
+            NiceBytes(TempLooseChunksStats.CompressedChunkRawBytes),
+            NiceBytes(Classification.TotalLooseChunksSize),
+            (TempLooseChunksStats.CompressedChunkCount == Classification.LooseChunkOrderIndexes.size())
+                ? ""
+                : fmt::format(" {}B/s", NiceNum(FilteredCompressedBytesPerSecond.GetCurrent())),
+
+            UploadedBlockCount.load() + UploadedChunkCount.load(),
+            UploadBlockCount + UploadChunkCount,
+            NiceBytes(UploadedRawSize),
+            NiceBytes(TotalRawSize),
+
+            NiceBytes(UploadedCompressedSize),
+            (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount)
+                ? ""
+                : fmt::format(" {}bits/s", NiceNum(FilteredUploadedBytesPerSecond.GetCurrent())));
+
+        ProgressBar->UpdateState({.Task = "Uploading blobs ",
+                                  .Details = Details,
+                                  .TotalCount = gsl::narrow<uint64_t>(TotalRawSize),
+                                  .RemainingCount = gsl::narrow<uint64_t>(TotalRawSize - UploadedRawSize),
+                                  .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+                                 false);
+    });
+
+    // Unless aborted, every queued in-memory block payload must have been
+    // consumed by its upload job by now.
+    ZEN_ASSERT(m_AbortFlag || QueuedPendingInMemoryBlocksForUpload.load() == 0);
+
+    ProgressBar->Finish();
+
+    // Fold measured wall times into the caller-visible statistics.
+    TempUploadStats.ElapsedWallTimeUS += FilteredUploadedBytesPerSecond.GetElapsedTimeUS();
+    TempLooseChunksStats.CompressChunksElapsedWallTimeUS += FilteredCompressedBytesPerSecond.GetElapsedTimeUS();
+}
+
+// Partitions RawHashes into (a) indexes of newly generated blocks and
+// (b) loose-chunk order indexes, accumulating the total byte size of each
+// group. Hashes that match neither a new block nor any known chunk are
+// appended to OutUnknownChunks. NOTE(review): a hash that resolves to a
+// known chunk which is not in LooseChunkIndexes is silently dropped here —
+// presumably it is covered by an existing block; verify with callers.
+BuildsOperationUploadFolder::UploadPartClassification
+BuildsOperationUploadFolder::ClassifyUploadRawHashes(std::span<IoHash> RawHashes,
+                                                     const ChunkedFolderContent& Content,
+                                                     const ChunkedContentLookup& Lookup,
+                                                     const GeneratedBlocks& NewBlocks,
+                                                     std::span<const uint32_t> LooseChunkIndexes,
+                                                     std::vector<IoHash>& OutUnknownChunks)
+{
+    UploadPartClassification Classification;
+
+    // Invert LooseChunkIndexes so a chunk index maps back to its position
+    // in the loose-chunk ordering with a single lookup.
+    tsl::robin_map<uint32_t, uint32_t> LooseOrderByChunkIndex;
+    LooseOrderByChunkIndex.reserve(LooseChunkIndexes.size());
+    for (uint32_t Position = 0; Position < LooseChunkIndexes.size(); Position++)
+    {
+        LooseOrderByChunkIndex.insert_or_assign(LooseChunkIndexes[Position], Position);
+    }
+
+    for (const IoHash& Hash : RawHashes)
+    {
+        auto BlockIt = NewBlocks.BlockHashToBlockIndex.find(Hash);
+        if (BlockIt != NewBlocks.BlockHashToBlockIndex.end())
+        {
+            // The hash names one of the freshly generated blocks.
+            Classification.BlockIndexes.push_back(BlockIt->second);
+            Classification.TotalBlocksSize += NewBlocks.BlockSizes[BlockIt->second];
+            continue;
+        }
+
+        auto ChunkIt = Lookup.ChunkHashToChunkIndex.find(Hash);
+        if (ChunkIt == Lookup.ChunkHashToChunkIndex.end())
+        {
+            // Neither a block nor a known chunk - report it to the caller.
+            OutUnknownChunks.push_back(Hash);
+            continue;
+        }
+
+        const uint32_t ChunkIndex = ChunkIt->second;
+        auto OrderIt = LooseOrderByChunkIndex.find(ChunkIndex);
+        if (OrderIt != LooseOrderByChunkIndex.end())
+        {
+            Classification.LooseChunkOrderIndexes.push_back(OrderIt->second);
+            Classification.TotalLooseChunksSize += Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
+        }
+    }
+    return Classification;
+}
+
+// Queues one generation job per requested block on the read pool. Each job
+// either rebuilds the block from a previously captured header or generates
+// it from scratch, then hands the compressed payload to UploadBlockPayload
+// (which schedules the actual network upload).
+void
+BuildsOperationUploadFolder::ScheduleBlockGenerationAndUpload(UploadPartBlobsContext& Context, std::span<const size_t> BlockIndexes)
+{
+    for (const size_t BlockIndex : BlockIndexes)
+    {
+        const IoHash& BlockHash = Context.NewBlocks.BlockDescriptions[BlockIndex].BlockHash;
+        if (m_AbortFlag)
+        {
+            break;
+        }
+        // BlockHash is copied into the lambda because the job outlives this
+        // loop iteration.
+        Context.Work.ScheduleWork(
+            Context.ReadChunkPool,
+            [this, &Context, BlockHash = IoHash(BlockHash), BlockIndex, GenerateBlockCount = BlockIndexes.size()](std::atomic<bool>&) {
+                if (m_AbortFlag)
+                {
+                    return;
+                }
+                ZEN_TRACE_CPU("UploadPartBlobs_GenerateBlock");
+
+                Context.FilteredGenerateBlockBytesPerSecond.Start();
+
+                Stopwatch GenerateTimer;
+                CompositeBuffer Payload;
+                // A stored header means the block can be rebuilt from its
+                // recorded layout instead of being regenerated from scratch.
+                if (Context.NewBlocks.BlockHeaders[BlockIndex])
+                {
+                    Payload = RebuildBlock(Context.Content,
+                                           Context.Lookup,
+                                           std::move(Context.NewBlocks.BlockHeaders[BlockIndex]),
+                                           Context.NewBlockChunks[BlockIndex])
+                                  .GetCompressed();
+                }
+                else
+                {
+                    ChunkBlockDescription BlockDescription;
+                    CompressedBuffer CompressedBlock =
+                        GenerateBlock(Context.Content, Context.Lookup, Context.NewBlockChunks[BlockIndex], BlockDescription);
+                    if (!CompressedBlock)
+                    {
+                        throw std::runtime_error(fmt::format("Failed generating block {}", BlockHash));
+                    }
+                    ZEN_ASSERT(BlockDescription.BlockHash == BlockHash);
+                    Payload = std::move(CompressedBlock).GetCompressed();
+                }
+
+                Context.GeneratedBlockByteCount += Context.NewBlocks.BlockSizes[BlockIndex];
+                // The job that completes the last block stops the
+                // generation-rate filter.
+                if (Context.GeneratedBlockCount.fetch_add(1) + 1 == GenerateBlockCount)
+                {
+                    Context.FilteredGenerateBlockBytesPerSecond.Stop();
+                }
+                if (m_Options.IsVerbose)
+                {
+                    ZEN_INFO("{} block {} ({}) containing {} chunks in {}",
+                             Context.NewBlocks.BlockHeaders[BlockIndex] ? "Regenerated" : "Generated",
+                             Context.NewBlocks.BlockDescriptions[BlockIndex].BlockHash,
+                             NiceBytes(Context.NewBlocks.BlockSizes[BlockIndex]),
+                             Context.NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size(),
+                             NiceTimeSpanMs(GenerateTimer.GetElapsedTimeMs()));
+                }
+                if (!m_AbortFlag)
+                {
+                    UploadBlockPayload(Context, BlockIndex, BlockHash, std::move(Payload));
+                }
+            });
+    }
+}
+
+// Schedules the network upload of a generated block payload and its
+// metadata. To bound memory usage, once more than 16 in-memory payloads
+// are queued the payload is spilled to a temp file and streamed from there
+// instead of being kept in memory.
+void
+BuildsOperationUploadFolder::UploadBlockPayload(UploadPartBlobsContext& Context,
+                                                size_t BlockIndex,
+                                                const IoHash& BlockHash,
+                                                CompositeBuffer Payload)
+{
+    bool IsInMemoryBlock = true;
+    if (Context.QueuedPendingInMemoryBlocksForUpload.load() > 16)
+    {
+        ZEN_TRACE_CPU("AsyncUploadBlock_WriteTempBlock");
+        std::filesystem::path TempFilePath = m_Options.TempDir / (BlockHash.ToHexString());
+        Payload = CompositeBuffer(WriteToTempFile(std::move(Payload), TempFilePath));
+        IsInMemoryBlock = false;
+    }
+    else
+    {
+        Context.QueuedPendingInMemoryBlocksForUpload++;
+    }
+
+    Context.Work.ScheduleWork(
+        Context.UploadChunkPool,
+        [this, &Context, IsInMemoryBlock, BlockIndex, BlockHash = IoHash(BlockHash), Payload = CompositeBuffer(std::move(Payload))](
+            std::atomic<bool>&) {
+            // Release the in-memory slot whenever this job exits, including
+            // the early-return and exception paths.
+            auto _ = MakeGuard([IsInMemoryBlock, &Context] {
+                if (IsInMemoryBlock)
+                {
+                    Context.QueuedPendingInMemoryBlocksForUpload--;
+                }
+            });
+            if (m_AbortFlag)
+            {
+                return;
+            }
+            ZEN_TRACE_CPU("AsyncUploadBlock");
+
+            const uint64_t PayloadSize = Payload.GetSize();
+
+            Context.FilteredUploadedBytesPerSecond.Start();
+            const CbObject BlockMetaData =
+                BuildChunkBlockDescription(Context.NewBlocks.BlockDescriptions[BlockIndex], Context.NewBlocks.BlockMetaDatas[BlockIndex]);
+
+            // Mirror the blob into the cache when cache population is on.
+            if (m_Storage.CacheStorage && m_Options.PopulateCache)
+            {
+                m_Storage.CacheStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload);
+            }
+
+            try
+            {
+                m_Storage.BuildStorage->PutBuildBlob(m_BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload);
+            }
+            catch (const std::exception&)
+            {
+                // Silence http errors due to abort
+                if (!m_AbortFlag)
+                {
+                    throw;
+                }
+            }
+
+            if (m_AbortFlag)
+            {
+                return;
+            }
+            if (m_Options.IsVerbose)
+            {
+                ZEN_INFO("Uploaded block {} ({}) containing {} chunks",
+                         BlockHash,
+                         NiceBytes(PayloadSize),
+                         Context.NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size());
+            }
+            Context.UploadedBlockSize += PayloadSize;
+            Context.TempUploadStats.BlocksBytes += PayloadSize;
+
+            if (m_Storage.CacheStorage && m_Options.PopulateCache)
+            {
+                m_Storage.CacheStorage->PutBlobMetadatas(m_BuildId,
+                                                         std::vector<IoHash>({BlockHash}),
+                                                         std::vector<CbObject>({BlockMetaData}));
+            }
+
+            // Metadata upload failures are not fatal here; unmarked blocks
+            // are retried later by UploadMissingBlockMetadata.
+            bool MetadataSucceeded = false;
+            try
+            {
+                MetadataSucceeded = m_Storage.BuildStorage->PutBlockMetadata(m_BuildId, BlockHash, BlockMetaData);
+            }
+            catch (const std::exception&)
+            {
+                // Silence http errors due to abort
+                if (!m_AbortFlag)
+                {
+                    throw;
+                }
+            }
+            if (m_AbortFlag)
+            {
+                return;
+            }
+            if (MetadataSucceeded)
+            {
+                if (m_Options.IsVerbose)
+                {
+                    ZEN_INFO("Uploaded block {} metadata ({})", BlockHash, NiceBytes(BlockMetaData.GetSize()));
+                }
+                Context.NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true;
+                Context.TempUploadStats.BlocksBytes += BlockMetaData.GetSize();
+            }
+
+            Context.TempUploadStats.BlockCount++;
+
+            // Stop the upload-rate filter when all blocks AND all loose
+            // chunks have been uploaded.
+            if (Context.UploadedBlockCount.fetch_add(1) + 1 == Context.UploadBlockCount &&
+                Context.UploadedChunkCount == Context.UploadChunkCount)
+            {
+                Context.FilteredUploadedBytesPerSecond.Stop();
+            }
+        });
+}
+
+// Queues one compression job per loose chunk on the read pool; each job
+// compresses the chunk from its on-disk source and hands the payload to
+// UploadLooseChunkPayload (which schedules the network upload).
+void
+BuildsOperationUploadFolder::ScheduleLooseChunkCompressionAndUpload(UploadPartBlobsContext& Context,
+                                                                    std::span<const uint32_t> LooseChunkOrderIndexes)
+{
+    for (const uint32_t LooseChunkOrderIndex : LooseChunkOrderIndexes)
+    {
+        const uint32_t ChunkIndex = Context.LooseChunkIndexes[LooseChunkOrderIndex];
+        Context.Work.ScheduleWork(Context.ReadChunkPool,
+                                  [this, &Context, LooseChunkOrderCount = LooseChunkOrderIndexes.size(), ChunkIndex](std::atomic<bool>&) {
+                                      if (m_AbortFlag)
+                                      {
+                                          return;
+                                      }
+                                      ZEN_TRACE_CPU("UploadPartBlobs_CompressChunk");
+
+                                      Context.FilteredCompressedBytesPerSecond.Start();
+                                      Stopwatch CompressTimer;
+                                      CompositeBuffer Payload =
+                                          CompressChunk(Context.Content, Context.Lookup, ChunkIndex, Context.TempLooseChunksStats);
+                                      if (m_Options.IsVerbose)
+                                      {
+                                          ZEN_INFO("Compressed chunk {} ({} -> {}) in {}",
+                                                   Context.Content.ChunkedContent.ChunkHashes[ChunkIndex],
+                                                   NiceBytes(Context.Content.ChunkedContent.ChunkRawSizes[ChunkIndex]),
+                                                   NiceBytes(Payload.GetSize()),
+                                                   NiceTimeSpanMs(CompressTimer.GetElapsedTimeMs()));
+                                      }
+                                      const uint64_t ChunkRawSize = Context.Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
+                                      Context.TempUploadStats.ReadFromDiskBytes += ChunkRawSize;
+                                      // Stop the compression-rate filter once every loose chunk
+                                      // in this batch has been compressed.
+                                      if (Context.TempLooseChunksStats.CompressedChunkCount == LooseChunkOrderCount)
+                                      {
+                                          Context.FilteredCompressedBytesPerSecond.Stop();
+                                      }
+                                      if (!m_AbortFlag)
+                                      {
+                                          UploadLooseChunkPayload(Context,
+                                                                  Context.Content.ChunkedContent.ChunkHashes[ChunkIndex],
+                                                                  ChunkRawSize,
+                                                                  std::move(Payload));
+                                      }
+                                  });
+    }
+}
+
+// Schedules the network upload of one compressed loose chunk. Payloads at
+// or above Context.LargeAttachmentSize go through the multipart upload
+// path (the storage backend returns a set of part-upload work items which
+// are scheduled on the upload pool); smaller payloads are sent as a
+// single PUT.
+void
+BuildsOperationUploadFolder::UploadLooseChunkPayload(UploadPartBlobsContext& Context,
+                                                     const IoHash& RawHash,
+                                                     uint64_t RawSize,
+                                                     CompositeBuffer Payload)
+{
+    Context.Work.ScheduleWork(
+        Context.UploadChunkPool,
+        [this, &Context, RawHash = IoHash(RawHash), RawSize, Payload = CompositeBuffer(std::move(Payload))](std::atomic<bool>&) mutable {
+            if (m_AbortFlag)
+            {
+                return;
+            }
+            ZEN_TRACE_CPU("AsyncUploadLooseChunk");
+
+            const uint64_t PayloadSize = Payload.GetSize();
+
+            // Mirror the blob into the cache when cache population is on.
+            if (m_Storage.CacheStorage && m_Options.PopulateCache)
+            {
+                m_Storage.CacheStorage->PutBuildBlob(m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload);
+            }
+
+            if (PayloadSize >= Context.LargeAttachmentSize)
+            {
+                ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart");
+                Context.TempUploadStats.MultipartAttachmentCount++;
+                try
+                {
+                    // The first callback serves the requested byte range of
+                    // the payload; the second accumulates progress and stats
+                    // as parts complete.
+                    std::vector<std::function<void()>> MultipartWork = m_Storage.BuildStorage->PutLargeBuildBlob(
+                        m_BuildId,
+                        RawHash,
+                        ZenContentType::kCompressedBinary,
+                        PayloadSize,
+                        [Payload = std::move(Payload), &Context](uint64_t Offset, uint64_t Size) -> IoBuffer {
+                            Context.FilteredUploadedBytesPerSecond.Start();
+
+                            IoBuffer PartPayload = Payload.Mid(Offset, Size).Flatten().AsIoBuffer();
+                            PartPayload.SetContentType(ZenContentType::kBinary);
+                            return PartPayload;
+                        },
+                        [&Context, RawSize](uint64_t SentBytes, bool IsComplete) {
+                            Context.TempUploadStats.ChunksBytes += SentBytes;
+                            Context.UploadedCompressedChunkSize += SentBytes;
+                            if (IsComplete)
+                            {
+                                Context.TempUploadStats.ChunkCount++;
+                                // Stop the upload-rate filter when this was
+                                // the last chunk and all blocks are done.
+                                if (Context.UploadedChunkCount.fetch_add(1) + 1 == Context.UploadChunkCount &&
+                                    Context.UploadedBlockCount == Context.UploadBlockCount)
+                                {
+                                    Context.FilteredUploadedBytesPerSecond.Stop();
+                                }
+                                Context.UploadedRawChunkSize += RawSize;
+                            }
+                        });
+                    // Fan the returned part uploads out on the upload pool.
+                    for (auto& WorkPart : MultipartWork)
+                    {
+                        Context.Work.ScheduleWork(Context.UploadChunkPool, [Work = std::move(WorkPart)](std::atomic<bool>& AbortFlag) {
+                            ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart_Work");
+                            if (!AbortFlag)
+                            {
+                                Work();
+                            }
+                        });
+                    }
+                    if (m_Options.IsVerbose)
+                    {
+                        ZEN_INFO("Uploaded multipart chunk {} ({})", RawHash, NiceBytes(PayloadSize));
+                    }
+                }
+                catch (const std::exception&)
+                {
+                    // Silence http errors due to abort
+                    if (!m_AbortFlag)
+                    {
+                        throw;
+                    }
+                }
+                return;
+            }
+
+            ZEN_TRACE_CPU("AsyncUploadLooseChunk_Singlepart");
+            try
+            {
+                m_Storage.BuildStorage->PutBuildBlob(m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload);
+            }
+            catch (const std::exception&)
+            {
+                // Silence http errors due to abort
+                if (!m_AbortFlag)
+                {
+                    throw;
+                }
+            }
+            if (m_AbortFlag)
+            {
+                return;
+            }
+            if (m_Options.IsVerbose)
+            {
+                ZEN_INFO("Uploaded chunk {} ({})", RawHash, NiceBytes(PayloadSize));
+            }
+            Context.TempUploadStats.ChunksBytes += Payload.GetSize();
+            Context.TempUploadStats.ChunkCount++;
+            Context.UploadedCompressedChunkSize += Payload.GetSize();
+            Context.UploadedRawChunkSize += RawSize;
+            // Stop the upload-rate filter when this was the last chunk and
+            // all blocks are done.
+            if (Context.UploadedChunkCount.fetch_add(1) + 1 == Context.UploadChunkCount &&
+                Context.UploadedBlockCount == Context.UploadBlockCount)
+            {
+                Context.FilteredUploadedBytesPerSecond.Stop();
+            }
+        });
+}
+
+// Reads the raw bytes of one chunk from its source file and returns them
+// as a compressed payload, updating TempLooseChunksStats along the way.
+// Compressible chunks are streamed through Oodle into a delete-on-close
+// temp file so the full compressed blob never has to sit in memory; if
+// stream compression fails we roll back its stats and fall back to
+// one-shot in-memory compression (still spilled to disk when actually
+// compressed). Chunks with non-compressible extensions use level None.
+CompositeBuffer
+BuildsOperationUploadFolder::CompressChunk(const ChunkedFolderContent& Content,
+                                           const ChunkedContentLookup& Lookup,
+                                           uint32_t ChunkIndex,
+                                           LooseChunksStatistics& TempLooseChunksStats)
+{
+    ZEN_TRACE_CPU("CompressChunk");
+    ZEN_ASSERT(!m_Options.TempDir.empty());
+    const IoHash& ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex];
+    const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex];
+
+    // Read the raw chunk bytes from the first known location on disk.
+    // NOTE(review): assumes every chunk has at least one sequence location
+    // — presumably guaranteed by lookup construction; verify.
+    const ChunkedContentLookup::ChunkSequenceLocation& Source = GetChunkSequenceLocations(Lookup, ChunkIndex)[0];
+    const std::uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[Source.SequenceIndex];
+    IoBuffer RawSource = IoBufferBuilder::MakeFromFile((m_Path / Content.Paths[PathIndex]).make_preferred(), Source.Offset, ChunkSize);
+    if (!RawSource)
+    {
+        throw std::runtime_error(fmt::format("Failed fetching chunk {}", ChunkHash));
+    }
+    if (RawSource.GetSize() != ChunkSize)
+    {
+        throw std::runtime_error(fmt::format("Fetched chunk {} has invalid size", ChunkHash));
+    }
+
+    // Whether to actually compress is decided by the file extension list.
+    const bool ShouldCompressChunk = IsChunkCompressable(m_NonCompressableExtensionHashes, Lookup, ChunkIndex);
+    const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None;
+
+    if (ShouldCompressChunk)
+    {
+        std::filesystem::path TempFilePath = m_Options.TempDir / ChunkHash.ToHexString();
+
+        BasicFile CompressedFile;
+        std::error_code Ec;
+        CompressedFile.Open(TempFilePath, BasicFile::Mode::kTruncateDelete, Ec);
+        if (Ec)
+        {
+            throw std::runtime_error(fmt::format("Failed creating temporary file for compressing blob {}, reason: ({}) {}",
+                                                 ChunkHash,
+                                                 Ec.value(),
+                                                 Ec.message()));
+        }
+
+        // Track what the stream callback adds so it can be rolled back if
+        // stream compression ultimately fails.
+        uint64_t StreamRawBytes = 0;
+        uint64_t StreamCompressedBytes = 0;
+
+        bool CouldCompress = CompressedBuffer::CompressToStream(
+            CompositeBuffer(SharedBuffer(RawSource)),
+            [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
+                ZEN_UNUSED(SourceOffset);
+                TempLooseChunksStats.CompressedChunkRawBytes += SourceSize;
+                CompressedFile.Write(RangeBuffer, Offset);
+                TempLooseChunksStats.CompressedChunkBytes += RangeBuffer.GetSize();
+                StreamRawBytes += SourceSize;
+                StreamCompressedBytes += RangeBuffer.GetSize();
+            },
+            OodleCompressor::Mermaid,
+            CompressionLevel);
+        if (CouldCompress)
+        {
+            // Hand the file handle over to an IoBuffer that owns (and
+            // deletes) the temp file; validate the compressed header
+            // against the expected hash and size before returning.
+            uint64_t CompressedSize = CompressedFile.FileSize();
+            void* FileHandle = CompressedFile.Detach();
+            IoBuffer TempPayload = IoBuffer(IoBuffer::File,
+                                            FileHandle,
+                                            0,
+                                            CompressedSize,
+                                            /*IsWholeFile*/ true);
+            ZEN_ASSERT(TempPayload);
+            TempPayload.SetDeleteOnClose(true);
+            IoHash RawHash;
+            uint64_t RawSize;
+            CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(TempPayload), RawHash, RawSize);
+            ZEN_ASSERT(Compressed);
+            ZEN_ASSERT(RawHash == ChunkHash);
+            ZEN_ASSERT(RawSize == ChunkSize);
+
+            TempLooseChunksStats.CompressedChunkCount++;
+
+            return Compressed.GetCompressed();
+        }
+        else
+        {
+            // Stream compression failed: undo the stats the callback added
+            // before falling through to the in-memory path below.
+            TempLooseChunksStats.CompressedChunkRawBytes -= StreamRawBytes;
+            TempLooseChunksStats.CompressedChunkBytes -= StreamCompressedBytes;
+        }
+        CompressedFile.Close();
+        RemoveFile(TempFilePath, Ec);
+        ZEN_UNUSED(Ec);
+    }
+
+    // Fallback: one-shot in-memory compression of the whole chunk.
+    CompressedBuffer CompressedBlob =
+        CompressedBuffer::Compress(SharedBuffer(std::move(RawSource)), OodleCompressor::Mermaid, CompressionLevel);
+    if (!CompressedBlob)
+    {
+        throw std::runtime_error(fmt::format("Failed to compress large blob {}", ChunkHash));
+    }
+    ZEN_ASSERT_SLOW(CompressedBlob.DecodeRawHash() == ChunkHash);
+    ZEN_ASSERT_SLOW(CompressedBlob.DecodeRawSize() == ChunkSize);
+
+    TempLooseChunksStats.CompressedChunkRawBytes += ChunkSize;
+    TempLooseChunksStats.CompressedChunkBytes += CompressedBlob.GetCompressedSize();
+
+    // With compression level None the blob just references the raw source
+    // data in memory, so there is no need to spill it to disk; otherwise
+    // move the compressed payload to a temp file to free memory.
+    if (ShouldCompressChunk)
+    {
+        std::filesystem::path TempFilePath = m_Options.TempDir / (ChunkHash.ToHexString());
+        IoBuffer TempPayload = WriteToTempFile(std::move(CompressedBlob).GetCompressed(), TempFilePath);
+        CompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload));
+    }
+
+    TempLooseChunksStats.CompressedChunkCount++;
+    return std::move(CompressedBlob).GetCompressed();
+}
+
+std::vector<std::pair<Oid, std::string>>
+UploadFolder(LoggerRef Log,
+ ProgressBase& Progress,
+ TransferThreadWorkers& Workers,
+ StorageInstance& Storage,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ const Oid& BuildId,
+ const Oid& BuildPartId,
+ std::string_view BuildPartName,
+ const std::filesystem::path& Path,
+ const std::filesystem::path& ManifestPath,
+ const CbObject& MetaData,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
+ const UploadFolderOptions& Options)
+{
+ Progress.SetLogOperationName("Upload Folder");
+
+ Stopwatch UploadTimer;
+
+ BuildsOperationUploadFolder UploadOp(
+ Log,
+ Progress,
+ Storage,
+ AbortFlag,
+ PauseFlag,
+ Workers.GetIOWorkerPool(),
+ Workers.GetNetworkPool(),
+ BuildId,
+ Path,
+ Options.CreateBuild,
+ std::move(MetaData),
+ BuildsOperationUploadFolder::Options{.IsQuiet = Options.IsQuiet,
+ .IsVerbose = Options.IsVerbose,
+ .DoExtraContentValidation = Options.DoExtraContentVerify,
+ .FindBlockMaxCount = Options.FindBlockMaxCount,
+ .BlockReuseMinPercentLimit = Options.BlockReuseMinPercentLimit,
+ .AllowMultiparts = Options.AllowMultiparts,
+ .IgnoreExistingBlocks = Options.IgnoreExistingBlocks,
+ .TempDir = Options.TempDir,
+ .ExcludeFolders = Options.ExcludeFolders,
+ .ExcludeExtensions = Options.ExcludeExtensions,
+ .NonCompressableExtensions = DefaultSplitOnlyExtensions,
+ .PopulateCache = Options.UploadToZenCache});
+
+ std::vector<std::pair<Oid, std::string>> UploadedParts =
+ UploadOp.Execute(BuildPartId, BuildPartName, ManifestPath, ChunkController, ChunkCache);
+ if (AbortFlag)
+ {
+ return {};
+ }
+
+ if (Options.IsVerbose)
+ {
+ ZEN_CONSOLE(
+ "Folder scanning stats:"
+ "\n FoundFileCount: {}"
+ "\n FoundFileByteCount: {}"
+ "\n AcceptedFileCount: {}"
+ "\n AcceptedFileByteCount: {}"
+ "\n ElapsedWallTimeUS: {}",
+ UploadOp.m_LocalFolderScanStats.FoundFileCount.load(),
+ NiceBytes(UploadOp.m_LocalFolderScanStats.FoundFileByteCount.load()),
+ UploadOp.m_LocalFolderScanStats.AcceptedFileCount.load(),
+ NiceBytes(UploadOp.m_LocalFolderScanStats.AcceptedFileByteCount.load()),
+ NiceLatencyNs(UploadOp.m_LocalFolderScanStats.ElapsedWallTimeUS * 1000));
+
+ ZEN_CONSOLE(
+ "Chunking stats:"
+ "\n FilesProcessed: {}"
+ "\n FilesChunked: {}"
+ "\n BytesHashed: {}"
+ "\n UniqueChunksFound: {}"
+ "\n UniqueSequencesFound: {}"
+ "\n UniqueBytesFound: {}"
+ "\n FilesFoundInCache: {}"
+ "\n ChunksFoundInCache: {}"
+ "\n FilesStoredInCache: {}"
+ "\n ChunksStoredInCache: {}"
+ "\n ElapsedWallTimeUS: {}",
+ UploadOp.m_ChunkingStats.FilesProcessed.load(),
+ UploadOp.m_ChunkingStats.FilesChunked.load(),
+ NiceBytes(UploadOp.m_ChunkingStats.BytesHashed.load()),
+ UploadOp.m_ChunkingStats.UniqueChunksFound.load(),
+ UploadOp.m_ChunkingStats.UniqueSequencesFound.load(),
+ NiceBytes(UploadOp.m_ChunkingStats.UniqueBytesFound.load()),
+ UploadOp.m_ChunkingStats.FilesFoundInCache.load(),
+ UploadOp.m_ChunkingStats.ChunksFoundInCache.load(),
+ NiceBytes(UploadOp.m_ChunkingStats.BytesFoundInCache.load()),
+ UploadOp.m_ChunkingStats.FilesStoredInCache.load(),
+ UploadOp.m_ChunkingStats.ChunksStoredInCache.load(),
+ NiceBytes(UploadOp.m_ChunkingStats.BytesStoredInCache.load()),
+ NiceLatencyNs(UploadOp.m_ChunkingStats.ElapsedWallTimeUS * 1000));
+
+ ZEN_CONSOLE(
+ "Find block stats:"
+ "\n FindBlockTimeMS: {}"
+ "\n PotentialChunkCount: {}"
+ "\n PotentialChunkByteCount: {}"
+ "\n FoundBlockCount: {}"
+ "\n FoundBlockChunkCount: {}"
+ "\n FoundBlockByteCount: {}"
+ "\n AcceptedBlockCount: {}"
+ "\n NewBlocksCount: {}"
+ "\n NewBlocksChunkCount: {}"
+ "\n NewBlocksChunkByteCount: {}",
+ NiceTimeSpanMs(UploadOp.m_FindBlocksStats.FindBlockTimeMS),
+ UploadOp.m_FindBlocksStats.PotentialChunkCount,
+ NiceBytes(UploadOp.m_FindBlocksStats.PotentialChunkByteCount),
+ UploadOp.m_FindBlocksStats.FoundBlockCount,
+ UploadOp.m_FindBlocksStats.FoundBlockChunkCount,
+ NiceBytes(UploadOp.m_FindBlocksStats.FoundBlockByteCount),
+ UploadOp.m_FindBlocksStats.AcceptedBlockCount,
+ UploadOp.m_FindBlocksStats.NewBlocksCount,
+ UploadOp.m_FindBlocksStats.NewBlocksChunkCount,
+ NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount));
+
+ ZEN_CONSOLE(
+ "Reuse block stats:"
+ "\n AcceptedChunkCount: {}"
+ "\n AcceptedByteCount: {}"
+ "\n AcceptedRawByteCount: {}"
+ "\n RejectedBlockCount: {}"
+ "\n RejectedChunkCount: {}"
+ "\n RejectedByteCount: {}"
+ "\n AcceptedReduntantChunkCount: {}"
+ "\n AcceptedReduntantByteCount: {}",
+ UploadOp.m_ReuseBlocksStats.AcceptedChunkCount,
+ NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedByteCount),
+ NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedRawByteCount),
+ UploadOp.m_ReuseBlocksStats.RejectedBlockCount,
+ UploadOp.m_ReuseBlocksStats.RejectedChunkCount,
+ NiceBytes(UploadOp.m_ReuseBlocksStats.RejectedByteCount),
+ UploadOp.m_ReuseBlocksStats.AcceptedReduntantChunkCount,
+ NiceBytes(UploadOp.m_ReuseBlocksStats.AcceptedReduntantByteCount));
+
+ ZEN_CONSOLE(
+ "Generate blocks stats:"
+ "\n GeneratedBlockByteCount: {}"
+ "\n GeneratedBlockCount: {}"
+ "\n GenerateBlocksElapsedWallTimeUS: {}",
+ NiceBytes(UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount.load()),
+ UploadOp.m_GenerateBlocksStats.GeneratedBlockCount.load(),
+ NiceLatencyNs(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS * 1000));
+
+ ZEN_CONSOLE(
+ "Loose chunks stats:"
+ "\n  ChunkCount: {}"
+ "\n  ChunkByteCount: {}"
+ "\n  CompressedChunkCount: {}"
+ "\n  CompressedChunkBytes: {}"
+ "\n  CompressChunksElapsedWallTimeUS: {}",
+ UploadOp.m_LooseChunksStats.ChunkCount,
+ NiceBytes(UploadOp.m_LooseChunksStats.ChunkByteCount),
+ UploadOp.m_LooseChunksStats.CompressedChunkCount.load(),
+ NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkBytes.load()),
+ NiceLatencyNs(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS * 1000));
+
+ ZEN_CONSOLE(
+ "Disk stats:"
+ "\n OpenReadCount: {}"
+ "\n OpenWriteCount: {}"
+ "\n ReadCount: {}"
+ "\n ReadByteCount: {}"
+ "\n WriteCount: {} ({} cloned)"
+ "\n WriteByteCount: {} ({} cloned)"
+ "\n CurrentOpenFileCount: {}",
+ UploadOp.m_DiskStats.OpenReadCount.load(),
+ UploadOp.m_DiskStats.OpenWriteCount.load(),
+ UploadOp.m_DiskStats.ReadCount.load(),
+ NiceBytes(UploadOp.m_DiskStats.ReadByteCount.load()),
+ UploadOp.m_DiskStats.WriteCount.load(),
+ UploadOp.m_DiskStats.CloneCount.load(),
+ NiceBytes(UploadOp.m_DiskStats.WriteByteCount.load()),
+ NiceBytes(UploadOp.m_DiskStats.CloneByteCount.load()),
+ UploadOp.m_DiskStats.CurrentOpenFileCount.load());
+
+ ZEN_CONSOLE(
+ "Upload stats:"
+ "\n BlockCount: {}"
+ "\n BlocksBytes: {}"
+ "\n ChunkCount: {}"
+ "\n ChunksBytes: {}"
+ "\n ReadFromDiskBytes: {}"
+ "\n MultipartAttachmentCount: {}"
+ "\n ElapsedWallTimeUS: {}",
+ UploadOp.m_UploadStats.BlockCount.load(),
+ NiceBytes(UploadOp.m_UploadStats.BlocksBytes.load()),
+ UploadOp.m_UploadStats.ChunkCount.load(),
+ NiceBytes(UploadOp.m_UploadStats.ChunksBytes.load()),
+ NiceBytes(UploadOp.m_UploadStats.ReadFromDiskBytes.load()),
+ UploadOp.m_UploadStats.MultipartAttachmentCount.load(),
+ NiceLatencyNs(UploadOp.m_UploadStats.ElapsedWallTimeUS * 1000));
+ }
+
+ const double DeltaByteCountPercent =
+ UploadOp.m_ChunkingStats.BytesHashed > 0
+ ? (100.0 * (UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount + UploadOp.m_LooseChunksStats.CompressedChunkBytes)) /
+ (UploadOp.m_ChunkingStats.BytesHashed)
+ : 0.0;
+
+ const std::string MultipartAttachmentStats =
+ Options.AllowMultiparts ? fmt::format(" ({} as multipart)", UploadOp.m_UploadStats.MultipartAttachmentCount.load()) : "";
+
+ if (!Options.IsQuiet)
+ {
+ ZEN_CONSOLE(
+ "Uploaded part {} ('{}') to build {}, {}\n"
+ " Scanned files: {:>8} ({}), {}B/sec, {}\n"
+ " New data: {:>8} ({}) {:.1f}%\n"
+ " New blocks: {:>8} ({} -> {}), {}B/sec, {}\n"
+ " New chunks: {:>8} ({} -> {}), {}B/sec, {}\n"
+ " Uploaded: {:>8} ({}), {}bits/sec, {}\n"
+ " Blocks: {:>8} ({})\n"
+ " Chunks: {:>8} ({}){}",
+ BuildPartId,
+ BuildPartName,
+ BuildId,
+ NiceTimeSpanMs(UploadTimer.GetElapsedTimeMs()),
+
+ UploadOp.m_LocalFolderScanStats.FoundFileCount.load(),
+ NiceBytes(UploadOp.m_LocalFolderScanStats.FoundFileByteCount.load()),
+ NiceNum(GetBytesPerSecond(UploadOp.m_ChunkingStats.ElapsedWallTimeUS, UploadOp.m_ChunkingStats.BytesHashed)),
+ NiceTimeSpanMs(UploadOp.m_ChunkingStats.ElapsedWallTimeUS / 1000),
+
+ UploadOp.m_FindBlocksStats.NewBlocksChunkCount + UploadOp.m_LooseChunksStats.CompressedChunkCount,
+ NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount + UploadOp.m_LooseChunksStats.CompressedChunkBytes),
+ DeltaByteCountPercent,
+
+ UploadOp.m_GenerateBlocksStats.GeneratedBlockCount.load(),
+ NiceBytes(UploadOp.m_FindBlocksStats.NewBlocksChunkByteCount),
+ NiceBytes(UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount.load()),
+ NiceNum(GetBytesPerSecond(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS,
+ UploadOp.m_GenerateBlocksStats.GeneratedBlockByteCount)),
+ NiceTimeSpanMs(UploadOp.m_GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS / 1000),
+
+ UploadOp.m_LooseChunksStats.CompressedChunkCount.load(),
+ NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkRawBytes),
+ NiceBytes(UploadOp.m_LooseChunksStats.CompressedChunkBytes.load()),
+ NiceNum(GetBytesPerSecond(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS,
+ UploadOp.m_LooseChunksStats.CompressedChunkRawBytes)),
+ NiceTimeSpanMs(UploadOp.m_LooseChunksStats.CompressChunksElapsedWallTimeUS / 1000),
+
+ UploadOp.m_UploadStats.BlockCount.load() + UploadOp.m_UploadStats.ChunkCount.load(),
+ NiceBytes(UploadOp.m_UploadStats.BlocksBytes + UploadOp.m_UploadStats.ChunksBytes),
+ NiceNum(GetBytesPerSecond(UploadOp.m_UploadStats.ElapsedWallTimeUS,
+ (UploadOp.m_UploadStats.ChunksBytes + UploadOp.m_UploadStats.BlocksBytes) * 8)),
+ NiceTimeSpanMs(UploadOp.m_UploadStats.ElapsedWallTimeUS / 1000),
+
+ UploadOp.m_UploadStats.BlockCount.load(),
+ NiceBytes(UploadOp.m_UploadStats.BlocksBytes.load()),
+
+ UploadOp.m_UploadStats.ChunkCount.load(),
+ NiceBytes(UploadOp.m_UploadStats.ChunksBytes.load()),
+ MultipartAttachmentStats);
+ }
+ return UploadedParts;
+}
+
+} // namespace zen
diff --git a/src/zenremotestore/builds/buildvalidatebuildpart.cpp b/src/zenremotestore/builds/buildvalidatebuildpart.cpp
new file mode 100644
index 000000000..d06502683
--- /dev/null
+++ b/src/zenremotestore/builds/buildvalidatebuildpart.cpp
@@ -0,0 +1,374 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include <zenremotestore/builds/buildvalidatebuildpart.h>
+
+#include <zencore/fmtutils.h>
+#include <zencore/parallelwork.h>
+#include <zencore/scopeguard.h>
+#include <zencore/trace.h>
+#include <zenremotestore/builds/builduploadfolder.h>
+#include <zenremotestore/transferthreadworkers.h>
+#include <zenutil/filesystemutils.h>
+#include <zenutil/filteredrate.h>
+#include <zenutil/progress.h>
+
+namespace zen {
+
+using namespace std::literals;
+
+// Binds the logger, progress reporter, storage backend, abort/pause flags,
+// worker pools, build/part identifiers and options for one validate-build-part
+// operation. No work is performed here; call Execute() to run the validation.
+BuildsOperationValidateBuildPart::BuildsOperationValidateBuildPart(LoggerRef Log,
+ ProgressBase& Progress,
+ BuildStorageBase& Storage,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ WorkerThreadPool& IOWorkerPool,
+ WorkerThreadPool& NetworkPool,
+ const Oid& BuildId,
+ const Oid& BuildPartId,
+ const std::string_view BuildPartName,
+ const Options& Options)
+
+: m_Log(Log)
+, m_Progress(Progress)
+, m_Storage(Storage)
+, m_AbortFlag(AbortFlag)
+, m_PauseFlag(PauseFlag)
+, m_IOWorkerPool(IOWorkerPool)
+, m_NetworkPool(NetworkPool)
+, m_BuildId(BuildId)
+, m_BuildPartId(BuildPartId)
+, m_BuildPartName(BuildPartName)
+, m_Options(Options)
+{
+}
+
+// Runs the full validation pipeline: resolves the build part, schedules a
+// download + hash-verification task for every chunk and block attachment, and
+// drives the progress bar until all scheduled work completes. On any exception
+// the abort flag is raised (so in-flight scheduled work bails out) and the
+// exception is rethrown to the caller.
+void
+BuildsOperationValidateBuildPart::Execute()
+{
+ ZEN_TRACE_CPU("ValidateBuildPart");
+ try
+ {
+ // Guard: drive the step-progress log to its final state on every exit path.
+ auto EndProgress =
+ MakeGuard([&]() { m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); });
+
+ Stopwatch Timer;
+ auto _ = MakeGuard([&]() {
+ if (!m_Options.IsQuiet)
+ {
+ ZEN_INFO("Validated build part {}/{} ('{}') in {}",
+ m_BuildId,
+ m_BuildPartId,
+ m_BuildPartName,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ }
+ });
+
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::FetchBuild, (uint32_t)TaskSteps::StepCount);
+
+ ResolvedBuildPart Resolved = ResolveBuildPart();
+
+ ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+
+ // Start from a clean temp folder and remove it again when done (guard below).
+ const std::filesystem::path& TempFolder = m_Options.TempFolder;
+ ZEN_ASSERT(!TempFolder.empty());
+
+ CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, TempFolder);
+ CreateDirectories(TempFolder);
+ auto __ = MakeGuard([this, TempFolder]() { CleanAndRemoveDirectory(m_IOWorkerPool, m_AbortFlag, m_PauseFlag, TempFolder); });
+
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::ValidateBlobs, (uint32_t)TaskSteps::StepCount);
+
+ std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Validate Blobs");
+
+ const uint64_t AttachmentsToVerifyCount = Resolved.ChunkAttachments.size() + Resolved.BlockAttachments.size();
+ FilteredRate FilteredDownloadedBytesPerSecond;
+ FilteredRate FilteredVerifiedBytesPerSecond;
+
+ ValidateBlobsContext Context{.Work = Work,
+ .AttachmentsToVerifyCount = AttachmentsToVerifyCount,
+ .FilteredDownloadedBytesPerSecond = FilteredDownloadedBytesPerSecond,
+ .FilteredVerifiedBytesPerSecond = FilteredVerifiedBytesPerSecond};
+
+ ScheduleChunkAttachmentValidation(Context, Resolved.ChunkAttachments, TempFolder, Resolved.PreferredMultipartChunkSize);
+ ScheduleBlockAttachmentValidation(Context, Resolved.BlockAttachments);
+
+ // Block until all scheduled work finishes; the callback only refreshes the
+ // progress bar (total is 2x the attachment count: one download + one verify each).
+ Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ ZEN_UNUSED(PendingWork);
+
+ const uint64_t DownloadedAttachmentCount = m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount;
+ const uint64_t DownloadedByteCount = m_DownloadStats.DownloadedChunkByteCount + m_DownloadStats.DownloadedBlockByteCount;
+
+ FilteredDownloadedBytesPerSecond.Update(DownloadedByteCount);
+ FilteredVerifiedBytesPerSecond.Update(m_ValidateStats.VerifiedByteCount);
+
+ std::string Details = fmt::format("Downloaded {}/{} ({}, {}bits/s). Verified {}/{} ({}, {}B/s)",
+ DownloadedAttachmentCount,
+ AttachmentsToVerifyCount,
+ NiceBytes(DownloadedByteCount),
+ NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8),
+ m_ValidateStats.VerifiedAttachmentCount.load(),
+ AttachmentsToVerifyCount,
+ NiceBytes(m_ValidateStats.VerifiedByteCount.load()),
+ NiceNum(FilteredVerifiedBytesPerSecond.GetCurrent()));
+
+ ProgressBar->UpdateState(
+ {.Task = "Validating blobs ",
+ .Details = Details,
+ .TotalCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2),
+ .RemainingCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2 -
+ (DownloadedAttachmentCount + m_ValidateStats.VerifiedAttachmentCount.load())),
+ .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+ false);
+ });
+
+ ProgressBar->Finish();
+ m_ValidateStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs();
+
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount);
+ }
+ catch (const std::exception&)
+ {
+ // Signal all scheduled lambdas (which poll m_AbortFlag) to stop, then rethrow.
+ m_AbortFlag = true;
+ throw;
+ }
+}
+
+// Fetches the build and build-part compact-binary objects and collects the
+// chunk/block attachment hashes to verify. Also resolves m_BuildPartId from
+// m_BuildPartName when a name was supplied, and verifies that metadata exists
+// for every block attachment (throws std::runtime_error otherwise).
+BuildsOperationValidateBuildPart::ResolvedBuildPart
+BuildsOperationValidateBuildPart::ResolveBuildPart()
+{
+ ResolvedBuildPart Result;
+ // Default multipart chunk size (32 MiB); overridden by the build's
+ // "chunkSize" field below when present and non-zero.
+ Result.PreferredMultipartChunkSize = 32u * 1024u * 1024u;
+
+ CbObject Build = m_Storage.GetBuild(m_BuildId);
+ if (!m_BuildPartName.empty())
+ {
+ // Name takes precedence: look the part id up in the build's "parts" map.
+ m_BuildPartId = Build["parts"sv].AsObjectView()[m_BuildPartName].AsObjectId();
+ if (m_BuildPartId == Oid::Zero)
+ {
+ throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", m_BuildId, m_BuildPartName));
+ }
+ }
+ m_ValidateStats.BuildBlobSize = Build.GetSize();
+ if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0)
+ {
+ Result.PreferredMultipartChunkSize = ChunkSize;
+ }
+
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::FetchBuildPart, (uint32_t)TaskSteps::StepCount);
+
+ CbObject BuildPart = m_Storage.GetBuildPart(m_BuildId, m_BuildPartId);
+ m_ValidateStats.BuildPartSize = BuildPart.GetSize();
+ if (!m_Options.IsQuiet)
+ {
+ ZEN_INFO("Validating build part {}/{} ({})", m_BuildId, m_BuildPartId, NiceBytes(BuildPart.GetSize()));
+ }
+ // Loose chunk attachments are listed under chunkAttachments/rawHashes.
+ if (const CbObjectView ChunkAttachmentsView = BuildPart["chunkAttachments"sv].AsObjectView())
+ {
+ for (CbFieldView LooseFileView : ChunkAttachmentsView["rawHashes"sv])
+ {
+ Result.ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment());
+ }
+ }
+ m_ValidateStats.ChunkAttachmentCount = Result.ChunkAttachments.size();
+ // Block attachments are listed under blockAttachments/rawHashes.
+ if (const CbObjectView BlockAttachmentsView = BuildPart["blockAttachments"sv].AsObjectView())
+ {
+ for (CbFieldView BlocksView : BlockAttachmentsView["rawHashes"sv])
+ {
+ Result.BlockAttachments.push_back(BlocksView.AsBinaryAttachment());
+ }
+ }
+ m_ValidateStats.BlockAttachmentCount = Result.BlockAttachments.size();
+
+ // Fetched only to confirm every block has metadata; the parsed descriptions
+ // themselves are discarded after the count check.
+ std::vector<ChunkBlockDescription> VerifyBlockDescriptions =
+ ParseChunkBlockDescriptionList(m_Storage.GetBlockMetadatas(m_BuildId, Result.BlockAttachments));
+ if (VerifyBlockDescriptions.size() != Result.BlockAttachments.size())
+ {
+ throw std::runtime_error(fmt::format("Uploaded blocks metadata could not all be found, {} blocks metadata is missing",
+ Result.BlockAttachments.size() - VerifyBlockDescriptions.size()));
+ }
+
+ return Result;
+}
+
+// Schedules one network-pool task per chunk attachment. Each task downloads
+// the (possibly multipart) blob via DownloadLargeBlob and, on completion,
+// hands the payload to the IO pool for hash verification in
+// ValidateDownloadedChunk. Every stage re-checks m_AbortFlag before working.
+// NOTE(review): TempFolder and Context are captured by reference; this relies
+// on the caller (Execute) keeping both alive until Work.Wait() returns —
+// confirm if this is ever called from another context.
+void
+BuildsOperationValidateBuildPart::ScheduleChunkAttachmentValidation(ValidateBlobsContext& Context,
+ std::span<const IoHash> ChunkAttachments,
+ const std::filesystem::path& TempFolder,
+ uint64_t PreferredMultipartChunkSize)
+{
+ for (const IoHash& ChunkAttachment : ChunkAttachments)
+ {
+ Context.Work.ScheduleWork(
+ m_NetworkPool,
+ [this, &Context, &TempFolder, PreferredMultipartChunkSize, ChunkAttachment = IoHash(ChunkAttachment)](std::atomic<bool>&) {
+ if (!m_AbortFlag)
+ {
+ ZEN_TRACE_CPU("ValidateBuildPart_GetChunk");
+
+ Context.FilteredDownloadedBytesPerSecond.Start();
+ DownloadLargeBlob(
+ m_Storage,
+ TempFolder,
+ m_BuildId,
+ ChunkAttachment,
+ PreferredMultipartChunkSize,
+ Context.Work,
+ m_NetworkPool,
+ m_DownloadStats.DownloadedChunkByteCount,
+ m_DownloadStats.MultipartAttachmentCount,
+ [this, &Context, ChunkHash = IoHash(ChunkAttachment)](IoBuffer&& Payload) {
+ m_DownloadStats.DownloadedChunkCount++;
+ Payload.SetContentType(ZenContentType::kCompressedBinary);
+ if (!m_AbortFlag)
+ {
+ // Verification is CPU/disk bound: move it off the network pool.
+ Context.Work.ScheduleWork(
+ m_IOWorkerPool,
+ [this, &Context, Payload = IoBuffer(std::move(Payload)), ChunkHash](std::atomic<bool>&) mutable {
+ if (!m_AbortFlag)
+ {
+ ValidateDownloadedChunk(Context, ChunkHash, std::move(Payload));
+ }
+ });
+ }
+ });
+ }
+ });
+ }
+}
+
+// Schedules one network-pool task per block attachment. Each task downloads
+// the whole block via GetBuildBlob, updates the download counters, and hands
+// the payload to the IO pool for verification in ValidateDownloadedBlock.
+// The download-rate filter is stopped once the last attachment (chunk or
+// block) has been counted as downloaded. Throws if the blob is missing.
+void
+BuildsOperationValidateBuildPart::ScheduleBlockAttachmentValidation(ValidateBlobsContext& Context, std::span<const IoHash> BlockAttachments)
+{
+ for (const IoHash& BlockAttachment : BlockAttachments)
+ {
+ Context.Work.ScheduleWork(m_NetworkPool, [this, &Context, BlockAttachment = IoHash(BlockAttachment)](std::atomic<bool>&) {
+ if (!m_AbortFlag)
+ {
+ ZEN_TRACE_CPU("ValidateBuildPart_GetBlock");
+
+ Context.FilteredDownloadedBytesPerSecond.Start();
+ IoBuffer Payload = m_Storage.GetBuildBlob(m_BuildId, BlockAttachment);
+ // Counted as downloaded before the empty-payload check below; a missing
+ // blob still throws and aborts the operation.
+ m_DownloadStats.DownloadedBlockCount++;
+ m_DownloadStats.DownloadedBlockByteCount += Payload.GetSize();
+ if (m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount == Context.AttachmentsToVerifyCount)
+ {
+ Context.FilteredDownloadedBytesPerSecond.Stop();
+ }
+ if (!Payload)
+ {
+ throw std::runtime_error(fmt::format("Block attachment {} could not be found", BlockAttachment));
+ }
+ if (!m_AbortFlag)
+ {
+ // Verification is CPU/disk bound: move it off the network pool.
+ Context.Work.ScheduleWork(m_IOWorkerPool,
+ [this, &Context, Payload = std::move(Payload), BlockAttachment](std::atomic<bool>&) mutable {
+ if (!m_AbortFlag)
+ {
+ ValidateDownloadedBlock(Context, BlockAttachment, std::move(Payload));
+ }
+ });
+ }
+ }
+ });
+ }
+}
+
+// IO-pool continuation for a downloaded chunk: verifies the payload against
+// its expected hash via ValidateBlob and updates the verification counters,
+// starting/stopping the rate filters at the first/last attachment.
+void
+BuildsOperationValidateBuildPart::ValidateDownloadedChunk(ValidateBlobsContext& Context, const IoHash& ChunkHash, IoBuffer Payload)
+{
+ ZEN_TRACE_CPU("ValidateBuildPart_Validate");
+
+ if (m_DownloadStats.DownloadedChunkCount + m_DownloadStats.DownloadedBlockCount == Context.AttachmentsToVerifyCount)
+ {
+ Context.FilteredDownloadedBytesPerSecond.Stop();
+ }
+
+ Context.FilteredVerifiedBytesPerSecond.Start();
+
+ // CompressedSize is an out-param required by ValidateBlob's signature; only
+ // DecompressedSize is consumed here.
+ uint64_t CompressedSize;
+ uint64_t DecompressedSize;
+ ValidateBlob(m_AbortFlag, std::move(Payload), ChunkHash, CompressedSize, DecompressedSize);
+ m_ValidateStats.VerifiedAttachmentCount++;
+ m_ValidateStats.VerifiedByteCount += DecompressedSize;
+ if (m_ValidateStats.VerifiedAttachmentCount.load() == Context.AttachmentsToVerifyCount)
+ {
+ Context.FilteredVerifiedBytesPerSecond.Stop();
+ }
+}
+
+// IO-pool continuation for a downloaded block: verifies the payload and its
+// embedded block description via ValidateChunkBlock and updates the
+// verification counters, stopping the rate filter after the last attachment.
+void
+BuildsOperationValidateBuildPart::ValidateDownloadedBlock(ValidateBlobsContext& Context, const IoHash& BlockAttachment, IoBuffer Payload)
+{
+ ZEN_TRACE_CPU("ValidateBuildPart_ValidateBlock");
+
+ Context.FilteredVerifiedBytesPerSecond.Start();
+
+ // CompressedSize is an out-param required by ValidateChunkBlock's signature;
+ // only DecompressedSize is consumed here.
+ uint64_t CompressedSize;
+ uint64_t DecompressedSize;
+ ValidateChunkBlock(std::move(Payload), BlockAttachment, CompressedSize, DecompressedSize);
+ m_ValidateStats.VerifiedAttachmentCount++;
+ m_ValidateStats.VerifiedByteCount += DecompressedSize;
+ if (m_ValidateStats.VerifiedAttachmentCount.load() == Context.AttachmentsToVerifyCount)
+ {
+ Context.FilteredVerifiedBytesPerSecond.Stop();
+ }
+}
+
+// Verifies a chunk-block blob against BlobHash (via ValidateBlob, which fills
+// the compressed/decompressed size out-params) and parses its block
+// description. Throws std::runtime_error when ValidateBlob yields an empty
+// buffer — per the message, the blob must use the 'None' compression level.
+ChunkBlockDescription
+BuildsOperationValidateBuildPart::ValidateChunkBlock(IoBuffer&& Payload,
+ const IoHash& BlobHash,
+ uint64_t& OutCompressedSize,
+ uint64_t& OutDecompressedSize)
+{
+ CompositeBuffer BlockBuffer = ValidateBlob(m_AbortFlag, std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize);
+ if (!BlockBuffer)
+ {
+ throw std::runtime_error(fmt::format("Chunk block blob {} is not compressed using 'None' compression level", BlobHash));
+ }
+ return GetChunkBlockDescription(BlockBuffer.Flatten(), BlobHash);
+}
+
+// Public entry point: constructs a BuildsOperationValidateBuildPart with the
+// worker pools from Workers, executes it, and prints a one-line verification
+// summary. Either BuildPartId or BuildPartName identifies the part to check.
+// NOTE(review): the ZEN_CONSOLE summary below is printed even when IsQuiet is
+// set (the operation itself honors IsQuiet internally) — confirm intended.
+void
+ValidateBuildPart(LoggerRef Log,
+ ProgressBase& Progress,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ bool IsQuiet,
+ bool IsVerbose,
+ TransferThreadWorkers& Workers,
+ BuildStorageBase& Storage,
+ const std::filesystem::path& TempFolder,
+ const Oid& BuildId,
+ const Oid& BuildPartId,
+ std::string_view BuildPartName)
+{
+ ZEN_TRACE_CPU("ValidateBuildPart");
+
+ Progress.SetLogOperationName("Validate Part");
+
+ BuildsOperationValidateBuildPart ValidateOp(
+ Log,
+ Progress,
+ Storage,
+ AbortFlag,
+ PauseFlag,
+ Workers.GetIOWorkerPool(),
+ Workers.GetNetworkPool(),
+ BuildId,
+ BuildPartId,
+ BuildPartName,
+ BuildsOperationValidateBuildPart::Options{.TempFolder = TempFolder, .IsQuiet = IsQuiet, .IsVerbose = IsVerbose});
+
+ ValidateOp.Execute();
+
+ const uint64_t DownloadedCount = ValidateOp.m_DownloadStats.DownloadedChunkCount + ValidateOp.m_DownloadStats.DownloadedBlockCount;
+ const uint64_t DownloadedByteCount =
+ ValidateOp.m_DownloadStats.DownloadedChunkByteCount + ValidateOp.m_DownloadStats.DownloadedBlockByteCount;
+ ZEN_CONSOLE("Verified: {:>8} ({}), {}B/sec, {}",
+ DownloadedCount,
+ NiceBytes(DownloadedByteCount),
+ NiceNum(GetBytesPerSecond(ValidateOp.m_ValidateStats.ElapsedWallTimeUS, DownloadedByteCount)),
+ NiceTimeSpanMs(ValidateOp.m_ValidateStats.ElapsedWallTimeUS / 1000));
+}
+
+} // namespace zen
diff --git a/src/zenremotestore/builds/filebuildstorage.cpp b/src/zenremotestore/builds/filebuildstorage.cpp
index 55e69de61..2f4904449 100644
--- a/src/zenremotestore/builds/filebuildstorage.cpp
+++ b/src/zenremotestore/builds/filebuildstorage.cpp
@@ -432,6 +432,45 @@ public:
return IoBuffer{};
}
+ // Serves multiple byte ranges of a blob from the local file store. Opens a
+ // single file view spanning from the first range's start to the last range's
+ // end and rebases each returned range to be relative to that envelope.
+ // Returns an empty BuildBlobRanges when the blob file does not exist.
+ // NOTE(review): front()/back() are used to compute the envelope, so this
+ // assumes Ranges is sorted ascending by offset — confirm with callers.
+ virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId,
+ const IoHash& RawHash,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges) override
+ {
+ ZEN_TRACE_CPU("FileBuildStorage::GetBuildBlobRanges");
+ ZEN_UNUSED(BuildId);
+ ZEN_ASSERT(!Ranges.empty());
+
+ uint64_t ReceivedBytes = 0;
+ // Request size modeled as two uint64 values (offset, length) per range.
+ uint64_t SentBytes = Ranges.size() * 2 * 8;
+
+ SimulateLatency(SentBytes, 0);
+ auto _ = MakeGuard([&]() { SimulateLatency(0, ReceivedBytes); });
+
+ Stopwatch ExecutionTimer;
+ auto __ = MakeGuard([&]() { AddStatistic(ExecutionTimer, SentBytes, ReceivedBytes); });
+
+ BuildBlobRanges Result;
+
+ const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash);
+ if (IsFile(BlockPath))
+ {
+ BasicFile File(BlockPath, BasicFile::Mode::kRead);
+
+ // Envelope covering every requested range; the IoBuffer takes ownership
+ // of the detached file handle.
+ uint64_t RangeOffset = Ranges.front().first;
+ uint64_t RangeBytes = Ranges.back().first + Ranges.back().second - RangeOffset;
+ Result.PayloadBuffer = IoBufferBuilder::MakeFromFileHandle(File.Detach(), RangeOffset, RangeBytes);
+
+ Result.Ranges.reserve(Ranges.size());
+
+ for (const std::pair<uint64_t, uint64_t>& Range : Ranges)
+ {
+ // Rebase offsets so they index into PayloadBuffer.
+ Result.Ranges.push_back(std::make_pair(Range.first - RangeOffset, Range.second));
+ }
+ ReceivedBytes = Result.PayloadBuffer.GetSize();
+ }
+ return Result;
+ }
+
virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId,
const IoHash& RawHash,
uint64_t ChunkSize,
diff --git a/src/zenremotestore/builds/jupiterbuildstorage.cpp b/src/zenremotestore/builds/jupiterbuildstorage.cpp
index 23d0ddd4c..c0cc16941 100644
--- a/src/zenremotestore/builds/jupiterbuildstorage.cpp
+++ b/src/zenremotestore/builds/jupiterbuildstorage.cpp
@@ -4,6 +4,7 @@
#include <zencore/compactbinarybuilder.h>
#include <zencore/compactbinaryutil.h>
+#include <zencore/compress.h>
#include <zencore/fmtutils.h>
#include <zencore/scopeguard.h>
#include <zencore/timer.h>
@@ -14,18 +15,19 @@ ZEN_THIRD_PARTY_INCLUDES_START
#include <tsl/robin_map.h>
ZEN_THIRD_PARTY_INCLUDES_END
-#include <regex>
+#include <string_view>
namespace zen {
using namespace std::literals;
namespace {
- void ThrowFromJupiterResult(const JupiterResult& Result, std::string_view Prefix)
+ [[noreturn]] void ThrowFromJupiterResult(const JupiterResult& Result, std::string_view Prefix)
{
- int Error = Result.ErrorCode < (int)HttpResponseCode::Continue ? Result.ErrorCode : 0;
- HttpResponseCode Status =
- Result.ErrorCode >= int(HttpResponseCode::Continue) ? HttpResponseCode(Result.ErrorCode) : HttpResponseCode::ImATeapot;
+ HttpClientErrorCode Error = Result.ErrorCode < static_cast<int>(HttpResponseCode::Continue) ? HttpClientErrorCode(Result.ErrorCode)
+ : HttpClientErrorCode::kOK;
+ HttpResponseCode Status = Result.ErrorCode >= static_cast<int>(HttpResponseCode::Continue) ? HttpResponseCode(Result.ErrorCode)
+ : HttpResponseCode::ImATeapot;
throw HttpClientError(fmt::format("{}: {} ({})", Prefix, Result.Reason, Result.ErrorCode), Error, Status);
}
} // namespace
@@ -262,7 +264,7 @@ public:
std::vector<std::function<void()>> WorkList;
for (auto& WorkItem : WorkItems)
{
- WorkList.emplace_back([this, WorkItem = std::move(WorkItem), OnSentBytes]() {
+ WorkList.emplace_back([this, WorkItem = std::move(WorkItem), OnSentBytes = std::move(OnSentBytes)]() {
Stopwatch ExecutionTimer;
auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); });
bool IsComplete = false;
@@ -284,7 +286,10 @@ public:
Stopwatch ExecutionTimer;
auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); });
- CreateDirectories(m_TempFolderPath);
+ if (!m_TempFolderPath.empty())
+ {
+ CreateDirectories(m_TempFolderPath);
+ }
JupiterResult GetBuildBlobResult =
m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath, RangeOffset, RangeBytes);
AddStatistic(GetBuildBlobResult);
@@ -295,6 +300,29 @@ public:
return std::move(GetBuildBlobResult.Response);
}
+ // Fetches multiple byte ranges of a blob from the Jupiter service in one
+ // call, records execution-time/transfer statistics, and throws (via
+ // ThrowFromJupiterResult) on failure.
+ // NOTE(review): the trace label reuses "Jupiter::GetBuildBlob" from the
+ // single-blob sibling — consider "Jupiter::GetBuildBlobRanges".
+ virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId,
+ const IoHash& RawHash,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges) override
+ {
+ ZEN_TRACE_CPU("Jupiter::GetBuildBlob");
+
+ Stopwatch ExecutionTimer;
+ auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); });
+ // Temp folder is optional; only create it when one was configured.
+ if (!m_TempFolderPath.empty())
+ {
+ CreateDirectories(m_TempFolderPath);
+ }
+
+ BuildBlobRangesResult GetBuildBlobResult =
+ m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath, Ranges);
+ AddStatistic(GetBuildBlobResult);
+ if (!GetBuildBlobResult.Success)
+ {
+ ThrowFromJupiterResult(GetBuildBlobResult, "Failed fetching build blob ranges"sv);
+ }
+ return BuildBlobRanges{.PayloadBuffer = std::move(GetBuildBlobResult.Response), .Ranges = std::move(GetBuildBlobResult.Ranges)};
+ }
+
virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId,
const IoHash& RawHash,
uint64_t ChunkSize,
@@ -423,11 +451,13 @@ public:
virtual bool GetExtendedStatistics(ExtendedStatistics& OutStats) override
{
- OutStats.ReceivedBytesPerSource.reserve(m_ReceivedBytesPerSource.size());
- for (auto& It : m_ReceivedBytesPerSource)
- {
- OutStats.ReceivedBytesPerSource.insert_or_assign(It.first, m_SourceBytes[It.second]);
- }
+ m_SourceLock.WithSharedLock([this, &OutStats]() {
+ OutStats.ReceivedBytesPerSource.reserve(m_ReceivedBytesPerSource.size());
+ for (auto& It : m_ReceivedBytesPerSource)
+ {
+ OutStats.ReceivedBytesPerSource.insert_or_assign(It.first, m_SourceBytes[It.second].load(std::memory_order_relaxed));
+ }
+ });
return true;
}
@@ -500,15 +530,29 @@ private:
}
if (!Result.Source.empty())
{
- if (tsl::robin_map<std::string, uint32_t>::const_iterator It = m_ReceivedBytesPerSource.find(Result.Source);
- It != m_ReceivedBytesPerSource.end())
- {
- m_SourceBytes[It->second] += Result.ReceivedBytes;
- }
- else
+ if (!m_SourceLock.WithSharedLock([&]() {
+ if (tsl::robin_map<std::string, uint32_t>::const_iterator It = m_ReceivedBytesPerSource.find(Result.Source);
+ It != m_ReceivedBytesPerSource.end())
+ {
+ m_SourceBytes[It->second] += Result.ReceivedBytes;
+ return true;
+ }
+ return false;
+ }))
{
- m_ReceivedBytesPerSource.insert_or_assign(Result.Source, m_SourceBytes.size());
- m_SourceBytes.push_back(Result.ReceivedBytes);
+ m_SourceLock.WithExclusiveLock([&]() {
+ if (tsl::robin_map<std::string, uint32_t>::const_iterator It = m_ReceivedBytesPerSource.find(Result.Source);
+ It != m_ReceivedBytesPerSource.end())
+ {
+ m_SourceBytes[It->second] += Result.ReceivedBytes;
+ }
+ else if (m_SourceCount < MaxSourceCount)
+ {
+ size_t Index = m_SourceCount++;
+ m_ReceivedBytesPerSource.insert_or_assign(Result.Source, Index);
+ m_SourceBytes[Index] += Result.ReceivedBytes;
+ }
+ });
}
}
}
@@ -519,8 +563,11 @@ private:
const std::string m_Bucket;
const std::filesystem::path m_TempFolderPath;
- tsl::robin_map<std::string, uint32_t> m_ReceivedBytesPerSource;
- std::vector<uint64_t> m_SourceBytes;
+ RwLock m_SourceLock;
+ tsl::robin_map<std::string, uint32_t> m_ReceivedBytesPerSource;
+ static constexpr size_t MaxSourceCount = 8u;
+ std::array<std::atomic<uint64_t>, MaxSourceCount> m_SourceBytes;
+ size_t m_SourceCount = 0;
};
std::unique_ptr<BuildStorageBase>
@@ -551,35 +598,135 @@ ParseBuildStorageUrl(std::string_view InUrl,
Url.erase(ApiString, ExtendedApiString.length());
}
- const std::string ArtifactURLRegExString = R"((http[s]?:\/\/.*?)\/(.*?)\/(.*?)\/(.*))";
- const std::regex ArtifactURLRegEx(ArtifactURLRegExString, std::regex::ECMAScript | std::regex::icase);
- std::match_results<std::string_view::const_iterator> MatchResults;
- std::string_view UrlToParse(Url);
- if (regex_match(begin(UrlToParse), end(UrlToParse), MatchResults, ArtifactURLRegEx) && MatchResults.size() == 5)
- {
- auto GetMatch = [&MatchResults](uint32_t Index) -> std::string_view {
- ZEN_ASSERT(Index < MatchResults.size());
+ // Parse URL of the form: http[s]://host/namespace/bucket/buildid
+ std::string_view Remaining(Url);
- const auto& Match = MatchResults[Index];
+ // Find the end of the scheme (e.g. "http://" or "https://")
+ size_t SchemeEnd = Remaining.find("://");
+ if (SchemeEnd == std::string_view::npos)
+ {
+ return false;
+ }
+ SchemeEnd += 3; // skip past "://"
- return std::string_view(&*Match.first, Match.second - Match.first);
- };
+ // Find the first '/' after the host
+ size_t HostEnd = Remaining.find('/', SchemeEnd);
+ if (HostEnd == std::string_view::npos)
+ {
+ return false;
+ }
- const std::string_view Host = GetMatch(1);
- const std::string_view Namespace = GetMatch(2);
- const std::string_view Bucket = GetMatch(3);
- const std::string_view BuildId = GetMatch(4);
+ // Find the '/' after namespace
+ size_t NamespaceEnd = Remaining.find('/', HostEnd + 1);
+ if (NamespaceEnd == std::string_view::npos)
+ {
+ return false;
+ }
- OutHost = Host;
- OutNamespace = Namespace;
- OutBucket = Bucket;
- OutBuildId = BuildId;
- return true;
+ // Find the '/' after bucket
+ size_t BucketEnd = Remaining.find('/', NamespaceEnd + 1);
+ if (BucketEnd == std::string_view::npos)
+ {
+ return false;
}
- else
+
+ // BuildId must be non-empty
+ if (BucketEnd + 1 >= Remaining.size())
{
return false;
}
+
+ OutHost = Remaining.substr(0, HostEnd);
+ OutNamespace = Remaining.substr(HostEnd + 1, NamespaceEnd - HostEnd - 1);
+ OutBucket = Remaining.substr(NamespaceEnd + 1, BucketEnd - NamespaceEnd - 1);
+ OutBuildId = Remaining.substr(BucketEnd + 1);
+ return true;
}
} // namespace zen
+
+#if ZEN_WITH_TESTS
+
+# include <zencore/testing.h>
+
+namespace zen {
+
+void
+jupiterbuildstorage_forcelink()
+{
+}
+
+} // namespace zen
+
+TEST_SUITE_BEGIN("remotestore.jupiterbuildstorage");
+
+// Happy path: scheme://host/namespace/bucket/buildid splits into four parts.
+TEST_CASE("ParseBuildStorageUrl.ValidUrl")
+{
+ std::string Host, Namespace, Bucket, BuildId;
+ bool Result =
+ zen::ParseBuildStorageUrl("https://horde.devtools.epicgames.com/mynamespace/mybucket/mybuildid", Host, Namespace, Bucket, BuildId);
+ CHECK(Result);
+ CHECK(Host == "https://horde.devtools.epicgames.com");
+ CHECK(Namespace == "mynamespace");
+ CHECK(Bucket == "mybucket");
+ CHECK(BuildId == "mybuildid");
+}
+
+// The "api/v2/builds/" prefix is stripped before parsing.
+TEST_CASE("ParseBuildStorageUrl.ValidUrlWithApiPrefix")
+{
+ std::string Host, Namespace, Bucket, BuildId;
+ bool Result = zen::ParseBuildStorageUrl("https://horde.devtools.epicgames.com/api/v2/builds/mynamespace/mybucket/mybuildid",
+ Host,
+ Namespace,
+ Bucket,
+ BuildId);
+ CHECK(Result);
+ CHECK(Host == "https://horde.devtools.epicgames.com");
+ CHECK(Namespace == "mynamespace");
+ CHECK(Bucket == "mybucket");
+ CHECK(BuildId == "mybuildid");
+}
+
+// Plain http scheme is accepted as well as https.
+TEST_CASE("ParseBuildStorageUrl.HttpScheme")
+{
+ std::string Host, Namespace, Bucket, BuildId;
+ bool Result = zen::ParseBuildStorageUrl("http://localhost/ns/bucket/build123", Host, Namespace, Bucket, BuildId);
+ CHECK(Result);
+ CHECK(Host == "http://localhost");
+ CHECK(Namespace == "ns");
+ CHECK(Bucket == "bucket");
+ CHECK(BuildId == "build123");
+}
+
+// Everything after the bucket segment (including '/') belongs to the build id.
+TEST_CASE("ParseBuildStorageUrl.BuildIdWithSlashes")
+{
+ std::string Host, Namespace, Bucket, BuildId;
+ bool Result = zen::ParseBuildStorageUrl("https://host/ns/bucket/build/with/slashes", Host, Namespace, Bucket, BuildId);
+ CHECK(Result);
+ CHECK(Host == "https://host");
+ CHECK(Namespace == "ns");
+ CHECK(Bucket == "bucket");
+ CHECK(BuildId == "build/with/slashes");
+}
+
+// A trailing slash with no build id must be rejected.
+TEST_CASE("ParseBuildStorageUrl.MissingBuildId")
+{
+ std::string Host, Namespace, Bucket, BuildId;
+ CHECK_FALSE(zen::ParseBuildStorageUrl("https://host/ns/bucket/", Host, Namespace, Bucket, BuildId));
+}
+
+// Too few path segments must be rejected.
+TEST_CASE("ParseBuildStorageUrl.MissingBucket")
+{
+ std::string Host, Namespace, Bucket, BuildId;
+ CHECK_FALSE(zen::ParseBuildStorageUrl("https://host/ns", Host, Namespace, Bucket, BuildId));
+}
+
+// A URL without "://" must be rejected.
+TEST_CASE("ParseBuildStorageUrl.NoScheme")
+{
+ std::string Host, Namespace, Bucket, BuildId;
+ CHECK_FALSE(zen::ParseBuildStorageUrl("host/ns/bucket/buildid", Host, Namespace, Bucket, BuildId));
+}
+
+TEST_SUITE_END();
+
+#endif // ZEN_WITH_TESTS
diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp
index d203e0292..f29112f53 100644
--- a/src/zenremotestore/chunking/chunkblock.cpp
+++ b/src/zenremotestore/chunking/chunkblock.cpp
@@ -8,13 +8,9 @@
#include <zencore/timer.h>
#include <zencore/trace.h>
-#include <zenremotestore/operationlogoutput.h>
-
#include <numeric>
-#include <vector>
ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
#include <tsl/robin_set.h>
ZEN_THIRD_PARTY_INCLUDES_END
@@ -27,6 +23,184 @@ namespace zen {
using namespace std::literals;
+namespace chunkblock_impl {
+
+ struct RangeDescriptor
+ {
+ uint64_t RangeStart = 0;
+ uint64_t RangeLength = 0;
+ uint32_t ChunkBlockIndexStart = 0;
+ uint32_t ChunkBlockIndexCount = 0;
+ };
+
+ void MergeCheapestRange(std::vector<RangeDescriptor>& InOutRanges)
+ {
+ ZEN_ASSERT(InOutRanges.size() > 1);
+
+ size_t BestRangeIndexToCollapse = SIZE_MAX;
+ uint64_t BestGap = (uint64_t)-1;
+
+ for (size_t RangeIndex = 0; RangeIndex < InOutRanges.size() - 1; RangeIndex++)
+ {
+ const RangeDescriptor& Range = InOutRanges[RangeIndex];
+ const RangeDescriptor& NextRange = InOutRanges[RangeIndex + 1];
+ uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength);
+ if (Gap < BestGap)
+ {
+ BestRangeIndexToCollapse = RangeIndex;
+ BestGap = Gap;
+ }
+ else if (Gap == BestGap)
+ {
+ const RangeDescriptor& BestRange = InOutRanges[BestRangeIndexToCollapse];
+ const RangeDescriptor& BestNextRange = InOutRanges[BestRangeIndexToCollapse + 1];
+ uint64_t BestMergedSize = (BestNextRange.RangeStart + BestNextRange.RangeLength) - BestRange.RangeStart;
+ uint64_t MergedSize = (NextRange.RangeStart + NextRange.RangeLength) - Range.RangeStart;
+ if (MergedSize < BestMergedSize)
+ {
+ BestRangeIndexToCollapse = RangeIndex;
+ }
+ }
+ }
+
+ ZEN_ASSERT(BestRangeIndexToCollapse != SIZE_MAX);
+ ZEN_ASSERT(BestRangeIndexToCollapse < InOutRanges.size() - 1);
+ ZEN_ASSERT(BestGap != (uint64_t)-1);
+
+ RangeDescriptor& BestRange = InOutRanges[BestRangeIndexToCollapse];
+ const RangeDescriptor& BestNextRange = InOutRanges[BestRangeIndexToCollapse + 1];
+ BestRange.RangeLength = BestNextRange.RangeStart - BestRange.RangeStart + BestNextRange.RangeLength;
+ BestRange.ChunkBlockIndexCount =
+ BestNextRange.ChunkBlockIndexStart - BestRange.ChunkBlockIndexStart + BestNextRange.ChunkBlockIndexCount;
+ InOutRanges.erase(InOutRanges.begin() + BestRangeIndexToCollapse + 1);
+ }
+
+ std::vector<RangeDescriptor> GetBlockRanges(const ChunkBlockDescription& BlockDescription,
+ const uint64_t ChunkStartOffsetInBlock,
+ std::span<const uint32_t> BlockChunkIndexNeeded)
+ {
+ ZEN_TRACE_CPU("GetBlockRanges");
+ std::vector<RangeDescriptor> BlockRanges;
+ {
+ uint64_t CurrentOffset = ChunkStartOffsetInBlock;
+ uint32_t ChunkBlockIndex = 0;
+ uint32_t NeedBlockChunkIndexOffset = 0;
+ RangeDescriptor NextRange;
+ while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && ChunkBlockIndex < BlockDescription.ChunkRawHashes.size())
+ {
+ const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex];
+ if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset])
+ {
+ if (NextRange.RangeLength > 0)
+ {
+ BlockRanges.push_back(NextRange);
+ NextRange = {};
+ }
+ ChunkBlockIndex++;
+ CurrentOffset += ChunkCompressedLength;
+ }
+ else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset])
+ {
+ if (NextRange.RangeLength == 0)
+ {
+ NextRange.RangeStart = CurrentOffset;
+ NextRange.ChunkBlockIndexStart = ChunkBlockIndex;
+ }
+ NextRange.RangeLength += ChunkCompressedLength;
+ NextRange.ChunkBlockIndexCount++;
+ ChunkBlockIndex++;
+ CurrentOffset += ChunkCompressedLength;
+ NeedBlockChunkIndexOffset++;
+ }
+ else
+ {
+ ZEN_ASSERT(false);
+ }
+ }
+ if (NextRange.RangeLength > 0)
+ {
+ BlockRanges.push_back(NextRange);
+ }
+ }
+ ZEN_ASSERT(!BlockRanges.empty());
+ return BlockRanges;
+ }
+
+ std::vector<RangeDescriptor> OptimizeRanges(uint64_t TotalBlockSize,
+ std::span<const RangeDescriptor> ExactRanges,
+ double LatencySec,
+ uint64_t SpeedBytesPerSec,
+ uint64_t MaxRangeCountPerRequest,
+ uint64_t MaxRangesPerBlock)
+ {
+ ZEN_TRACE_CPU("OptimizeRanges");
+ ZEN_ASSERT(MaxRangesPerBlock > 0);
+ std::vector<RangeDescriptor> Ranges(ExactRanges.begin(), ExactRanges.end());
+
+ while (Ranges.size() > MaxRangesPerBlock)
+ {
+ MergeCheapestRange(Ranges);
+ }
+
+ while (true)
+ {
+ const std::uint64_t RangeTotalSize =
+ std::accumulate(Ranges.begin(), Ranges.end(), uint64_t(0u), [](uint64_t Current, const RangeDescriptor& Value) {
+ return Current + Value.RangeLength;
+ });
+
+ const size_t RangeCount = Ranges.size();
+ const uint64_t RequestCount =
+ MaxRangeCountPerRequest == (uint64_t)-1 ? 1 : (RangeCount + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest;
+ uint64_t RequestTimeAsBytes = uint64_t(SpeedBytesPerSec * RequestCount * LatencySec);
+
+ if (RangeCount == 1)
+ {
+ // Does fetching the full block add less time than the time it takes to complete a single request?
+ if (TotalBlockSize - RangeTotalSize < SpeedBytesPerSec * LatencySec)
+ {
+ const std::uint64_t InitialRangeTotalSize =
+ std::accumulate(ExactRanges.begin(),
+ ExactRanges.end(),
+ uint64_t(0u),
+ [](uint64_t Current, const RangeDescriptor& Value) { return Current + Value.RangeLength; });
+
+ ZEN_DEBUG(
+ "Latency round trip takes as long as receiving the extra redundant bytes - go full block, dropping {} of slack, "
+ "adding {} of bytes to fetch, for block of size {}",
+ NiceBytes(TotalBlockSize - RangeTotalSize),
+ NiceBytes(TotalBlockSize - InitialRangeTotalSize),
+ NiceBytes(TotalBlockSize));
+ return {};
+ }
+ else
+ {
+ return Ranges;
+ }
+ }
+
+ if (RequestTimeAsBytes < (TotalBlockSize - RangeTotalSize))
+ {
+ return Ranges;
+ }
+
+ if (RangeCount == 2)
+ {
+ // Merge to single range
+ Ranges.front().RangeLength = Ranges.back().RangeStart - Ranges.front().RangeStart + Ranges.back().RangeLength;
+ Ranges.front().ChunkBlockIndexCount =
+ Ranges.back().ChunkBlockIndexStart - Ranges.front().ChunkBlockIndexStart + Ranges.back().ChunkBlockIndexCount;
+ Ranges.pop_back();
+ }
+ else
+ {
+ MergeCheapestRange(Ranges);
+ }
+ }
+ }
+
+} // namespace chunkblock_impl
+
ChunkBlockDescription
ParseChunkBlockDescription(const CbObjectView& BlockObject)
{
@@ -177,9 +351,9 @@ GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks,
BufferEndPtr += WriteVarUInt(gsl::narrow<uint64_t>(ChunkCount), BufferEndPtr);
for (const auto& It : FetchChunks)
{
- std::pair<uint64_t, CompressedBuffer> Chunk = It.second(It.first);
- uint64_t ChunkSize = 0;
- std::span<const SharedBuffer> Segments = Chunk.second.GetCompressed().GetSegments();
+ std::pair<uint64_t, CompositeBuffer> Chunk = It.second(It.first);
+ uint64_t ChunkSize = 0;
+ std::span<const SharedBuffer> Segments = Chunk.second.GetSegments();
for (const SharedBuffer& Segment : Segments)
{
ZEN_ASSERT(Segment.IsOwned());
@@ -270,7 +444,7 @@ IterateChunkBlock(const SharedBuffer& BlockPayload,
};
std::vector<size_t>
-FindReuseBlocks(OperationLogOutput& Output,
+FindReuseBlocks(LoggerRef InLog,
const uint8_t BlockReuseMinPercentLimit,
const bool IsVerbose,
ReuseBlocksStatistics& Stats,
@@ -280,6 +454,7 @@ FindReuseBlocks(OperationLogOutput& Output,
std::vector<uint32_t>& OutUnusedChunkIndexes)
{
ZEN_TRACE_CPU("FindReuseBlocks");
+ ZEN_SCOPED_LOG(InLog);
// Find all blocks with a usage level higher than MinPercentLimit
// Pick out the blocks with usage higher or equal to MinPercentLimit
@@ -346,11 +521,10 @@ FindReuseBlocks(OperationLogOutput& Output,
{
if (IsVerbose)
{
- ZEN_OPERATION_LOG_INFO(Output,
- "Reusing block {}. {} attachments found, usage level: {}%",
- KnownBlock.BlockHash,
- FoundAttachmentCount,
- ReusePercent);
+ ZEN_INFO("Reusing block {}. {} attachments found, usage level: {}%",
+ KnownBlock.BlockHash,
+ FoundAttachmentCount,
+ ReusePercent);
}
ReuseBlockIndexes.push_back(KnownBlockIndex);
@@ -359,12 +533,13 @@ FindReuseBlocks(OperationLogOutput& Output,
}
else if (FoundAttachmentCount > 0)
{
- // if (IsVerbose)
- //{
- // ZEN_OPERATION_LOG_INFO(Output, "Skipping block {}. {} attachments found, usage level: {}%",
- // KnownBlock.BlockHash,
- // FoundAttachmentCount, ReusePercent);
- //}
+ if (IsVerbose)
+ {
+ ZEN_INFO("Skipping block {}. {} attachments found, usage level: {}%",
+ KnownBlock.BlockHash,
+ FoundAttachmentCount,
+ ReusePercent);
+ }
Stats.RejectedBlockCount++;
Stats.RejectedChunkCount += FoundAttachmentCount;
Stats.RejectedByteCount += ReuseSize;
@@ -408,11 +583,10 @@ FindReuseBlocks(OperationLogOutput& Output,
{
if (IsVerbose)
{
- ZEN_OPERATION_LOG_INFO(Output,
- "Reusing block {}. {} attachments found, usage level: {}%",
- KnownBlock.BlockHash,
- FoundChunkIndexes.size(),
- ReusePercent);
+ ZEN_INFO("Reusing block {}. {} attachments found, usage level: {}%",
+ KnownBlock.BlockHash,
+ FoundChunkIndexes.size(),
+ ReusePercent);
}
FilteredReuseBlockIndexes.push_back(KnownBlockIndex);
@@ -429,11 +603,10 @@ FindReuseBlocks(OperationLogOutput& Output,
}
else
{
- // if (IsVerbose)
- //{
- // ZEN_OPERATION_LOG_INFO(Output, "Skipping block {}. filtered usage level: {}%", KnownBlock.BlockHash,
- // ReusePercent);
- //}
+ if (IsVerbose)
+ {
+ ZEN_INFO("Skipping block {}. filtered usage level: {}%", KnownBlock.BlockHash, ReusePercent);
+ }
Stats.RejectedBlockCount++;
Stats.RejectedChunkCount += FoundChunkIndexes.size();
Stats.RejectedByteCount += AdjustedReuseSize;
@@ -454,10 +627,8 @@ FindReuseBlocks(OperationLogOutput& Output,
return FilteredReuseBlockIndexes;
}
-ChunkBlockAnalyser::ChunkBlockAnalyser(OperationLogOutput& LogOutput,
- std::span<const ChunkBlockDescription> BlockDescriptions,
- const Options& Options)
-: m_LogOutput(LogOutput)
+ChunkBlockAnalyser::ChunkBlockAnalyser(LoggerRef Log, std::span<const ChunkBlockDescription> BlockDescriptions, const Options& Options)
+: m_Log(Log)
, m_BlockDescriptions(BlockDescriptions)
, m_Options(Options)
{
@@ -555,480 +726,198 @@ ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span<const NeededBlock>
ChunkBlockAnalyser::BlockResult Result;
- uint64_t IdealDownloadTotalSize = 0;
- uint64_t AllBlocksTotalBlocksSize = 0;
-
- for (const NeededBlock& NeededBlock : NeededBlocks)
{
- const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex];
-
- std::span<const uint32_t> BlockChunkIndexNeeded(NeededBlock.ChunkIndexes);
- if (!NeededBlock.ChunkIndexes.empty())
+ uint64_t MinRequestCount = 0;
+ uint64_t RequestCount = 0;
+ uint64_t RangeCount = 0;
+ uint64_t IdealDownloadTotalSize = 0;
+ uint64_t ActualDownloadTotalSize = 0;
+ uint64_t FullDownloadTotalSize = 0;
+ for (const NeededBlock& NeededBlock : NeededBlocks)
{
- bool WantsToDoPartialBlockDownload = NeededBlock.ChunkIndexes.size() < BlockDescription.ChunkRawHashes.size();
- bool CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) &&
- (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size());
-
- EPartialBlockDownloadMode PartialBlockDownloadMode = BlockPartialDownloadModes[NeededBlock.BlockIndex];
-
- const uint32_t ChunkStartOffsetInBlock =
+ const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex];
+ std::span<const uint32_t> BlockChunkIndexNeeded(NeededBlock.ChunkIndexes);
+ const uint32_t ChunkStartOffsetInBlock =
gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize);
+ uint64_t TotalBlockSize = std::accumulate(BlockDescription.ChunkCompressedLengths.begin(),
+ BlockDescription.ChunkCompressedLengths.end(),
+ uint64_t(ChunkStartOffsetInBlock));
+ uint64_t ExactRangesSize = 0;
+ uint64_t DownloadRangesSize = 0;
+ uint64_t FullDownloadSize = 0;
+
+ bool CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) &&
+ (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size());
- const uint64_t TotalBlockSize = std::accumulate(BlockDescription.ChunkCompressedLengths.begin(),
- BlockDescription.ChunkCompressedLengths.end(),
- std::uint64_t(ChunkStartOffsetInBlock));
-
- AllBlocksTotalBlocksSize += TotalBlockSize;
-
- if ((PartialBlockDownloadMode != EPartialBlockDownloadMode::Off) && WantsToDoPartialBlockDownload && CanDoPartialBlockDownload)
+ if (NeededBlock.ChunkIndexes.size() == BlockDescription.ChunkRawHashes.size() || !CanDoPartialBlockDownload)
{
- ZEN_TRACE_CPU("PartialBlockAnalysis");
-
- uint64_t TotalWantedChunksSize = 0;
- std::optional<std::vector<BlockRangeDescriptor>> MaybeBlockRanges = CalculateBlockRanges(NeededBlock.BlockIndex,
- BlockDescription,
- NeededBlock.ChunkIndexes,
- PartialBlockDownloadMode,
- ChunkStartOffsetInBlock,
- TotalBlockSize,
- TotalWantedChunksSize);
- ZEN_ASSERT(TotalWantedChunksSize <= TotalBlockSize);
- IdealDownloadTotalSize += TotalWantedChunksSize;
-
- if (MaybeBlockRanges.has_value())
+ // Full block
+ ExactRangesSize = TotalBlockSize;
+ DownloadRangesSize = TotalBlockSize;
+ FullDownloadSize = TotalBlockSize;
+ MinRequestCount++;
+ RequestCount++;
+ RangeCount++;
+ Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex);
+ }
+ else if (NeededBlock.ChunkIndexes.empty())
+ {
+ // Not needed
+ }
+ else
+ {
+ FullDownloadSize = TotalBlockSize;
+ std::vector<chunkblock_impl::RangeDescriptor> Ranges =
+ chunkblock_impl::GetBlockRanges(BlockDescription, ChunkStartOffsetInBlock, BlockChunkIndexNeeded);
+ ExactRangesSize = std::accumulate(
+ Ranges.begin(),
+ Ranges.end(),
+ uint64_t(0),
+ [](uint64_t Current, const chunkblock_impl::RangeDescriptor& Range) { return Current + Range.RangeLength; });
+
+ EPartialBlockDownloadMode PartialBlockDownloadMode = BlockPartialDownloadModes[NeededBlock.BlockIndex];
+ if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Off)
{
- std::vector<BlockRangeDescriptor> BlockRanges = MaybeBlockRanges.value();
- ZEN_ASSERT(!BlockRanges.empty());
-
- uint64_t RequestedSize =
- std::accumulate(BlockRanges.begin(),
- BlockRanges.end(),
- uint64_t(0),
- [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; });
+ // Use full block
+ MinRequestCount++;
+ RangeCount++;
+ RequestCount++;
+ Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex);
+ DownloadRangesSize = TotalBlockSize;
+ }
+ else
+ {
+ const bool IsHighSpeed = (PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed);
+ uint64_t MaxRangeCountPerRequest =
+ IsHighSpeed ? m_Options.HostHighSpeedMaxRangeCountPerRequest : m_Options.HostMaxRangeCountPerRequest;
+ ZEN_ASSERT(MaxRangeCountPerRequest != 0);
- if (PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact && BlockRanges.size() > 1)
+ if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Exact)
{
- // TODO: Once we have support in our http client to request multiple ranges in one request this
- // logic would need to change as the per-request overhead would go away
-
- const double LatencySec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed
- ? m_Options.HostHighSpeedLatencySec
- : m_Options.HostLatencySec;
- if (LatencySec > 0)
+ // Use exact ranges
+ for (const chunkblock_impl::RangeDescriptor& Range : Ranges)
{
- const uint64_t BytesPerSec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed
- ? m_Options.HostHighSpeedBytesPerSec
- : m_Options.HostSpeedBytesPerSec;
-
- const double ExtraRequestTimeSec = (BlockRanges.size() - 1) * LatencySec;
- const uint64_t ExtraRequestTimeBytes = uint64_t(ExtraRequestTimeSec * BytesPerSec);
-
- const uint64_t FullRangeSize =
- BlockRanges.back().RangeStart + BlockRanges.back().RangeLength - BlockRanges.front().RangeStart;
+ Result.BlockRanges.push_back(BlockRangeDescriptor{.BlockIndex = NeededBlock.BlockIndex,
+ .RangeStart = Range.RangeStart,
+ .RangeLength = Range.RangeLength,
+ .ChunkBlockIndexStart = Range.ChunkBlockIndexStart,
+ .ChunkBlockIndexCount = Range.ChunkBlockIndexCount});
+ }
- if (ExtraRequestTimeBytes + RequestedSize >= FullRangeSize)
+ MinRequestCount++;
+ RangeCount += Ranges.size();
+ RequestCount += MaxRangeCountPerRequest == (uint64_t)-1
+ ? 1
+ : (Ranges.size() + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest;
+ DownloadRangesSize = ExactRangesSize;
+ }
+ else
+ {
+ if (PartialBlockDownloadMode == EPartialBlockDownloadMode::SingleRange)
+ {
+ // Use single range
+ if (Ranges.size() > 1)
{
- BlockRanges = std::vector<BlockRangeDescriptor>{MergeBlockRanges(BlockRanges)};
-
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Merging {} chunks ({}) from block {} ({}) to single request (extra bytes {})",
- NeededBlock.ChunkIndexes.size(),
- NiceBytes(RequestedSize),
- BlockDescription.BlockHash,
- NiceBytes(TotalBlockSize),
- NiceBytes(BlockRanges.front().RangeLength - RequestedSize));
- }
-
- RequestedSize = BlockRanges.front().RangeLength;
+ Ranges = {chunkblock_impl::RangeDescriptor{
+ .RangeStart = Ranges.front().RangeStart,
+ .RangeLength = Ranges.back().RangeStart + Ranges.back().RangeLength - Ranges.front().RangeStart,
+ .ChunkBlockIndexStart = Ranges.front().ChunkBlockIndexStart,
+ .ChunkBlockIndexCount = Ranges.back().ChunkBlockIndexStart + Ranges.back().ChunkBlockIndexCount -
+ Ranges.front().ChunkBlockIndexStart}};
}
+
+ // We still do the optimize pass to see if it is more effective to use a full block
}
- }
- if ((PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact) &&
- ((TotalBlockSize - RequestedSize) < (512u * 1024u)))
- {
- if (m_Options.IsVerbose)
+ double LatencySec = IsHighSpeed ? m_Options.HostHighSpeedLatencySec : m_Options.HostLatencySec;
+ uint64_t SpeedBytesPerSec = IsHighSpeed ? m_Options.HostHighSpeedBytesPerSec : m_Options.HostSpeedBytesPerSec;
+ if (LatencySec > 0.0 && SpeedBytesPerSec > 0u)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Requesting {} chunks ({}) from block {} ({}) using full block request due to small "
- "total slack (extra bytes {})",
- NeededBlock.ChunkIndexes.size(),
- NiceBytes(RequestedSize),
- BlockDescription.BlockHash,
- NiceBytes(TotalBlockSize),
- NiceBytes(TotalBlockSize - TotalWantedChunksSize));
+ Ranges = chunkblock_impl::OptimizeRanges(TotalBlockSize,
+ Ranges,
+ LatencySec,
+ SpeedBytesPerSec,
+ MaxRangeCountPerRequest,
+ m_Options.MaxRangesPerBlock);
}
- Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex);
- }
- else
- {
- Result.BlockRanges.insert(Result.BlockRanges.end(), BlockRanges.begin(), BlockRanges.end());
- if (m_Options.IsVerbose)
+ MinRequestCount++;
+ if (Ranges.empty())
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Requesting {} chunks ({}) from block {} ({}) using {} requests (extra bytes {})",
- NeededBlock.ChunkIndexes.size(),
- NiceBytes(RequestedSize),
- BlockDescription.BlockHash,
- NiceBytes(TotalBlockSize),
- BlockRanges.size(),
- NiceBytes(RequestedSize - TotalWantedChunksSize));
+ Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex);
+ RequestCount++;
+ RangeCount++;
+ DownloadRangesSize = TotalBlockSize;
}
+ else
+ {
+ for (const chunkblock_impl::RangeDescriptor& Range : Ranges)
+ {
+ Result.BlockRanges.push_back(BlockRangeDescriptor{.BlockIndex = NeededBlock.BlockIndex,
+ .RangeStart = Range.RangeStart,
+ .RangeLength = Range.RangeLength,
+ .ChunkBlockIndexStart = Range.ChunkBlockIndexStart,
+ .ChunkBlockIndexCount = Range.ChunkBlockIndexCount});
+ }
+ RangeCount += Ranges.size();
+ RequestCount += MaxRangeCountPerRequest == (uint64_t)-1
+ ? 1
+ : (Ranges.size() + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest;
+ }
+
+ DownloadRangesSize = Ranges.empty()
+ ? TotalBlockSize
+ : std::accumulate(Ranges.begin(),
+ Ranges.end(),
+ uint64_t(0),
+ [](uint64_t Current, const chunkblock_impl::RangeDescriptor& Range) {
+ return Current + Range.RangeLength;
+ });
}
}
- else
- {
- Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex);
- }
}
- else
- {
- Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex);
- IdealDownloadTotalSize += TotalBlockSize;
- }
- }
- }
+ IdealDownloadTotalSize += ExactRangesSize;
+ ActualDownloadTotalSize += DownloadRangesSize;
+ FullDownloadTotalSize += FullDownloadSize;
- if (!Result.BlockRanges.empty() && !m_Options.IsQuiet)
- {
- tsl::robin_set<uint32_t> PartialBlockIndexes;
- uint64_t PartialBlocksTotalSize = std::accumulate(Result.BlockRanges.begin(),
- Result.BlockRanges.end(),
- uint64_t(0u),
- [&](uint64_t Current, const BlockRangeDescriptor& Range) {
- PartialBlockIndexes.insert(Range.BlockIndex);
- return Current + Range.RangeLength;
- });
-
- uint64_t FullBlocksTotalSize =
- std::accumulate(Result.FullBlockIndexes.begin(),
- Result.FullBlockIndexes.end(),
- uint64_t(0u),
- [&](uint64_t Current, uint32_t BlockIndex) {
- const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
- uint32_t CurrentOffset =
- gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize);
-
- return Current + std::accumulate(BlockDescription.ChunkCompressedLengths.begin(),
- BlockDescription.ChunkCompressedLengths.end(),
- std::uint64_t(CurrentOffset));
- });
-
- uint64_t PartialBlockRequestCount = Result.BlockRanges.size();
- uint64_t PartialBlockCount = PartialBlockIndexes.size();
-
- uint64_t TotalExtraPartialBlocksRequestCount = PartialBlockRequestCount - PartialBlockCount;
- uint64_t ActualPartialDownloadTotalSize = FullBlocksTotalSize + PartialBlocksTotalSize;
-
- uint64_t IdealSkippedSize = AllBlocksTotalBlocksSize - IdealDownloadTotalSize;
- uint64_t ActualSkippedSize = AllBlocksTotalBlocksSize - ActualPartialDownloadTotalSize;
-
- double PercentOfIdealPartialSkippedSize = (ActualSkippedSize * 100.0) / IdealSkippedSize;
-
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Analysis of partial block requests saves download of {} out of {}, {:.1f}% of possible {} using {} extra "
- "requests. Completed in {}",
- NiceBytes(ActualSkippedSize),
- NiceBytes(AllBlocksTotalBlocksSize),
- PercentOfIdealPartialSkippedSize,
- NiceBytes(IdealSkippedSize),
- TotalExtraPartialBlocksRequestCount,
- NiceTimeSpanMs(PartialAnalisysTimer.GetElapsedTimeMs()));
- }
-
- return Result;
-}
-
-ChunkBlockAnalyser::BlockRangeDescriptor
-ChunkBlockAnalyser::MergeBlockRanges(std::span<const BlockRangeDescriptor> Ranges)
-{
- ZEN_ASSERT(Ranges.size() > 1);
- const BlockRangeDescriptor& First = Ranges.front();
- const BlockRangeDescriptor& Last = Ranges.back();
-
- return BlockRangeDescriptor{.BlockIndex = First.BlockIndex,
- .RangeStart = First.RangeStart,
- .RangeLength = Last.RangeStart + Last.RangeLength - First.RangeStart,
- .ChunkBlockIndexStart = First.ChunkBlockIndexStart,
- .ChunkBlockIndexCount = Last.ChunkBlockIndexStart + Last.ChunkBlockIndexCount - First.ChunkBlockIndexStart};
-}
-
-std::optional<std::vector<ChunkBlockAnalyser::BlockRangeDescriptor>>
-ChunkBlockAnalyser::MakeOptionalBlockRangeVector(uint64_t TotalBlockSize, const BlockRangeDescriptor& Range)
-{
- if (Range.RangeLength == TotalBlockSize)
- {
- return {};
- }
- else
- {
- return std::vector<BlockRangeDescriptor>{Range};
- }
-};
-
-const ChunkBlockAnalyser::BlockRangeLimit*
-ChunkBlockAnalyser::GetBlockRangeLimitForRange(std::span<const BlockRangeLimit> Limits,
- uint64_t TotalBlockSize,
- std::span<const BlockRangeDescriptor> Ranges)
-{
- if (Ranges.size() > 1)
- {
- const std::uint64_t WantedSize =
- std::accumulate(Ranges.begin(), Ranges.end(), uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) {
- return Current + Range.RangeLength;
- });
-
- const double RangeRequestedPercent = (WantedSize * 100.0) / TotalBlockSize;
-
- for (const BlockRangeLimit& Limit : Limits)
- {
- if (RangeRequestedPercent >= Limit.SizePercent && Ranges.size() > Limit.MaxRangeCount)
+ if (ExactRangesSize < FullDownloadSize)
{
- return &Limit;
+ ZEN_DEBUG("Block {}: Full: {}, Ideal: {}, Actual: {}, Saves: {}",
+ NeededBlock.BlockIndex,
+ NiceBytes(FullDownloadSize),
+ NiceBytes(ExactRangesSize),
+ NiceBytes(DownloadRangesSize),
+ NiceBytes(FullDownloadSize - DownloadRangesSize));
}
}
- }
- return nullptr;
-};
-
-std::vector<ChunkBlockAnalyser::BlockRangeDescriptor>
-ChunkBlockAnalyser::CollapseBlockRanges(const uint64_t AlwaysAcceptableGap, std::span<const BlockRangeDescriptor> BlockRanges)
-{
- ZEN_ASSERT(BlockRanges.size() > 1);
- std::vector<BlockRangeDescriptor> CollapsedBlockRanges;
-
- auto BlockRangesIt = BlockRanges.begin();
- CollapsedBlockRanges.push_back(*BlockRangesIt++);
- for (; BlockRangesIt != BlockRanges.end(); BlockRangesIt++)
- {
- BlockRangeDescriptor& LastRange = CollapsedBlockRanges.back();
-
- const uint64_t BothRangeSize = BlockRangesIt->RangeLength + LastRange.RangeLength;
-
- const uint64_t Gap = BlockRangesIt->RangeStart - (LastRange.RangeStart + LastRange.RangeLength);
- if (Gap <= Max(BothRangeSize / 16, AlwaysAcceptableGap))
- {
- LastRange.ChunkBlockIndexCount =
- (BlockRangesIt->ChunkBlockIndexStart + BlockRangesIt->ChunkBlockIndexCount) - LastRange.ChunkBlockIndexStart;
- LastRange.RangeLength = (BlockRangesIt->RangeStart + BlockRangesIt->RangeLength) - LastRange.RangeStart;
- }
- else
+ uint64_t Actual = FullDownloadTotalSize - ActualDownloadTotalSize;
+ uint64_t Ideal = FullDownloadTotalSize - IdealDownloadTotalSize;
+ if (Ideal < FullDownloadTotalSize && !m_Options.IsQuiet)
{
- CollapsedBlockRanges.push_back(*BlockRangesIt);
+ const double AchievedPercent = Ideal == 0 ? 100.0 : (100.0 * Actual) / Ideal;
+ ZEN_INFO(
+ "Block Partial Analysis: Blocks: {}, Full: {}, Ideal: {}, Actual: {}. Skipping {} ({:.1f}%) out of "
+ "possible {} using {} extra ranges "
+ "via {} extra requests. Completed in {}",
+ NeededBlocks.size(),
+ NiceBytes(FullDownloadTotalSize),
+ NiceBytes(IdealDownloadTotalSize),
+ NiceBytes(ActualDownloadTotalSize),
+ NiceBytes(FullDownloadTotalSize - ActualDownloadTotalSize),
+ AchievedPercent,
+ NiceBytes(Ideal),
+ RangeCount - MinRequestCount,
+ RequestCount - MinRequestCount,
+ NiceTimeSpanMs(PartialAnalisysTimer.GetElapsedTimeMs()));
}
}
- return CollapsedBlockRanges;
-};
-
-uint64_t
-ChunkBlockAnalyser::CalculateNextGap(const uint64_t AlwaysAcceptableGap, std::span<const BlockRangeDescriptor> BlockRanges)
-{
- ZEN_ASSERT(BlockRanges.size() > 1);
- uint64_t AcceptableGap = (uint64_t)-1;
- for (size_t RangeIndex = 0; RangeIndex < BlockRanges.size() - 1; RangeIndex++)
- {
- const BlockRangeDescriptor& Range = BlockRanges[RangeIndex];
- const BlockRangeDescriptor& NextRange = BlockRanges[RangeIndex + 1];
-
- const uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength);
- AcceptableGap = Min(Gap, AcceptableGap);
- }
- AcceptableGap = RoundUp(AcceptableGap, AlwaysAcceptableGap);
- return AcceptableGap;
-};
-
-std::optional<std::vector<ChunkBlockAnalyser::BlockRangeDescriptor>>
-ChunkBlockAnalyser::CalculateBlockRanges(uint32_t BlockIndex,
- const ChunkBlockDescription& BlockDescription,
- std::span<const uint32_t> BlockChunkIndexNeeded,
- EPartialBlockDownloadMode PartialBlockDownloadMode,
- const uint64_t ChunkStartOffsetInBlock,
- const uint64_t TotalBlockSize,
- uint64_t& OutTotalWantedChunksSize)
-{
- ZEN_TRACE_CPU("CalculateBlockRanges");
-
- if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Off)
- {
- return {};
- }
-
- std::vector<BlockRangeDescriptor> BlockRanges;
- {
- uint64_t CurrentOffset = ChunkStartOffsetInBlock;
- uint32_t ChunkBlockIndex = 0;
- uint32_t NeedBlockChunkIndexOffset = 0;
- BlockRangeDescriptor NextRange{.BlockIndex = BlockIndex};
- while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && ChunkBlockIndex < BlockDescription.ChunkRawHashes.size())
- {
- const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex];
- if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset])
- {
- if (NextRange.RangeLength > 0)
- {
- BlockRanges.push_back(NextRange);
- NextRange = {.BlockIndex = BlockIndex};
- }
- ChunkBlockIndex++;
- CurrentOffset += ChunkCompressedLength;
- }
- else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset])
- {
- if (NextRange.RangeLength == 0)
- {
- NextRange.RangeStart = CurrentOffset;
- NextRange.ChunkBlockIndexStart = ChunkBlockIndex;
- }
- NextRange.RangeLength += ChunkCompressedLength;
- NextRange.ChunkBlockIndexCount++;
- ChunkBlockIndex++;
- CurrentOffset += ChunkCompressedLength;
- NeedBlockChunkIndexOffset++;
- }
- else
- {
- ZEN_ASSERT(false);
- }
- }
- if (NextRange.RangeLength > 0)
- {
- BlockRanges.push_back(NextRange);
- }
- }
- ZEN_ASSERT(!BlockRanges.empty());
-
- OutTotalWantedChunksSize =
- std::accumulate(BlockRanges.begin(), BlockRanges.end(), uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) {
- return Current + Range.RangeLength;
- });
-
- double RangeWantedPercent = (OutTotalWantedChunksSize * 100.0) / TotalBlockSize;
-
- if (BlockRanges.size() == 1)
- {
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Range request of {} ({:.2f}%) using single range from block {} ({}) as is",
- NiceBytes(OutTotalWantedChunksSize),
- RangeWantedPercent,
- BlockDescription.BlockHash,
- NiceBytes(TotalBlockSize));
- }
- return BlockRanges;
- }
-
- if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Exact)
- {
- if (m_Options.IsVerbose)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Range request of {} ({:.2f}%) using {} ranges from block {} ({})",
- NiceBytes(OutTotalWantedChunksSize),
- RangeWantedPercent,
- BlockRanges.size(),
- BlockDescription.BlockHash,
- NiceBytes(TotalBlockSize));
- }
- return BlockRanges;
- }
-
- if (PartialBlockDownloadMode == EPartialBlockDownloadMode::SingleRange)
- {
- const BlockRangeDescriptor MergedRange = MergeBlockRanges(BlockRanges);
- if (m_Options.IsVerbose)
- {
- const double RangeRequestedPercent = (MergedRange.RangeLength * 100.0) / TotalBlockSize;
- const double WastedPercent = ((MergedRange.RangeLength - OutTotalWantedChunksSize) * 100.0) / MergedRange.RangeLength;
-
- ZEN_OPERATION_LOG_INFO(
- m_LogOutput,
- "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) limited to single block range {} ({:.2f}%) wasting "
- "{:.2f}% ({})",
- NiceBytes(OutTotalWantedChunksSize),
- RangeWantedPercent,
- BlockRanges.size(),
- BlockDescription.BlockHash,
- NiceBytes(TotalBlockSize),
- NiceBytes(MergedRange.RangeLength),
- RangeRequestedPercent,
- WastedPercent,
- NiceBytes(MergedRange.RangeLength - OutTotalWantedChunksSize));
- }
- return MakeOptionalBlockRangeVector(TotalBlockSize, MergedRange);
- }
-
- if (RangeWantedPercent > FullBlockRangePercentLimit)
- {
- const BlockRangeDescriptor MergedRange = MergeBlockRanges(BlockRanges);
- if (m_Options.IsVerbose)
- {
- const double RangeRequestedPercent = (MergedRange.RangeLength * 100.0) / TotalBlockSize;
- const double WastedPercent = ((MergedRange.RangeLength - OutTotalWantedChunksSize) * 100.0) / MergedRange.RangeLength;
-
- ZEN_OPERATION_LOG_INFO(
- m_LogOutput,
- "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) exceeds {}%. Merged to single block range {} "
- "({:.2f}%) wasting {:.2f}% ({})",
- NiceBytes(OutTotalWantedChunksSize),
- RangeWantedPercent,
- BlockRanges.size(),
- BlockDescription.BlockHash,
- NiceBytes(TotalBlockSize),
- FullBlockRangePercentLimit,
- NiceBytes(MergedRange.RangeLength),
- RangeRequestedPercent,
- WastedPercent,
- NiceBytes(MergedRange.RangeLength - OutTotalWantedChunksSize));
- }
- return MakeOptionalBlockRangeVector(TotalBlockSize, MergedRange);
- }
-
- const uint64_t AlwaysAcceptableGap = 4u * 1024u;
-
- std::vector<BlockRangeDescriptor> CollapsedBlockRanges = CollapseBlockRanges(AlwaysAcceptableGap, BlockRanges);
- while (GetBlockRangeLimitForRange(ForceMergeLimits, TotalBlockSize, CollapsedBlockRanges))
- {
- CollapsedBlockRanges = CollapseBlockRanges(CalculateNextGap(AlwaysAcceptableGap, CollapsedBlockRanges), CollapsedBlockRanges);
- }
-
- const std::uint64_t WantedCollapsedSize =
- std::accumulate(CollapsedBlockRanges.begin(),
- CollapsedBlockRanges.end(),
- uint64_t(0),
- [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; });
-
- const double CollapsedRangeRequestedPercent = (WantedCollapsedSize * 100.0) / TotalBlockSize;
-
- if (m_Options.IsVerbose)
- {
- const double WastedPercent = ((WantedCollapsedSize - OutTotalWantedChunksSize) * 100.0) / WantedCollapsedSize;
-
- ZEN_OPERATION_LOG_INFO(
- m_LogOutput,
- "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) collapsed to {} {:.2f}% using {} ranges wasting {:.2f}% "
- "({})",
- NiceBytes(OutTotalWantedChunksSize),
- RangeWantedPercent,
- BlockRanges.size(),
- BlockDescription.BlockHash,
- NiceBytes(TotalBlockSize),
- NiceBytes(WantedCollapsedSize),
- CollapsedRangeRequestedPercent,
- CollapsedBlockRanges.size(),
- WastedPercent,
- NiceBytes(WantedCollapsedSize - OutTotalWantedChunksSize));
- }
- return CollapsedBlockRanges;
+ return Result;
}
#if ZEN_WITH_TESTS
-namespace testutils {
+namespace chunkblock_testutils {
static std::vector<std::pair<Oid, CompressedBuffer>> CreateAttachments(
const std::span<const size_t>& Sizes,
OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast,
@@ -1045,12 +934,14 @@ namespace testutils {
return Result;
}
-} // namespace testutils
+} // namespace chunkblock_testutils
+
+TEST_SUITE_BEGIN("remotestore.chunkblock");
TEST_CASE("chunkblock.block")
{
using namespace std::literals;
- using namespace testutils;
+ using namespace chunkblock_testutils;
std::vector<std::size_t> AttachmentSizes({7633, 6825, 5738, 8031, 7225, 566, 3656, 6006, 24, 3466, 1093, 4269, 2257, 3685, 3489,
7194, 6151, 5482, 6217, 3511, 6738, 5061, 7537, 2759, 1916, 8210, 2235, 4024, 1582, 5251,
@@ -1062,8 +953,8 @@ TEST_CASE("chunkblock.block")
for (const auto& It : AttachmentsWithId)
{
Chunks.push_back(
- std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> {
- return {Buffer.DecodeRawSize(), Buffer};
+ std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompositeBuffer> {
+ return {Buffer.DecodeRawSize(), Buffer.GetCompressed()};
}));
}
ChunkBlockDescription Block;
@@ -1078,7 +969,7 @@ TEST_CASE("chunkblock.block")
TEST_CASE("chunkblock.reuseblocks")
{
using namespace std::literals;
- using namespace testutils;
+ using namespace chunkblock_testutils;
std::vector<std::vector<std::size_t>> BlockAttachmentSizes(
{std::vector<std::size_t>{7633, 6825, 5738, 8031, 7225, 566, 3656, 6006, 24, 3466, 1093, 4269, 2257, 3685, 3489,
@@ -1097,8 +988,8 @@ TEST_CASE("chunkblock.reuseblocks")
for (const auto& It : AttachmentsWithId)
{
Chunks.push_back(
- std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> {
- return {Buffer.DecodeRawSize(), Buffer};
+ std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompositeBuffer> {
+ return {Buffer.DecodeRawSize(), Buffer.GetCompressed()};
}));
}
ChunkBlockDescription Block;
@@ -1106,8 +997,7 @@ TEST_CASE("chunkblock.reuseblocks")
BlockDescriptions.emplace_back(std::move(Block));
}
- LoggerRef LogRef = Log();
- std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+ LoggerRef LogRef = Log();
{
// We use just about all the chunks - should result in use of both blocks
@@ -1124,14 +1014,8 @@ TEST_CASE("chunkblock.reuseblocks")
std::iota(ManyChunkIndexes.begin(), ManyChunkIndexes.end(), 0);
std::vector<uint32_t> UnusedChunkIndexes;
- std::vector<size_t> ReusedBlocks = FindReuseBlocks(*LogOutput,
- 80,
- false,
- ReuseBlocksStats,
- BlockDescriptions,
- ManyChunkHashes,
- ManyChunkIndexes,
- UnusedChunkIndexes);
+ std::vector<size_t> ReusedBlocks =
+ FindReuseBlocks(LogRef, 80, false, ReuseBlocksStats, BlockDescriptions, ManyChunkHashes, ManyChunkIndexes, UnusedChunkIndexes);
CHECK_EQ(2u, ReusedBlocks.size());
CHECK_EQ(0u, UnusedChunkIndexes.size());
@@ -1152,7 +1036,7 @@ TEST_CASE("chunkblock.reuseblocks")
std::iota(ManyChunkIndexes.begin(), ManyChunkIndexes.end(), 0);
std::vector<uint32_t> UnusedChunkIndexes;
- std::vector<size_t> ReusedBlocks = FindReuseBlocks(*LogOutput,
+ std::vector<size_t> ReusedBlocks = FindReuseBlocks(LogRef,
80,
false,
ReuseBlocksStats,
@@ -1181,7 +1065,7 @@ TEST_CASE("chunkblock.reuseblocks")
// We use half the chunks - should result in no use of blocks due to 80% limit
std::vector<uint32_t> UnusedChunkIndexes80Percent;
ReuseBlocksStatistics ReuseBlocksStats;
- std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(*LogOutput,
+ std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(LogRef,
80,
false,
ReuseBlocksStats,
@@ -1197,7 +1081,7 @@ TEST_CASE("chunkblock.reuseblocks")
// We use half the chunks - should result in use of both blocks due to 40% limit
std::vector<uint32_t> UnusedChunkIndexes40Percent;
ReuseBlocksStatistics ReuseBlocksStats;
- std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(*LogOutput,
+ std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(LogRef,
40,
false,
ReuseBlocksStats,
@@ -1227,7 +1111,7 @@ TEST_CASE("chunkblock.reuseblocks")
// We use half the chunks for first block - should result in use of one blocks due to 80% limit
ReuseBlocksStatistics ReuseBlocksStats;
std::vector<uint32_t> UnusedChunkIndexes80Percent;
- std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(*LogOutput,
+ std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(LogRef,
80,
false,
ReuseBlocksStats,
@@ -1244,7 +1128,7 @@ TEST_CASE("chunkblock.reuseblocks")
// We use half the chunks - should result in use of both blocks due to 40% limit
ReuseBlocksStatistics ReuseBlocksStats;
std::vector<uint32_t> UnusedChunkIndexes40Percent;
- std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(*LogOutput,
+ std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(LogRef,
40,
false,
ReuseBlocksStats,
@@ -1283,7 +1167,7 @@ TEST_CASE("chunkblock.reuseblocks")
// We use half the chunks for first block - should result in use of one blocks due to 80% limit
ReuseBlocksStatistics ReuseBlocksStats;
std::vector<uint32_t> UnusedChunkIndexes80Percent;
- std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(*LogOutput,
+ std::vector<size_t> ReusedBlocks80Percent = FindReuseBlocks(LogRef,
80,
false,
ReuseBlocksStats,
@@ -1300,7 +1184,7 @@ TEST_CASE("chunkblock.reuseblocks")
// We use half the chunks - should result in use of both blocks due to 40% limit
ReuseBlocksStatistics ReuseBlocksStats;
std::vector<uint32_t> UnusedChunkIndexes40Percent;
- std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(*LogOutput,
+ std::vector<size_t> ReusedBlocks40Percent = FindReuseBlocks(LogRef,
40,
false,
ReuseBlocksStats,
@@ -1315,6 +1199,877 @@ TEST_CASE("chunkblock.reuseblocks")
}
}
+namespace chunkblock_analyser_testutils {
+
+ // Build a ChunkBlockDescription without any real payload.
+ // Hashes are derived deterministically from (BlockSeed XOR ChunkIndex) so that the same
+ // seed produces the same hashes - useful for deduplication tests.
+ static ChunkBlockDescription MakeBlockDesc(uint64_t HeaderSize,
+ std::initializer_list<uint32_t> CompressedLengths,
+ uint32_t BlockSeed = 0)
+ {
+ ChunkBlockDescription Desc;
+ Desc.HeaderSize = HeaderSize;
+ uint32_t ChunkIndex = 0;
+ for (uint32_t Length : CompressedLengths)
+ {
+ uint64_t HashInput = uint64_t(BlockSeed ^ ChunkIndex);
+ Desc.ChunkRawHashes.push_back(IoHash::HashBuffer(MemoryView(&HashInput, sizeof(HashInput))));
+ Desc.ChunkRawLengths.push_back(Length);
+ Desc.ChunkCompressedLengths.push_back(Length);
+ ChunkIndex++;
+ }
+ return Desc;
+ }
+
+ // Build the robin_map<IoHash, uint32_t> needed by GetNeeded from a flat list of blocks.
+ // First occurrence of each hash wins; index is assigned sequentially across all blocks.
+ [[maybe_unused]] static tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> MakeHashMap(const std::vector<ChunkBlockDescription>& Blocks)
+ {
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Result;
+ uint32_t Index = 0;
+ for (const ChunkBlockDescription& Block : Blocks)
+ {
+ for (const IoHash& Hash : Block.ChunkRawHashes)
+ {
+ if (!Result.contains(Hash))
+ {
+ Result.emplace(Hash, Index++);
+ }
+ }
+ }
+ return Result;
+ }
+
+} // namespace chunkblock_analyser_testutils
+
+TEST_CASE("chunkblock.mergecheapestrange.picks_smallest_gap")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // Gap between ranges 0-1 is 50, gap between 1-2 is 150 -> pair 0-1 gets merged
+ std::vector<RD> Ranges = {
+ {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 150, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ };
+ chunkblock_impl::MergeCheapestRange(Ranges);
+
+ REQUIRE_EQ(2u, Ranges.size());
+ CHECK_EQ(0u, Ranges[0].RangeStart);
+ CHECK_EQ(250u, Ranges[0].RangeLength); // 150+100
+ CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(2u, Ranges[0].ChunkBlockIndexCount);
+ CHECK_EQ(400u, Ranges[1].RangeStart);
+ CHECK_EQ(100u, Ranges[1].RangeLength);
+ CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.mergecheapestrange.tiebreak_smaller_merged")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // Gap 0-1 == gap 1-2 == 100; merged size 0-1 (250) < merged size 1-2 (350) -> pair 0-1 wins
+ std::vector<RD> Ranges = {
+ {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 200, .RangeLength = 50, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 350, .RangeLength = 200, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ };
+ chunkblock_impl::MergeCheapestRange(Ranges);
+
+ REQUIRE_EQ(2u, Ranges.size());
+ // Pair 0-1 merged: start=0, length = (200+50)-0 = 250
+ CHECK_EQ(0u, Ranges[0].RangeStart);
+ CHECK_EQ(250u, Ranges[0].RangeLength);
+ CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(2u, Ranges[0].ChunkBlockIndexCount);
+ // Pair 1 unchanged (was index 2)
+ CHECK_EQ(350u, Ranges[1].RangeStart);
+ CHECK_EQ(200u, Ranges[1].RangeLength);
+ CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.optimizeranges.preserves_ranges_low_latency")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // With MaxRangeCountPerRequest unlimited, RequestCount=1
+ // RequestTimeAsBytes = 100000 * 1 * 0.001 = 100 << slack=7000 -> all ranges preserved
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 0, .RangeLength = 1000, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 2000, .RangeLength = 1000, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 4000, .RangeLength = 1000, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ };
+ uint64_t TotalBlockSize = 10000;
+ double LatencySec = 0.001;
+ uint64_t SpeedBytesPerSec = 100000;
+ uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+ uint64_t MaxRangesPerBlock = 1024;
+
+ auto Result =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+ REQUIRE_EQ(3u, Result.size());
+}
+
+TEST_CASE("chunkblock.optimizeranges.falls_back_to_full_block")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // 1 range already; slack=100 < SpeedBytesPerSec*LatencySec=200 -> full block (empty result)
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 100, .RangeLength = 900, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 3},
+ };
+ uint64_t TotalBlockSize = 1000;
+ double LatencySec = 0.01;
+ uint64_t SpeedBytesPerSec = 20000;
+ uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+ uint64_t MaxRangesPerBlock = 1024;
+
+ auto Result =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+ CHECK(Result.empty());
+}
+
+TEST_CASE("chunkblock.optimizeranges.maxrangesperblock_clamp")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // 5 input ranges; MaxRangesPerBlock=2 clamps to <=2 before the cost model runs
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 300, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 600, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 900, .RangeLength = 100, .ChunkBlockIndexStart = 3, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 1200, .RangeLength = 100, .ChunkBlockIndexStart = 4, .ChunkBlockIndexCount = 1},
+ };
+ uint64_t TotalBlockSize = 5000;
+ double LatencySec = 0.001;
+ uint64_t SpeedBytesPerSec = 100000;
+ uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+ uint64_t MaxRangesPerBlock = 2;
+
+ auto Result =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+ CHECK(Result.size() <= 2u);
+ CHECK(!Result.empty());
+}
+
+TEST_CASE("chunkblock.optimizeranges.low_maxrangecountperrequest_drives_merge")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // MaxRangeCountPerRequest=1 means RequestCount==RangeCount; high latency drives merging
+ // With MaxRangeCountPerRequest=-1 the same 3 ranges would be preserved (verified by comment below)
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 100, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 250, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ };
+ uint64_t TotalBlockSize = 1000;
+ double LatencySec = 1.0;
+ uint64_t SpeedBytesPerSec = 500;
+ // With MaxRangeCountPerRequest=-1: RequestCount=1, RequestTimeAsBytes=500 < slack=700 -> preserved
+ // With MaxRangeCountPerRequest=1: RequestCount=3, RequestTimeAsBytes=1500 > slack=700 -> merged
+ uint64_t MaxRangesPerBlock = 1024;
+
+ auto Unlimited =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, (uint64_t)-1, MaxRangesPerBlock);
+ CHECK_EQ(3u, Unlimited.size());
+
+ auto Limited =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, uint64_t(1), MaxRangesPerBlock);
+ CHECK(Limited.size() < 3u);
+}
+
+TEST_CASE("chunkblock.optimizeranges.unlimited_rangecountperrequest_no_extra_cost")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // MaxRangeCountPerRequest=-1 -> RequestCount always 1, even with many ranges and high latency
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 0, .RangeLength = 50, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 200, .RangeLength = 50, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 400, .RangeLength = 50, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 600, .RangeLength = 50, .ChunkBlockIndexStart = 3, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 800, .RangeLength = 50, .ChunkBlockIndexStart = 4, .ChunkBlockIndexCount = 1},
+ };
+ uint64_t TotalBlockSize = 5000;
+ double LatencySec = 0.1;
+ uint64_t SpeedBytesPerSec = 10000; // RequestTimeAsBytes=1000 << slack=4750
+ uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+ uint64_t MaxRangesPerBlock = 1024;
+
+ auto Result =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+ CHECK_EQ(5u, Result.size());
+}
+
+TEST_CASE("chunkblock.optimizeranges.two_range_direct_merge_path")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // Exactly 2 ranges; cost model demands merge; exercises the RangeCount==2 direct-merge branch
+ // After direct merge -> 1 range with small slack -> full block (empty)
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 2},
+ {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 2},
+ };
+ uint64_t TotalBlockSize = 600;
+ double LatencySec = 0.1;
+ uint64_t SpeedBytesPerSec = 5000; // RequestTimeAsBytes=500 > slack=400 on first iter
+ uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+ uint64_t MaxRangesPerBlock = 1024;
+
+ // Iteration 1: RangeCount=2, RequestCount=1, RequestTimeAsBytes=500 > slack=400 -> direct merge
+ // After merge: 1 range [{0,500,0,4}], slack=100 < Speed*Lat=500 -> full block
+ auto Result =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+ CHECK(Result.empty());
+}
+
+TEST_CASE("chunkblock.getneeded.all_chunks")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+
+ auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ auto HashMap = MakeHashMap({Block});
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return true; });
+
+ REQUIRE_EQ(1u, NeededBlocks.size());
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+ REQUIRE_EQ(4u, NeededBlocks[0].ChunkIndexes.size());
+ CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+ CHECK_EQ(1u, NeededBlocks[0].ChunkIndexes[1]);
+ CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[2]);
+ CHECK_EQ(3u, NeededBlocks[0].ChunkIndexes[3]);
+}
+
+TEST_CASE("chunkblock.getneeded.no_chunks")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+
+ auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ auto HashMap = MakeHashMap({Block});
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return false; });
+
+ CHECK(NeededBlocks.empty());
+}
+
+TEST_CASE("chunkblock.getneeded.subset_within_block")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+
+ auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ auto HashMap = MakeHashMap({Block});
+ // Indices 0 and 2 are needed; 1 and 3 are not
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex == 0 || ChunkIndex == 2; });
+
+ REQUIRE_EQ(1u, NeededBlocks.size());
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+ REQUIRE_EQ(2u, NeededBlocks[0].ChunkIndexes.size());
+ CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+ CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[1]);
+}
+
+TEST_CASE("chunkblock.getneeded.dedup_low_slack_wins")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+
+ // Block 0: {H0, H1, SharedH, H3} - 3 of 4 needed (H3 not needed); slack = 100
+ // Block 1: {H4, H5, SharedH, H6} - only SharedH needed; slack = 300
+ // Block 0 has less slack -> processed first -> SharedH assigned to block 0
+ IoHash SharedH = IoHash::HashBuffer(MemoryView("shared_chunk_dedup", 18));
+ IoHash H0 = IoHash::HashBuffer(MemoryView("block0_chunk0", 13));
+ IoHash H1 = IoHash::HashBuffer(MemoryView("block0_chunk1", 13));
+ IoHash H3 = IoHash::HashBuffer(MemoryView("block0_chunk3", 13));
+ IoHash H4 = IoHash::HashBuffer(MemoryView("block1_chunk0", 13));
+ IoHash H5 = IoHash::HashBuffer(MemoryView("block1_chunk1", 13));
+ IoHash H6 = IoHash::HashBuffer(MemoryView("block1_chunk3", 13));
+
+ ChunkBlockDescription Block0;
+ Block0.HeaderSize = 50;
+ Block0.ChunkRawHashes = {H0, H1, SharedH, H3};
+ Block0.ChunkRawLengths = {100, 100, 100, 100};
+ Block0.ChunkCompressedLengths = {100, 100, 100, 100};
+
+ ChunkBlockDescription Block1;
+ Block1.HeaderSize = 50;
+ Block1.ChunkRawHashes = {H4, H5, SharedH, H6};
+ Block1.ChunkRawLengths = {100, 100, 100, 100};
+ Block1.ChunkCompressedLengths = {100, 100, 100, 100};
+
+ std::vector<ChunkBlockDescription> Blocks = {Block0, Block1};
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(LogRef, Blocks, Options);
+
+ // Map: H0->0, H1->1, SharedH->2, H3->3, H4->4, H5->5, H6->6
+ auto HashMap = MakeHashMap(Blocks);
+ // Need H0(0), H1(1), SharedH(2) from block 0; SharedH from block 1 (already index 2)
+ // H3(3) not needed; H4,H5,H6 not needed
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex <= 2; });
+
+ // Block 0 slack=100 (H3 unused), block 1 slack=300 (H4,H5,H6 unused)
+ // Block 0 processed first; picks up H0, H1, SharedH
+ // Block 1 tries SharedH but it's already picked up -> empty -> not added
+ REQUIRE_EQ(1u, NeededBlocks.size());
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+ REQUIRE_EQ(3u, NeededBlocks[0].ChunkIndexes.size());
+ CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+ CHECK_EQ(1u, NeededBlocks[0].ChunkIndexes[1]);
+ CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[2]);
+}
+
+TEST_CASE("chunkblock.getneeded.dedup_no_double_pickup")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+
+ // SharedH appears in both blocks; should appear in the result exactly once
+ IoHash SharedH = IoHash::HashBuffer(MemoryView("shared_chunk_nodup", 18));
+ IoHash H0 = IoHash::HashBuffer(MemoryView("unique_chunk_b0", 15));
+ IoHash H1 = IoHash::HashBuffer(MemoryView("unique_chunk_b1a", 16));
+ IoHash H2 = IoHash::HashBuffer(MemoryView("unique_chunk_b1b", 16));
+ IoHash H3 = IoHash::HashBuffer(MemoryView("unique_chunk_b1c", 16));
+
+ ChunkBlockDescription Block0;
+ Block0.HeaderSize = 50;
+ Block0.ChunkRawHashes = {SharedH, H0};
+ Block0.ChunkRawLengths = {100, 100};
+ Block0.ChunkCompressedLengths = {100, 100};
+
+ ChunkBlockDescription Block1;
+ Block1.HeaderSize = 50;
+ Block1.ChunkRawHashes = {H1, H2, H3, SharedH};
+ Block1.ChunkRawLengths = {100, 100, 100, 100};
+ Block1.ChunkCompressedLengths = {100, 100, 100, 100};
+
+ std::vector<ChunkBlockDescription> Blocks = {Block0, Block1};
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(LogRef, Blocks, Options);
+
+ // Map: SharedH->0, H0->1, H1->2, H2->3, H3->4
+ // Only SharedH (index 0) needed; no other chunks
+ auto HashMap = MakeHashMap(Blocks);
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex == 0; });
+
+ // Block 0: SharedH needed, H0 not needed -> slack=100
+ // Block 1: SharedH needed, H1/H2/H3 not needed -> slack=300
+ // Block 0 processed first -> picks up SharedH; Block 1 skips it
+
+ // Count total occurrences of SharedH across all NeededBlocks
+ uint32_t SharedOccurrences = 0;
+ for (const auto& NB : NeededBlocks)
+ {
+ for (uint32_t Idx : NB.ChunkIndexes)
+ {
+ // SharedH is at block-local index 0 in Block0 and index 3 in Block1
+ (void)Idx;
+ SharedOccurrences++;
+ }
+ }
+ CHECK_EQ(1u, SharedOccurrences);
+ REQUIRE_EQ(1u, NeededBlocks.size());
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+}
+
+TEST_CASE("chunkblock.getneeded.skips_unrequested_chunks")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+
+ // Block has 4 chunks but only 2 appear in the hash map -> ChunkIndexes has exactly those 2
+ auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ // Only put chunks at positions 0 and 2 in the map
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> HashMap;
+ HashMap.emplace(Block.ChunkRawHashes[0], 0u);
+ HashMap.emplace(Block.ChunkRawHashes[2], 1u);
+
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return true; });
+
+ REQUIRE_EQ(1u, NeededBlocks.size());
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+ REQUIRE_EQ(2u, NeededBlocks[0].ChunkIndexes.size());
+ CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+ CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[1]);
+}
+
+TEST_CASE("chunkblock.getneeded.two_blocks_both_contribute")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+
+ // Block 0: all 4 needed (slack=0); block 1: 3 of 4 needed (slack=100)
+ // Both blocks contribute chunks -> 2 NeededBlocks in result
+ auto Block0 = MakeBlockDesc(50, {100, 100, 100, 100}, /*BlockSeed=*/0);
+ auto Block1 = MakeBlockDesc(50, {100, 100, 100, 100}, /*BlockSeed=*/200);
+
+ std::vector<ChunkBlockDescription> Blocks = {Block0, Block1};
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(LogRef, Blocks, Options);
+
+ // HashMap: Block0 hashes -> indices 0-3, Block1 hashes -> indices 4-7
+ auto HashMap = MakeHashMap(Blocks);
+ // Need all Block0 chunks (0-3) and Block1 chunks 0-2 (indices 4-6); not chunk index 7 (Block1 chunk 3)
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex <= 6; });
+
+ CHECK_EQ(2u, NeededBlocks.size());
+ // Block 0 has slack=0 (all 4 needed), Block 1 has slack=100 (1 not needed)
+ // Block 0 comes first in result
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+ CHECK_EQ(4u, NeededBlocks[0].ChunkIndexes.size());
+ CHECK_EQ(1u, NeededBlocks[1].BlockIndex);
+ CHECK_EQ(3u, NeededBlocks[1].ChunkIndexes.size());
+}
+
+TEST_CASE("chunkblock.calc.off_mode")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+
+ // HeaderSize > 0, chunks size matches -> CanDoPartialBlockDownload = true
+ // But mode Off forces full block regardless
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::Off};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+ CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+ CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.exact_mode")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ // Need chunks 0 and 2 -> 2 non-contiguous ranges; Exact mode passes them straight through
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::Exact};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ CHECK(Result.FullBlockIndexes.empty());
+ REQUIRE_EQ(2u, Result.BlockRanges.size());
+
+ CHECK_EQ(0u, Result.BlockRanges[0].BlockIndex);
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(0u, Result.BlockRanges[0].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Result.BlockRanges[0].ChunkBlockIndexCount);
+
+ CHECK_EQ(0u, Result.BlockRanges[1].BlockIndex);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); // 100+200 before chunk 2
+ CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+ CHECK_EQ(2u, Result.BlockRanges[1].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Result.BlockRanges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.calc.singlerange_mode")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ // Default HostLatencySec=-1 -> OptimizeRanges not called after SingleRange collapse
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ // Need chunks 0 and 2 -> 2 ranges that get collapsed to 1
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::SingleRange};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ CHECK(Result.FullBlockIndexes.empty());
+ REQUIRE_EQ(1u, Result.BlockRanges.size());
+ CHECK_EQ(0u, Result.BlockRanges[0].BlockIndex);
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ // Spans from chunk 0 start to chunk 2 end: 100+200+300=600
+ CHECK_EQ(600u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(0u, Result.BlockRanges[0].ChunkBlockIndexStart);
+ // ChunkBlockIndexCount = (2+1) - 0 = 3
+ CHECK_EQ(3u, Result.BlockRanges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.calc.multirange_mode")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ // Low latency: RequestTimeAsBytes=100 << slack -> OptimizeRanges preserves ranges
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ Options.HostLatencySec = 0.001;
+ Options.HostSpeedBytesPerSec = 100000;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::MultiRange};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ CHECK(Result.FullBlockIndexes.empty());
+ REQUIRE_EQ(2u, Result.BlockRanges.size());
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+ CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+}
+
+TEST_CASE("chunkblock.calc.multirangehighspeed_mode")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ // Block slack ~= 714 bytes (TotalBlockSize~=1114, RangeTotalSize=400 for chunks 0+2)
+ // RequestTimeAsBytes = 400000 * 1 * 0.001 = 400 < 714 -> ranges preserved
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ Options.HostHighSpeedLatencySec = 0.001;
+ Options.HostHighSpeedBytesPerSec = 400000;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::MultiRangeHighSpeed};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ CHECK(Result.FullBlockIndexes.empty());
+ REQUIRE_EQ(2u, Result.BlockRanges.size());
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+ CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+}
+
+TEST_CASE("chunkblock.calc.all_chunks_needed_full_block")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ Options.HostLatencySec = 0.001;
+ Options.HostSpeedBytesPerSec = 100000;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ // All 4 chunks needed -> short-circuit to full block regardless of mode
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 1, 2, 3}}};
+ std::vector<Mode> Modes = {Mode::Exact};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+ CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+ CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.headersize_zero_forces_full_block")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+
+ // HeaderSize=0 -> CanDoPartialBlockDownload=false -> full block even in Exact mode
+ auto Block = MakeBlockDesc(0, {100, 200, 300, 400});
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::Exact};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+ CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+ CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.low_maxrangecountperrequest")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+
+ // 5 chunks of 100 bytes each; need chunks 0, 2, 4 -> 3 non-contiguous ranges
+ // With MaxRangeCountPerRequest=1 and high latency, cost model merges aggressively -> full block
+ auto Block = MakeBlockDesc(10, {100, 100, 100, 100, 100});
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ Options.HostLatencySec = 0.1;
+ Options.HostSpeedBytesPerSec = 1000;
+ Options.HostMaxRangeCountPerRequest = 1;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2, 4}}};
+ std::vector<Mode> Modes = {Mode::MultiRange};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ // Cost model drives merging: 3 requests x 1000 x 0.1 = 300 > slack ~= 210+headersize
+ // After merges converges to full block
+ REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+ CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+ CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.no_latency_skips_optimize")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ // Default HostLatencySec=-1 -> OptimizeRanges not called; raw GetBlockRanges result used
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ ChunkBlockAnalyser Analyser(LogRef, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::MultiRange};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ // No optimize pass -> exact ranges from GetBlockRanges
+ CHECK(Result.FullBlockIndexes.empty());
+ REQUIRE_EQ(2u, Result.BlockRanges.size());
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+ CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+}
+
+TEST_CASE("chunkblock.calc.multiple_blocks_different_modes")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+
+ // 3 blocks with different modes: Off, Exact, MultiRange
+ auto Block0 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/0);
+ auto Block1 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/10);
+ auto Block2 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/20);
+
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ Options.HostLatencySec = 0.001;
+ Options.HostSpeedBytesPerSec = 100000;
+
+ std::vector<ChunkBlockDescription> Blocks = {Block0, Block1, Block2};
+ ChunkBlockAnalyser Analyser(LogRef, Blocks, Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + 50;
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {
+ {.BlockIndex = 0, .ChunkIndexes = {0, 2}},
+ {.BlockIndex = 1, .ChunkIndexes = {0, 2}},
+ {.BlockIndex = 2, .ChunkIndexes = {0, 2}},
+ };
+ std::vector<Mode> Modes = {Mode::Off, Mode::Exact, Mode::MultiRange};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ // Block 0: Off -> FullBlockIndexes
+ REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+ CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+
+ // Block 1: Exact -> 2 ranges; Block 2: MultiRange (low latency) -> 2 ranges
+ // Total: 4 ranges
+ REQUIRE_EQ(4u, Result.BlockRanges.size());
+
+ // First 2 ranges belong to Block 1 (Exact)
+ CHECK_EQ(1u, Result.BlockRanges[0].BlockIndex);
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(1u, Result.BlockRanges[1].BlockIndex);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+ CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+
+ // Last 2 ranges belong to Block 2 (MultiRange preserved)
+ CHECK_EQ(2u, Result.BlockRanges[2].BlockIndex);
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[2].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[2].RangeLength);
+ CHECK_EQ(2u, Result.BlockRanges[3].BlockIndex);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[3].RangeStart);
+ CHECK_EQ(300u, Result.BlockRanges[3].RangeLength);
+}
+
+TEST_CASE("chunkblock.getblockranges.first_chunk_only")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {0};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(1u, Ranges.size());
+ CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart);
+ CHECK_EQ(100u, Ranges[0].RangeLength);
+ CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.last_chunk_only")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {3};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(1u, Ranges.size());
+ CHECK_EQ(ChunkStartOffset + 600u, Ranges[0].RangeStart); // 100+200+300 before chunk 3
+ CHECK_EQ(400u, Ranges[0].RangeLength);
+ CHECK_EQ(3u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.middle_chunk_only")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {1};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(1u, Ranges.size());
+ CHECK_EQ(ChunkStartOffset + 100u, Ranges[0].RangeStart); // 100 before chunk 1
+ CHECK_EQ(200u, Ranges[0].RangeLength);
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.all_chunks")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {0, 1, 2, 3};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(1u, Ranges.size());
+ CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart);
+ CHECK_EQ(1000u, Ranges[0].RangeLength); // 100+200+300+400
+ CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(4u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.non_contiguous")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ // Chunks 0 and 2 needed, chunk 1 skipped -> two separate ranges
+ auto Block = MakeBlockDesc(50, {100, 200, 300});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {0, 2};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(2u, Ranges.size());
+
+ CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart);
+ CHECK_EQ(100u, Ranges[0].RangeLength);
+ CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+
+ CHECK_EQ(ChunkStartOffset + 300u, Ranges[1].RangeStart); // 100+200 before chunk 2
+ CHECK_EQ(300u, Ranges[1].RangeLength);
+ CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.contiguous_run")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ // Chunks 1, 2, 3 needed (consecutive) -> one merged range
+ auto Block = MakeBlockDesc(50, {50, 100, 150, 200, 250});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {1, 2, 3};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(1u, Ranges.size());
+ CHECK_EQ(ChunkStartOffset + 50u, Ranges[0].RangeStart); // 50 before chunk 1
+ CHECK_EQ(450u, Ranges[0].RangeLength); // 100+150+200
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(3u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_SUITE_END();
+
void
chunkblock_forcelink()
{
diff --git a/src/zenremotestore/chunking/chunkedcontent.cpp b/src/zenremotestore/chunking/chunkedcontent.cpp
index 26d179f14..c09ab9d3a 100644
--- a/src/zenremotestore/chunking/chunkedcontent.cpp
+++ b/src/zenremotestore/chunking/chunkedcontent.cpp
@@ -166,7 +166,6 @@ namespace {
if (Chunked.Info.ChunkSequence.empty())
{
AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Chunked.Info.RawHash, RawSize);
- Stats.UniqueSequencesFound++;
}
else
{
@@ -186,7 +185,6 @@ namespace {
Chunked.Info.ChunkHashes,
ChunkSizes);
}
- Stats.UniqueSequencesFound++;
}
});
Stats.FilesChunked++;
@@ -253,7 +251,7 @@ FolderContent::operator==(const FolderContent& Rhs) const
if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) &&
(ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size()))
{
- size_t PathCount = 0;
+ size_t PathCount = Paths.size();
for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++)
{
if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string())
@@ -1706,6 +1704,8 @@ namespace chunkedcontent_testutils {
} // namespace chunkedcontent_testutils
+TEST_SUITE_BEGIN("remotestore.chunkedcontent");
+
TEST_CASE("chunkedcontent.DeletePathsFromContent")
{
FastRandom BaseRandom;
@@ -1924,6 +1924,8 @@ TEST_CASE("chunkedcontent.ApplyChunkedContentOverlay")
}
}
+TEST_SUITE_END();
+
#endif // ZEN_WITH_TESTS
} // namespace zen
diff --git a/src/zenremotestore/chunking/chunkedfile.cpp b/src/zenremotestore/chunking/chunkedfile.cpp
index 652110605..633ddfd0d 100644
--- a/src/zenremotestore/chunking/chunkedfile.cpp
+++ b/src/zenremotestore/chunking/chunkedfile.cpp
@@ -211,6 +211,8 @@ ZEN_THIRD_PARTY_INCLUDES_END
namespace zen {
# if 0
+TEST_SUITE_BEGIN("remotestore.chunkedfile");
+
TEST_CASE("chunkedfile.findparams")
{
# if 1
@@ -513,6 +515,8 @@ TEST_CASE("chunkedfile.findparams")
// WorkLatch.CountDown();
// WorkLatch.Wait();
}
+
+TEST_SUITE_END();
# endif // 0
void
diff --git a/src/zenremotestore/chunking/chunkingcache.cpp b/src/zenremotestore/chunking/chunkingcache.cpp
index 7f0a26330..e9b783a00 100644
--- a/src/zenremotestore/chunking/chunkingcache.cpp
+++ b/src/zenremotestore/chunking/chunkingcache.cpp
@@ -75,13 +75,13 @@ public:
{
Lock.ReleaseNow();
RwLock::ExclusiveLockScope EditLock(m_Lock);
- if (auto RemoveIt = m_PathHashToEntry.find(PathHash); It != m_PathHashToEntry.end())
+ if (auto RemoveIt = m_PathHashToEntry.find(PathHash); RemoveIt != m_PathHashToEntry.end())
{
- CachedEntry& DeleteEntry = m_Entries[It->second];
+ CachedEntry& DeleteEntry = m_Entries[RemoveIt->second];
DeleteEntry.Chunked = {};
DeleteEntry.ModificationTick = 0;
- m_FreeEntryIndexes.push_back(It->second);
- m_PathHashToEntry.erase(It);
+ m_FreeEntryIndexes.push_back(RemoveIt->second);
+ m_PathHashToEntry.erase(RemoveIt);
}
}
}
@@ -461,6 +461,8 @@ namespace chunkingcache_testutils {
}
} // namespace chunkingcache_testutils
+TEST_SUITE_BEGIN("remotestore.chunkingcache");
+
TEST_CASE("chunkingcache.nullchunkingcache")
{
using namespace chunkingcache_testutils;
@@ -617,6 +619,8 @@ TEST_CASE("chunkingcache.diskchunkingcache")
}
}
+TEST_SUITE_END();
+
void
chunkingcache_forcelink()
{
diff --git a/src/zenremotestore/filesystemutils.cpp b/src/zenremotestore/filesystemutils.cpp
deleted file mode 100644
index fa1ce6f78..000000000
--- a/src/zenremotestore/filesystemutils.cpp
+++ /dev/null
@@ -1,697 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zenremotestore/filesystemutils.h>
-
-#include <zenremotestore/chunking/chunkedcontent.h>
-
-#include <zencore/filesystem.h>
-#include <zencore/fmtutils.h>
-#include <zencore/parallelwork.h>
-#include <zencore/scopeguard.h>
-#include <zencore/timer.h>
-#include <zencore/trace.h>
-
-#if ZEN_WITH_TESTS
-# include <zencore/testing.h>
-# include <zencore/testutils.h>
-#endif // ZEN_WITH_TESTS
-
-namespace zen {
-
-BufferedOpenFile::BufferedOpenFile(const std::filesystem::path Path,
- std::atomic<uint64_t>& OpenReadCount,
- std::atomic<uint64_t>& CurrentOpenFileCount,
- std::atomic<uint64_t>& ReadCount,
- std::atomic<uint64_t>& ReadByteCount)
-: m_Source(Path, BasicFile::Mode::kRead)
-, m_SourceSize(m_Source.FileSize())
-, m_OpenReadCount(OpenReadCount)
-, m_CurrentOpenFileCount(CurrentOpenFileCount)
-, m_ReadCount(ReadCount)
-, m_ReadByteCount(ReadByteCount)
-
-{
- m_OpenReadCount++;
- m_CurrentOpenFileCount++;
-}
-
-BufferedOpenFile::~BufferedOpenFile()
-{
- m_CurrentOpenFileCount--;
-}
-
-CompositeBuffer
-BufferedOpenFile::GetRange(uint64_t Offset, uint64_t Size)
-{
- ZEN_TRACE_CPU("BufferedOpenFile::GetRange");
-
- ZEN_ASSERT((m_CacheBlockIndex == (uint64_t)-1) || m_Cache);
- auto _ = MakeGuard([&]() { ZEN_ASSERT((m_CacheBlockIndex == (uint64_t)-1) || m_Cache); });
-
- ZEN_ASSERT((Offset + Size) <= m_SourceSize);
- const uint64_t BlockIndexStart = Offset / BlockSize;
- const uint64_t BlockIndexEnd = (Offset + Size - 1) / BlockSize;
-
- std::vector<SharedBuffer> BufferRanges;
- BufferRanges.reserve(BlockIndexEnd - BlockIndexStart + 1);
-
- uint64_t ReadOffset = Offset;
- for (uint64_t BlockIndex = BlockIndexStart; BlockIndex <= BlockIndexEnd; BlockIndex++)
- {
- const uint64_t BlockStartOffset = BlockIndex * BlockSize;
- if (m_CacheBlockIndex != BlockIndex)
- {
- uint64_t CacheSize = Min(BlockSize, m_SourceSize - BlockStartOffset);
- ZEN_ASSERT(CacheSize > 0);
- m_Cache = IoBuffer(CacheSize);
- m_Source.Read(m_Cache.GetMutableView().GetData(), CacheSize, BlockStartOffset);
- m_ReadCount++;
- m_ReadByteCount += CacheSize;
- m_CacheBlockIndex = BlockIndex;
- }
-
- const uint64_t BytesRead = ReadOffset - Offset;
- ZEN_ASSERT(BlockStartOffset <= ReadOffset);
- const uint64_t OffsetIntoBlock = ReadOffset - BlockStartOffset;
- ZEN_ASSERT(OffsetIntoBlock < m_Cache.GetSize());
- const uint64_t BlockBytes = Min(m_Cache.GetSize() - OffsetIntoBlock, Size - BytesRead);
- BufferRanges.emplace_back(SharedBuffer(IoBuffer(m_Cache, OffsetIntoBlock, BlockBytes)));
- ReadOffset += BlockBytes;
- }
- CompositeBuffer Result(std::move(BufferRanges));
- ZEN_ASSERT(Result.GetSize() == Size);
- return Result;
-}
-
-ReadFileCache::ReadFileCache(std::atomic<uint64_t>& OpenReadCount,
- std::atomic<uint64_t>& CurrentOpenFileCount,
- std::atomic<uint64_t>& ReadCount,
- std::atomic<uint64_t>& ReadByteCount,
- const std::filesystem::path& Path,
- const ChunkedFolderContent& LocalContent,
- const ChunkedContentLookup& LocalLookup,
- size_t MaxOpenFileCount)
-: m_Path(Path)
-, m_LocalContent(LocalContent)
-, m_LocalLookup(LocalLookup)
-, m_OpenReadCount(OpenReadCount)
-, m_CurrentOpenFileCount(CurrentOpenFileCount)
-, m_ReadCount(ReadCount)
-, m_ReadByteCount(ReadByteCount)
-{
- m_OpenFiles.reserve(MaxOpenFileCount);
-}
-ReadFileCache::~ReadFileCache()
-{
- m_OpenFiles.clear();
-}
-
-CompositeBuffer
-ReadFileCache::GetRange(uint32_t SequenceIndex, uint64_t Offset, uint64_t Size)
-{
- ZEN_TRACE_CPU("ReadFileCache::GetRange");
-
- auto CacheIt =
- std::find_if(m_OpenFiles.begin(), m_OpenFiles.end(), [SequenceIndex](const auto& Lhs) { return Lhs.first == SequenceIndex; });
- if (CacheIt != m_OpenFiles.end())
- {
- if (CacheIt != m_OpenFiles.begin())
- {
- auto CachedFile(std::move(CacheIt->second));
- m_OpenFiles.erase(CacheIt);
- m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(SequenceIndex, std::move(CachedFile)));
- }
- CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size);
- return Result;
- }
- const uint32_t LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[SequenceIndex];
- const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred();
- if (Size == m_LocalContent.RawSizes[LocalPathIndex])
- {
- IoBuffer Result = IoBufferBuilder::MakeFromFile(LocalFilePath);
- return CompositeBuffer(SharedBuffer(Result));
- }
- if (m_OpenFiles.size() == m_OpenFiles.capacity())
- {
- m_OpenFiles.pop_back();
- }
- m_OpenFiles.insert(
- m_OpenFiles.begin(),
- std::make_pair(
- SequenceIndex,
- std::make_unique<BufferedOpenFile>(LocalFilePath, m_OpenReadCount, m_CurrentOpenFileCount, m_ReadCount, m_ReadByteCount)));
- CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size);
- return Result;
-}
-
-uint32_t
-SetNativeFileAttributes(const std::filesystem::path FilePath, SourcePlatform SourcePlatform, uint32_t Attributes)
-{
-#if ZEN_PLATFORM_WINDOWS
- if (SourcePlatform == SourcePlatform::Windows)
- {
- SetFileAttributesToPath(FilePath, Attributes);
- return Attributes;
- }
- else
- {
- uint32_t CurrentAttributes = GetFileAttributesFromPath(FilePath);
- uint32_t NewAttributes = zen::MakeFileAttributeReadOnly(CurrentAttributes, zen::IsFileModeReadOnly(Attributes));
- if (CurrentAttributes != NewAttributes)
- {
- SetFileAttributesToPath(FilePath, NewAttributes);
- }
- return NewAttributes;
- }
-#endif // ZEN_PLATFORM_WINDOWS
-#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
- if (SourcePlatform != SourcePlatform::Windows)
- {
- zen::SetFileMode(FilePath, Attributes);
- return Attributes;
- }
- else
- {
- uint32_t CurrentMode = zen::GetFileMode(FilePath);
- uint32_t NewMode = zen::MakeFileModeReadOnly(CurrentMode, zen::IsFileAttributeReadOnly(Attributes));
- if (CurrentMode != NewMode)
- {
- zen::SetFileMode(FilePath, NewMode);
- }
- return NewMode;
- }
-#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
-};
-
-uint32_t
-GetNativeFileAttributes(const std::filesystem::path FilePath)
-{
-#if ZEN_PLATFORM_WINDOWS
- return GetFileAttributesFromPath(FilePath);
-#endif // ZEN_PLATFORM_WINDOWS
-#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
- return GetFileMode(FilePath);
-#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
-}
-
-bool
-IsFileWithRetry(const std::filesystem::path& Path)
-{
- std::error_code Ec;
- bool Result = IsFile(Path, Ec);
- for (size_t Retries = 0; Ec && Retries < 3; Retries++)
- {
- Sleep(100 + int(Retries * 50));
- Ec.clear();
- Result = IsFile(Path, Ec);
- }
- if (Ec)
- {
- throw std::system_error(std::error_code(Ec.value(), std::system_category()),
- fmt::format("Failed to check path '{}' is file, reason: ({}) {}", Path, Ec.value(), Ec.message()));
- }
- return Result;
-}
-
-bool
-SetFileReadOnlyWithRetry(const std::filesystem::path& Path, bool ReadOnly)
-{
- std::error_code Ec;
- bool Result = SetFileReadOnly(Path, ReadOnly, Ec);
- for (size_t Retries = 0; Ec && Retries < 3; Retries++)
- {
- if (!IsFileWithRetry(Path))
- {
- return false;
- }
- Sleep(100 + int(Retries * 50));
- Ec.clear();
- Result = SetFileReadOnly(Path, ReadOnly, Ec);
- }
- if (Ec)
- {
- throw std::system_error(std::error_code(Ec.value(), std::system_category()),
- fmt::format("Failed {} read only flag for file '{}', reason: ({}) {}",
- ReadOnly ? "setting" : "clearing",
- Path,
- Ec.value(),
- Ec.message()));
- }
- return Result;
-}
-
-std::error_code
-RenameFileWithRetry(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath)
-{
- std::error_code Ec;
- RenameFile(SourcePath, TargetPath, Ec);
- for (size_t Retries = 0; Ec && Retries < 5; Retries++)
- {
- ZEN_ASSERT_SLOW(IsFile(SourcePath));
- Sleep(50 + int(Retries * 150));
- Ec.clear();
- RenameFile(SourcePath, TargetPath, Ec);
- }
- return Ec;
-}
-
-std::error_code
-TryRemoveFile(const std::filesystem::path& Path)
-{
- std::error_code Ec;
- RemoveFile(Path, Ec);
- if (Ec)
- {
- if (IsFile(Path, Ec))
- {
- Ec.clear();
- RemoveFile(Path, Ec);
- if (Ec)
- {
- return Ec;
- }
- }
- }
- return {};
-}
-
-void
-RemoveFileWithRetry(const std::filesystem::path& Path)
-{
- std::error_code Ec;
- RemoveFile(Path, Ec);
- for (size_t Retries = 0; Ec && Retries < 6; Retries++)
- {
- if (!IsFileWithRetry(Path))
- {
- return;
- }
- Sleep(100 + int(Retries * 50));
- Ec.clear();
- RemoveFile(Path, Ec);
- }
- if (Ec)
- {
- throw std::system_error(std::error_code(Ec.value(), std::system_category()),
- fmt::format("Failed removing file '{}', reason: ({}) {}", Path, Ec.value(), Ec.message()));
- }
-}
-
-void
-FastCopyFile(bool AllowFileClone,
- bool UseSparseFiles,
- const std::filesystem::path& SourceFilePath,
- const std::filesystem::path& TargetFilePath,
- uint64_t RawSize,
- std::atomic<uint64_t>& WriteCount,
- std::atomic<uint64_t>& WriteByteCount,
- std::atomic<uint64_t>& CloneCount,
- std::atomic<uint64_t>& CloneByteCount)
-{
- ZEN_TRACE_CPU("CopyFile");
- if (AllowFileClone && TryCloneFile(SourceFilePath, TargetFilePath))
- {
- WriteCount += 1;
- WriteByteCount += RawSize;
- CloneCount += 1;
- CloneByteCount += RawSize;
- }
- else
- {
- BasicFile TargetFile(TargetFilePath, BasicFile::Mode::kTruncate);
- if (UseSparseFiles)
- {
- PrepareFileForScatteredWrite(TargetFile.Handle(), RawSize);
- }
- uint64_t Offset = 0;
- if (!ScanFile(SourceFilePath, 512u * 1024u, [&](const void* Data, size_t Size) {
- TargetFile.Write(Data, Size, Offset);
- Offset += Size;
- WriteCount++;
- WriteByteCount += Size;
- }))
- {
- throw std::runtime_error(fmt::format("Failed to copy file '{}' to '{}'", SourceFilePath, TargetFilePath));
- }
- }
-}
-
-CleanDirectoryResult
-CleanDirectory(
- WorkerThreadPool& IOWorkerPool,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag,
- const std::filesystem::path& Path,
- std::span<const std::string> ExcludeDirectories,
- std::function<void(const std::string_view Details, uint64_t TotalCount, uint64_t RemainingCount, bool IsPaused, bool IsAborted)>&&
- ProgressFunc,
- uint32_t ProgressUpdateDelayMS)
-{
- ZEN_TRACE_CPU("CleanDirectory");
- Stopwatch Timer;
-
- std::atomic<uint64_t> DiscoveredItemCount = 0;
- std::atomic<uint64_t> DeletedItemCount = 0;
- std::atomic<uint64_t> DeletedByteCount = 0;
-
- std::vector<std::filesystem::path> DirectoriesToDelete;
- CleanDirectoryResult Result;
- RwLock ResultLock;
- auto _ = MakeGuard([&]() {
- Result.DeletedCount = DeletedItemCount.load();
- Result.DeletedByteCount = DeletedByteCount.load();
- Result.FoundCount = DiscoveredItemCount.load();
- });
-
- ParallelWork Work(AbortFlag,
- PauseFlag,
- ProgressFunc ? WorkerThreadPool::EMode::DisableBacklog : WorkerThreadPool::EMode::EnableBacklog);
-
- struct AsyncVisitor : public GetDirectoryContentVisitor
- {
- AsyncVisitor(const std::filesystem::path& InPath,
- std::atomic<bool>& InAbortFlag,
- std::atomic<uint64_t>& InDiscoveredItemCount,
- std::atomic<uint64_t>& InDeletedItemCount,
- std::atomic<uint64_t>& InDeletedByteCount,
- std::span<const std::string> InExcludeDirectories,
- std::vector<std::filesystem::path>& OutDirectoriesToDelete,
- CleanDirectoryResult& InResult,
- RwLock& InResultLock)
- : Path(InPath)
- , AbortFlag(InAbortFlag)
- , DiscoveredItemCount(InDiscoveredItemCount)
- , DeletedItemCount(InDeletedItemCount)
- , DeletedByteCount(InDeletedByteCount)
- , ExcludeDirectories(InExcludeDirectories)
- , DirectoriesToDelete(OutDirectoriesToDelete)
- , Result(InResult)
- , ResultLock(InResultLock)
- {
- }
-
- virtual bool AsyncAllowDirectory(const std::filesystem::path& Parent, const std::filesystem::path& DirectoryName) const override
- {
- ZEN_UNUSED(Parent);
-
- if (AbortFlag)
- {
- return false;
- }
- const std::string DirectoryString = DirectoryName.string();
- for (const std::string_view ExcludeDirectory : ExcludeDirectories)
- {
- if (DirectoryString == ExcludeDirectory)
- {
- return false;
- }
- }
- return true;
- }
-
- virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override
- {
- ZEN_TRACE_CPU("CleanDirectory_AsyncVisitDirectory");
- if (!AbortFlag)
- {
- DiscoveredItemCount += Content.FileNames.size();
-
- ZEN_TRACE_CPU("DeleteFiles");
- std::vector<std::pair<std::filesystem::path, std::error_code>> FailedRemovePaths;
- for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++)
- {
- const std::filesystem::path& FileName = Content.FileNames[FileIndex];
- const std::filesystem::path FilePath = (Path / RelativeRoot / FileName).make_preferred();
-
- bool IsRemoved = false;
- std::error_code Ec;
- (void)SetFileReadOnly(FilePath, false, Ec);
- for (size_t Retries = 0; Ec && Retries < 3; Retries++)
- {
- if (!IsFileWithRetry(FilePath))
- {
- IsRemoved = true;
- Ec.clear();
- break;
- }
- Sleep(100 + int(Retries * 50));
- Ec.clear();
- (void)SetFileReadOnly(FilePath, false, Ec);
- }
- if (!IsRemoved && !Ec)
- {
- (void)RemoveFile(FilePath, Ec);
- for (size_t Retries = 0; Ec && Retries < 6; Retries++)
- {
- if (!IsFileWithRetry(FilePath))
- {
- IsRemoved = true;
- Ec.clear();
- break;
- }
- Sleep(100 + int(Retries * 50));
- Ec.clear();
- (void)RemoveFile(FilePath, Ec);
- }
- }
- if (!IsRemoved && Ec)
- {
- FailedRemovePaths.push_back(std::make_pair(FilePath, Ec));
- }
- else
- {
- DeletedItemCount++;
- DeletedByteCount += Content.FileSizes[FileIndex];
- }
- }
-
- if (!FailedRemovePaths.empty())
- {
- RwLock::ExclusiveLockScope _(ResultLock);
- FailedRemovePaths.insert(FailedRemovePaths.end(), FailedRemovePaths.begin(), FailedRemovePaths.end());
- }
- else if (!RelativeRoot.empty())
- {
- DiscoveredItemCount++;
- RwLock::ExclusiveLockScope _(ResultLock);
- DirectoriesToDelete.push_back(RelativeRoot);
- }
- }
- }
- const std::filesystem::path& Path;
- std::atomic<bool>& AbortFlag;
- std::atomic<uint64_t>& DiscoveredItemCount;
- std::atomic<uint64_t>& DeletedItemCount;
- std::atomic<uint64_t>& DeletedByteCount;
- std::span<const std::string> ExcludeDirectories;
- std::vector<std::filesystem::path>& DirectoriesToDelete;
- CleanDirectoryResult& Result;
- RwLock& ResultLock;
- } Visitor(Path,
- AbortFlag,
- DiscoveredItemCount,
- DeletedItemCount,
- DeletedByteCount,
- ExcludeDirectories,
- DirectoriesToDelete,
- Result,
- ResultLock);
-
- GetDirectoryContent(Path,
- DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes,
- Visitor,
- IOWorkerPool,
- Work.PendingWork());
-
- uint64_t LastUpdateTimeMs = Timer.GetElapsedTimeMs();
-
- if (ProgressFunc && ProgressUpdateDelayMS != 0)
- {
- Work.Wait(ProgressUpdateDelayMS, [&](bool IsAborted, bool IsPaused, ptrdiff_t PendingWork) {
- ZEN_UNUSED(PendingWork);
- LastUpdateTimeMs = Timer.GetElapsedTimeMs();
-
- uint64_t Deleted = DeletedItemCount.load();
- uint64_t DeletedBytes = DeletedByteCount.load();
- uint64_t Discovered = DiscoveredItemCount.load();
- std::string Details = fmt::format("Found {}, Deleted {} ({})", Discovered, Deleted, NiceBytes(DeletedBytes));
- ProgressFunc(Details, Discovered, Discovered - Deleted, IsPaused, IsAborted);
- });
- }
- else
- {
- Work.Wait();
- }
-
- {
- ZEN_TRACE_CPU("DeleteDirs");
-
- std::sort(DirectoriesToDelete.begin(),
- DirectoriesToDelete.end(),
- [](const std::filesystem::path& Lhs, const std::filesystem::path& Rhs) {
- auto DistanceLhs = std::distance(Lhs.begin(), Lhs.end());
- auto DistanceRhs = std::distance(Rhs.begin(), Rhs.end());
- return DistanceLhs > DistanceRhs;
- });
-
- for (const std::filesystem::path& DirectoryToDelete : DirectoriesToDelete)
- {
- if (AbortFlag)
- {
- break;
- }
- else
- {
- while (PauseFlag && !AbortFlag)
- {
- Sleep(2000);
- }
- }
-
- const std::filesystem::path FullPath = Path / DirectoryToDelete;
-
- std::error_code Ec;
- RemoveDir(FullPath, Ec);
- if (Ec)
- {
- for (size_t Retries = 0; Ec && Retries < 3; Retries++)
- {
- if (!IsDir(FullPath))
- {
- Ec.clear();
- break;
- }
- Sleep(100 + int(Retries * 50));
- Ec.clear();
- RemoveDir(FullPath, Ec);
- }
- }
- if (Ec)
- {
- RwLock::ExclusiveLockScope __(ResultLock);
- Result.FailedRemovePaths.push_back(std::make_pair(DirectoryToDelete, Ec));
- }
- else
- {
- DeletedItemCount++;
- }
-
- if (ProgressFunc)
- {
- uint64_t NowMs = Timer.GetElapsedTimeMs();
-
- if ((NowMs - LastUpdateTimeMs) > 0)
- {
- LastUpdateTimeMs = NowMs;
-
- uint64_t Deleted = DeletedItemCount.load();
- uint64_t DeletedBytes = DeletedByteCount.load();
- uint64_t Discovered = DiscoveredItemCount.load();
- std::string Details = fmt::format("Found {}, Deleted {} ({})", Discovered, Deleted, NiceBytes(DeletedBytes));
- ProgressFunc(Details, Discovered, Discovered - Deleted, PauseFlag, AbortFlag);
- }
- }
- }
- }
-
- return Result;
-}
-
-bool
-CleanAndRemoveDirectory(WorkerThreadPool& WorkerPool,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag,
- const std::filesystem::path& Directory)
-{
- if (!IsDir(Directory))
- {
- return true;
- }
- if (CleanDirectoryResult Res = CleanDirectory(
- WorkerPool,
- AbortFlag,
- PauseFlag,
- Directory,
- {},
- [](const std::string_view Details, uint64_t TotalCount, uint64_t RemainingCount, bool IsPaused, bool IsAborted) {
- ZEN_UNUSED(Details, TotalCount, RemainingCount, IsPaused, IsAborted);
- },
- 1000);
- Res.FailedRemovePaths.empty())
- {
- std::error_code Ec;
- RemoveDir(Directory, Ec);
- return !Ec;
- }
- return false;
-}
-
-#if ZEN_WITH_TESTS
-
-void
-filesystemutils_forcelink()
-{
-}
-
-namespace {
- void GenerateFile(const std::filesystem::path& Path) { BasicFile _(Path, BasicFile::Mode::kTruncate); }
-} // namespace
-
-TEST_CASE("filesystemutils.CleanDirectory")
-{
- ScopedTemporaryDirectory TmpDir;
-
- CreateDirectories(TmpDir.Path() / ".keepme");
- GenerateFile(TmpDir.Path() / ".keepme" / "keep");
- GenerateFile(TmpDir.Path() / "deleteme1");
- GenerateFile(TmpDir.Path() / "deleteme2");
- GenerateFile(TmpDir.Path() / "deleteme3");
- CreateDirectories(TmpDir.Path() / ".keepmenot");
- CreateDirectories(TmpDir.Path() / "no.keepme");
-
- CreateDirectories(TmpDir.Path() / "DeleteMe");
- GenerateFile(TmpDir.Path() / "DeleteMe" / "delete1");
- CreateDirectories(TmpDir.Path() / "CantDeleteMe");
- GenerateFile(TmpDir.Path() / "CantDeleteMe" / "delete1");
- GenerateFile(TmpDir.Path() / "CantDeleteMe" / "delete2");
- GenerateFile(TmpDir.Path() / "CantDeleteMe" / "delete3");
- CreateDirectories(TmpDir.Path() / "CantDeleteMe" / ".keepme");
- CreateDirectories(TmpDir.Path() / "CantDeleteMe" / "DeleteMe2");
- GenerateFile(TmpDir.Path() / "CantDeleteMe" / "DeleteMe2" / "delete2");
- GenerateFile(TmpDir.Path() / "CantDeleteMe" / "DeleteMe2" / "delete3");
- CreateDirectories(TmpDir.Path() / "CantDeleteMe2" / ".keepme");
- CreateDirectories(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept");
- GenerateFile(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept" / "kept1");
- GenerateFile(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept" / "kept2");
- GenerateFile(TmpDir.Path() / "CantDeleteMe2" / "deleteme");
-
- WorkerThreadPool Pool(4);
- std::atomic<bool> AbortFlag;
- std::atomic<bool> PauseFlag;
-
- CleanDirectory(Pool, AbortFlag, PauseFlag, TmpDir.Path(), std::vector<std::string>{".keepme"}, {}, 0);
-
- CHECK(IsDir(TmpDir.Path() / ".keepme"));
- CHECK(IsFile(TmpDir.Path() / ".keepme" / "keep"));
- CHECK(!IsFile(TmpDir.Path() / "deleteme1"));
- CHECK(!IsFile(TmpDir.Path() / "deleteme2"));
- CHECK(!IsFile(TmpDir.Path() / "deleteme3"));
- CHECK(!IsFile(TmpDir.Path() / ".keepmenot"));
- CHECK(!IsFile(TmpDir.Path() / "no.keepme"));
-
- CHECK(!IsDir(TmpDir.Path() / "DeleteMe"));
- CHECK(!IsDir(TmpDir.Path() / "DeleteMe2"));
-
- CHECK(IsDir(TmpDir.Path() / "CantDeleteMe"));
- CHECK(IsDir(TmpDir.Path() / "CantDeleteMe" / ".keepme"));
- CHECK(IsDir(TmpDir.Path() / "CantDeleteMe2"));
- CHECK(IsDir(TmpDir.Path() / "CantDeleteMe2" / ".keepme"));
- CHECK(IsDir(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept"));
- CHECK(IsFile(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept" / "kept1"));
- CHECK(IsFile(TmpDir.Path() / "CantDeleteMe2" / ".keepme" / "Kept" / "kept2"));
- CHECK(!IsFile(TmpDir.Path() / "CantDeleteMe2" / "deleteme"));
-}
-
-#endif
-
-} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildinspect.h b/src/zenremotestore/include/zenremotestore/builds/buildinspect.h
new file mode 100644
index 000000000..7f6c65367
--- /dev/null
+++ b/src/zenremotestore/include/zenremotestore/builds/buildinspect.h
@@ -0,0 +1,60 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/uid.h>
+#include <zenremotestore/chunking/chunkedcontent.h>
+
+#include <atomic>
+#include <filesystem>
+#include <span>
+#include <string>
+#include <vector>
+
+namespace zen {
+
+class CbObjectWriter;
+class ChunkingCache;
+class ChunkingController;
+class ProgressBase;
+class TransferThreadWorkers;
+struct StorageInstance;
+
+ChunkedFolderContent ScanAndChunkFolder(
+ ProgressBase& Progress,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ bool IsQuiet,
+ TransferThreadWorkers& Workers,
+ GetFolderContentStatistics& GetFolderContentStats,
+ ChunkingStatistics& ChunkingStats,
+ const std::filesystem::path& Path,
+ std::function<bool(const std::string_view& RelativePath)>&& IsAcceptedFolder,
+ std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& IsAcceptedFile,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache);
+
+//////////////////////////////////////////////////////////////////////////
+
+void ListBuild(bool IsQuiet,
+ StorageInstance& Storage,
+ const Oid& BuildId,
+ const std::vector<Oid>& BuildPartIds,
+ std::span<const std::string> BuildPartNames,
+ std::span<const std::string> IncludeWildcards,
+ std::span<const std::string> ExcludeWildcards,
+ CbObjectWriter* OptionalStructuredOutput);
+
+void DiffFolders(ProgressBase& Progress,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ bool IsQuiet,
+ TransferThreadWorkers& Workers,
+ const std::filesystem::path& BasePath,
+ const std::filesystem::path& ComparePath,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
+ const std::vector<std::string>& ExcludeFolders,
+ const std::vector<std::string>& ExcludeExtensions);
+
+} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildprimecache.h b/src/zenremotestore/include/zenremotestore/builds/buildprimecache.h
new file mode 100644
index 000000000..1d04ccbfe
--- /dev/null
+++ b/src/zenremotestore/include/zenremotestore/builds/buildprimecache.h
@@ -0,0 +1,96 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/iohash.h>
+#include <zencore/logging.h>
+#include <zencore/uid.h>
+#include <zencore/zencore.h>
+#include <zenremotestore/builds/buildstoragecache.h>
+#include <zenremotestore/builds/buildstoragestats.h>
+
+#include <atomic>
+#include <filesystem>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_map.h>
+#include <tsl/robin_set.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+namespace zen {
+
+class FilteredRate;
+class ParallelWork;
+class ProgressBase;
+class WorkerThreadPool;
+struct StorageInstance;
+
+class BuildsOperationPrimeCache
+{
+public:
+ struct Options
+ {
+ bool IsQuiet = false;
+ bool IsVerbose = false;
+ std::filesystem::path ZenFolderPath;
+ std::uint64_t LargeAttachmentSize = 32u * 1024u * 1024u * 4u;
+ std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;
+ bool ForceUpload = false;
+ };
+
+ BuildsOperationPrimeCache(LoggerRef Log,
+ ProgressBase& Progress,
+ StorageInstance& Storage,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ WorkerThreadPool& NetworkPool,
+ const Oid& BuildId,
+ std::span<const Oid> BuildPartIds,
+ const Options& Options,
+ BuildStorageCache::Statistics& StorageCacheStats);
+
+ void Execute();
+
+ DownloadStatistics m_DownloadStats;
+
+private:
+ LoggerRef Log() { return m_Log; }
+
+ void CollectReferencedBlobs(tsl::robin_set<IoHash, IoHash::Hasher>& OutBuildBlobs,
+ tsl::robin_map<IoHash, uint64_t, IoHash::Hasher>& OutLooseChunkRawSizes);
+
+ std::vector<IoHash> FilterAlreadyCachedBlobs(const tsl::robin_set<IoHash, IoHash::Hasher>& BuildBlobs);
+
+ void ScheduleBlobDownloads(std::span<const IoHash> BlobsToDownload,
+ const tsl::robin_map<IoHash, uint64_t, IoHash::Hasher>& LooseChunkRawSizes,
+ std::atomic<uint64_t>& MultipartAttachmentCount,
+ std::atomic<size_t>& CompletedDownloadCount,
+ FilteredRate& FilteredDownloadedBytesPerSecond);
+
+ void DownloadLargeBlobForCache(ParallelWork& Work,
+ const IoHash& BlobHash,
+ size_t BlobCount,
+ std::atomic<size_t>& CompletedDownloadCount,
+ std::atomic<uint64_t>& MultipartAttachmentCount,
+ FilteredRate& FilteredDownloadedBytesPerSecond);
+
+ void DownloadSingleBlobForCache(const IoHash& BlobHash,
+ size_t BlobCount,
+ std::atomic<size_t>& CompletedDownloadCount,
+ FilteredRate& FilteredDownloadedBytesPerSecond);
+
+ LoggerRef m_Log;
+ ProgressBase& m_Progress;
+ StorageInstance& m_Storage;
+ std::atomic<bool>& m_AbortFlag;
+ std::atomic<bool>& m_PauseFlag;
+ WorkerThreadPool& m_NetworkPool;
+ const Oid m_BuildId;
+ std::vector<Oid> m_BuildPartIds;
+ Options m_Options;
+ std::filesystem::path m_TempPath;
+
+ BuildStorageCache::Statistics& m_StorageCacheStats;
+};
+
+} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildsavedstate.h b/src/zenremotestore/include/zenremotestore/builds/buildsavedstate.h
index f808a7a3b..2a214f196 100644
--- a/src/zenremotestore/include/zenremotestore/builds/buildsavedstate.h
+++ b/src/zenremotestore/include/zenremotestore/builds/buildsavedstate.h
@@ -44,6 +44,11 @@ struct BuildState
struct BuildSaveState
{
+ static constexpr uint32_t NoVersion = 0;
+ static constexpr uint32_t kVersion1 = 1;
+ static constexpr uint32_t kCurrentVersion = kVersion1;
+
+ uint32_t Version = kCurrentVersion;
BuildState State;
FolderContent FolderState;
std::filesystem::path LocalPath;
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorage.h b/src/zenremotestore/include/zenremotestore/builds/buildstorage.h
index 85dabc59f..b933ab95d 100644
--- a/src/zenremotestore/include/zenremotestore/builds/buildstorage.h
+++ b/src/zenremotestore/include/zenremotestore/builds/buildstorage.h
@@ -3,7 +3,7 @@
#pragma once
#include <zencore/compactbinary.h>
-#include <zenremotestore/chunking/chunkblock.h>
+#include <zencore/compositebuffer.h>
ZEN_THIRD_PARTY_INCLUDES_START
#include <tsl/robin_map.h>
@@ -53,15 +53,24 @@ public:
std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter,
std::function<void(uint64_t, bool)>&& OnSentBytes) = 0;
- virtual IoBuffer GetBuildBlob(const Oid& BuildId,
- const IoHash& RawHash,
- uint64_t RangeOffset = 0,
- uint64_t RangeBytes = (uint64_t)-1) = 0;
+ virtual IoBuffer GetBuildBlob(const Oid& BuildId,
+ const IoHash& RawHash,
+ uint64_t RangeOffset = 0,
+ uint64_t RangeBytes = (uint64_t)-1) = 0;
+
+ struct BuildBlobRanges
+ {
+ IoBuffer PayloadBuffer;
+ std::vector<std::pair<uint64_t, uint64_t>> Ranges;
+ };
+ virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId,
+ const IoHash& RawHash,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges) = 0;
virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId,
const IoHash& RawHash,
uint64_t ChunkSize,
std::function<void(uint64_t Offset, const IoBuffer& Chunk)>&& OnReceive,
- std::function<void()>&& OnComplete) = 0;
+ std::function<void()>&& OnComplete) = 0;
[[nodiscard]] virtual bool PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) = 0;
virtual CbObject FindBlocks(const Oid& BuildId, uint64_t MaxBlockCount) = 0;
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h b/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h
index f25ce5b5e..4e0bd7243 100644
--- a/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h
+++ b/src/zenremotestore/include/zenremotestore/builds/buildstoragecache.h
@@ -2,11 +2,9 @@
#pragma once
-#include <zencore/logging.h>
-
#include <zencore/compactbinary.h>
#include <zencore/compositebuffer.h>
-#include <zenremotestore/chunking/chunkblock.h>
+#include <zencore/logging.h>
namespace zen {
@@ -37,6 +35,14 @@ public:
const IoHash& RawHash,
uint64_t RangeOffset = 0,
uint64_t RangeBytes = (uint64_t)-1) = 0;
+ struct BuildBlobRanges
+ {
+ IoBuffer PayloadBuffer;
+ std::vector<std::pair<uint64_t, uint64_t>> Ranges;
+ };
+ virtual BuildBlobRanges GetBuildBlobRanges(const Oid& BuildId,
+ const IoHash& RawHash,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges) = 0;
virtual void PutBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes, std::span<const CbObject> MetaDatas) = 0;
virtual std::vector<CbObject> GetBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes) = 0;
@@ -61,11 +67,19 @@ std::unique_ptr<BuildStorageCache> CreateZenBuildStorageCache(HttpClient& H
const std::filesystem::path& TempFolderPath,
WorkerThreadPool& BackgroundWorkerPool);
+#if ZEN_WITH_TESTS
+std::unique_ptr<BuildStorageCache> CreateInMemoryBuildStorageCache(uint64_t MaxRangeSupported,
+ BuildStorageCache::Statistics& Stats,
+ double LatencySec = 0.0,
+ double DelayPerKBSec = 0.0);
+#endif // ZEN_WITH_TESTS
+
struct ZenCacheEndpointTestResult
{
bool Success = false;
std::string FailureReason;
- double LatencySeconds = -1.0;
+ double LatencySeconds = -1.0;
+ uint64_t MaxRangeCountPerRequest = 1;
};
ZenCacheEndpointTestResult TestZenCacheEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpVerbose);
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
deleted file mode 100644
index 31733569e..000000000
--- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h
+++ /dev/null
@@ -1,774 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#include <zencore/iohash.h>
-#include <zencore/logging.h>
-#include <zencore/uid.h>
-#include <zencore/zencore.h>
-#include <zenremotestore/builds/buildstoragecache.h>
-#include <zenremotestore/chunking/chunkblock.h>
-#include <zenremotestore/chunking/chunkedcontent.h>
-#include <zenremotestore/partialblockrequestmode.h>
-#include <zenutil/bufferedwritefilecache.h>
-
-#include <atomic>
-#include <future>
-#include <memory>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_set.h>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-class CloneQueryInterface;
-
-class OperationLogOutput;
-class BuildStorageBase;
-class HttpClient;
-class ParallelWork;
-class WorkerThreadPool;
-class FilteredRate;
-class ReadFileCache;
-struct StorageInstance;
-
-class BufferedWriteFileCache;
-struct ChunkBlockDescription;
-struct ChunkedFolderContent;
-
-struct DiskStatistics
-{
- std::atomic<uint64_t> OpenReadCount = 0;
- std::atomic<uint64_t> OpenWriteCount = 0;
- std::atomic<uint64_t> ReadCount = 0;
- std::atomic<uint64_t> ReadByteCount = 0;
- std::atomic<uint64_t> WriteCount = 0;
- std::atomic<uint64_t> WriteByteCount = 0;
- std::atomic<uint64_t> CloneCount = 0;
- std::atomic<uint64_t> CloneByteCount = 0;
- std::atomic<uint64_t> CurrentOpenFileCount = 0;
-};
-
-struct CacheMappingStatistics
-{
- uint64_t CacheChunkCount = 0;
- uint64_t CacheChunkByteCount = 0;
-
- uint64_t CacheBlockCount = 0;
- uint64_t CacheBlocksByteCount = 0;
-
- uint64_t CacheSequenceHashesCount = 0;
- uint64_t CacheSequenceHashesByteCount = 0;
-
- uint64_t CacheScanElapsedWallTimeUs = 0;
-
- uint32_t LocalPathsMatchingSequencesCount = 0;
- uint64_t LocalPathsMatchingSequencesByteCount = 0;
-
- uint64_t LocalChunkMatchingRemoteCount = 0;
- uint64_t LocalChunkMatchingRemoteByteCount = 0;
-
- uint64_t LocalScanElapsedWallTimeUs = 0;
-
- uint32_t ScavengedPathsMatchingSequencesCount = 0;
- uint64_t ScavengedPathsMatchingSequencesByteCount = 0;
-
- uint64_t ScavengedChunkMatchingRemoteCount = 0;
- uint64_t ScavengedChunkMatchingRemoteByteCount = 0;
-
- uint64_t ScavengeElapsedWallTimeUs = 0;
-};
-
-struct DownloadStatistics
-{
- std::atomic<uint64_t> RequestsCompleteCount = 0;
-
- std::atomic<uint64_t> DownloadedChunkCount = 0;
- std::atomic<uint64_t> DownloadedChunkByteCount = 0;
- std::atomic<uint64_t> MultipartAttachmentCount = 0;
-
- std::atomic<uint64_t> DownloadedBlockCount = 0;
- std::atomic<uint64_t> DownloadedBlockByteCount = 0;
-
- std::atomic<uint64_t> DownloadedPartialBlockCount = 0;
- std::atomic<uint64_t> DownloadedPartialBlockByteCount = 0;
-};
-
-struct WriteChunkStatistics
-{
- uint64_t DownloadTimeUs = 0;
- uint64_t WriteTimeUs = 0;
- uint64_t WriteChunksElapsedWallTimeUs = 0;
-};
-
-struct RebuildFolderStateStatistics
-{
- uint64_t CleanFolderElapsedWallTimeUs = 0;
- std::atomic<uint32_t> FinalizeTreeFilesMovedCount = 0;
- std::atomic<uint32_t> FinalizeTreeFilesCopiedCount = 0;
- uint64_t FinalizeTreeElapsedWallTimeUs = 0;
-};
-
-std::filesystem::path ZenStateFilePath(const std::filesystem::path& ZenFolderPath);
-std::filesystem::path ZenTempFolderPath(const std::filesystem::path& ZenFolderPath);
-
-class BuildsOperationUpdateFolder
-{
-public:
- struct Options
- {
- bool IsQuiet = false;
- bool IsVerbose = false;
- bool AllowFileClone = true;
- bool UseSparseFiles = true;
- std::filesystem::path SystemRootDir;
- std::filesystem::path ZenFolderPath;
- std::uint64_t LargeAttachmentSize = 32u * 1024u * 1024u * 4u;
- std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;
- EPartialBlockRequestMode PartialBlockRequestMode = EPartialBlockRequestMode::Mixed;
- bool WipeTargetFolder = false;
- bool PrimeCacheOnly = false;
- bool EnableOtherDownloadsScavenging = true;
- bool EnableTargetFolderScavenging = true;
- bool ValidateCompletedSequences = true;
- std::vector<std::string> ExcludeFolders;
- uint64_t MaximumInMemoryPayloadSize = 512u * 1024u;
- bool PopulateCache = true;
- };
-
- BuildsOperationUpdateFolder(OperationLogOutput& OperationLogOutput,
- StorageInstance& Storage,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag,
- WorkerThreadPool& IOWorkerPool,
- WorkerThreadPool& NetworkPool,
- const Oid& BuildId,
- const std::filesystem::path& Path,
- const ChunkedFolderContent& LocalContent,
- const ChunkedContentLookup& LocalLookup,
- const ChunkedFolderContent& RemoteContent,
- const ChunkedContentLookup& RemoteLookup,
- const std::vector<ChunkBlockDescription>& BlockDescriptions,
- const std::vector<IoHash>& LooseChunkHashes,
- const Options& Options);
-
- void Execute(FolderContent& OutLocalFolderState);
-
- DiskStatistics m_DiskStats;
- CacheMappingStatistics m_CacheMappingStats;
- GetFolderContentStatistics m_ScavengedFolderScanStats;
- DownloadStatistics m_DownloadStats;
- WriteChunkStatistics m_WriteChunkStats;
- RebuildFolderStateStatistics m_RebuildFolderStateStats;
- std::atomic<uint64_t> m_WrittenChunkByteCount;
-
-private:
- struct BlockWriteOps
- {
- std::vector<CompositeBuffer> ChunkBuffers;
- struct WriteOpData
- {
- const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr;
- size_t ChunkBufferIndex = (size_t)-1;
- };
- std::vector<WriteOpData> WriteOps;
- };
-
- struct ScavengeSource
- {
- std::filesystem::path StateFilePath;
- std::filesystem::path Path;
- };
-
- struct ScavengedSequenceCopyOperation
- {
- uint32_t ScavengedContentIndex = (uint32_t)-1;
- uint32_t ScavengedPathIndex = (uint32_t)-1;
- uint32_t RemoteSequenceIndex = (uint32_t)-1;
- uint64_t RawSize = (uint32_t)-1;
- };
-
- struct CopyChunkData
- {
- uint32_t ScavengeSourceIndex = (uint32_t)-1;
- uint32_t SourceSequenceIndex = (uint32_t)-1;
- std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> TargetChunkLocationPtrs;
- struct ChunkTarget
- {
- uint32_t TargetChunkLocationCount = (uint32_t)-1;
- uint32_t RemoteChunkIndex = (uint32_t)-1;
- uint64_t CacheFileOffset = (uint64_t)-1;
- };
- std::vector<ChunkTarget> ChunkTargets;
- };
-
- struct BlobsExistsResult
- {
- tsl::robin_set<IoHash> ExistingBlobs;
- uint64_t ElapsedTimeMs = 0;
- };
-
- void ScanCacheFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedChunkHashesFound,
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedSequenceHashesFound);
- void ScanTempBlocksFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedBlocksFound);
- std::vector<uint32_t> ScanTargetFolder(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedChunkHashesFound,
- const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound);
-
- std::vector<ScavengeSource> FindScavengeSources();
-
- bool FindScavengeContent(const ScavengeSource& Source,
- ChunkedFolderContent& OutScavengedLocalContent,
- ChunkedContentLookup& OutScavengedLookup);
-
- void ScavengeSourceForChunks(uint32_t& InOutRemainingChunkCount,
- std::vector<bool>& InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags,
- tsl::robin_map<IoHash, size_t, IoHash::Hasher>& InOutRawHashToCopyChunkDataIndex,
- const std::vector<std::atomic<uint32_t>>& SequenceIndexChunksLeftToWriteCounters,
- const ChunkedFolderContent& ScavengedContent,
- const ChunkedContentLookup& ScavengedLookup,
- std::vector<CopyChunkData>& InOutCopyChunkDatas,
- uint32_t ScavengedContentIndex,
- uint64_t& InOutChunkMatchingRemoteCount,
- uint64_t& InOutChunkMatchingRemoteByteCount);
-
- std::filesystem::path FindDownloadedChunk(const IoHash& ChunkHash);
-
- std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> GetRemainingChunkTargets(
- std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
- uint32_t ChunkIndex);
-
- uint64_t GetChunkWriteCount(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, uint32_t ChunkIndex);
-
- void CheckRequiredDiskSpace(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex);
-
- void WriteScavengedSequenceToCache(const std::filesystem::path& ScavengeRootPath,
- const ChunkedFolderContent& ScavengedContent,
- const ScavengedSequenceCopyOperation& ScavengeOp);
-
- void WriteLooseChunk(const uint32_t RemoteChunkIndex,
- const BlobsExistsResult& ExistsResult,
- std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
- std::atomic<uint64_t>& WritePartsComplete,
- std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs,
- BufferedWriteFileCache& WriteCache,
- ParallelWork& Work,
- uint64_t TotalRequestCount,
- uint64_t TotalPartWriteCount,
- FilteredRate& FilteredDownloadedBytesPerSecond,
- FilteredRate& FilteredWrittenBytesPerSecond);
-
- void DownloadBuildBlob(uint32_t RemoteChunkIndex,
- const BlobsExistsResult& ExistsResult,
- ParallelWork& Work,
- std::function<void(IoBuffer&& Payload)>&& OnDownloaded);
-
- void DownloadPartialBlock(const ChunkBlockAnalyser::BlockRangeDescriptor BlockRange,
- const BlobsExistsResult& ExistsResult,
- std::function<void(IoBuffer&& InMemoryBuffer, const std::filesystem::path& OnDiskPath)>&& OnDownloaded);
-
- std::vector<uint32_t> WriteLocalChunkToCache(CloneQueryInterface* CloneQuery,
- const CopyChunkData& CopyData,
- const std::vector<ChunkedFolderContent>& ScavengedContents,
- const std::vector<ChunkedContentLookup>& ScavengedLookups,
- const std::vector<std::filesystem::path>& ScavengedPaths,
- BufferedWriteFileCache& WriteCache);
-
- bool WriteCompressedChunkToCache(const IoHash& ChunkHash,
- const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs,
- BufferedWriteFileCache& WriteCache,
- IoBuffer&& CompressedPart);
-
- void StreamDecompress(const IoHash& SequenceRawHash, CompositeBuffer&& CompressedPart);
-
- void WriteSequenceChunkToCache(BufferedWriteFileCache::Local& LocalWriter,
- const CompositeBuffer& Chunk,
- const uint32_t SequenceIndex,
- const uint64_t FileOffset,
- const uint32_t PathIndex);
-
- bool GetBlockWriteOps(const IoHash& BlockRawHash,
- std::span<const IoHash> ChunkRawHashes,
- std::span<const uint32_t> ChunkCompressedLengths,
- std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
- std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
- const MemoryView BlockView,
- uint32_t FirstIncludedBlockChunkIndex,
- uint32_t LastIncludedBlockChunkIndex,
- BlockWriteOps& OutOps);
-
- void WriteBlockChunkOpsToCache(std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
- const BlockWriteOps& Ops,
- BufferedWriteFileCache& WriteCache,
- ParallelWork& Work);
-
- bool WriteChunksBlockToCache(const ChunkBlockDescription& BlockDescription,
- std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
- ParallelWork& Work,
- CompositeBuffer&& BlockBuffer,
- std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
- BufferedWriteFileCache& WriteCache);
-
- bool WritePartialBlockChunksToCache(const ChunkBlockDescription& BlockDescription,
- std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
- ParallelWork& Work,
- CompositeBuffer&& PartialBlockBuffer,
- uint32_t FirstIncludedBlockChunkIndex,
- uint32_t LastIncludedBlockChunkIndex,
- std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
- BufferedWriteFileCache& WriteCache);
-
- void AsyncWriteDownloadedChunk(const std::filesystem::path& ZenFolderPath,
- uint32_t RemoteChunkIndex,
- std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs,
- BufferedWriteFileCache& WriteCache,
- ParallelWork& Work,
- IoBuffer&& Payload,
- std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
- std::atomic<uint64_t>& WritePartsComplete,
- const uint64_t TotalPartWriteCount,
- FilteredRate& FilteredWrittenBytesPerSecond,
- bool EnableBacklog);
-
- void VerifyAndCompleteChunkSequencesAsync(std::span<const uint32_t> RemoteSequenceIndexes, ParallelWork& Work);
- bool CompleteSequenceChunk(uint32_t RemoteSequenceIndex, std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters);
- std::vector<uint32_t> CompleteChunkTargets(const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs,
- std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters);
- void FinalizeChunkSequence(const IoHash& SequenceRawHash);
- void FinalizeChunkSequences(std::span<const uint32_t> RemoteSequenceIndexes);
- void VerifySequence(uint32_t RemoteSequenceIndex);
-
- OperationLogOutput& m_LogOutput;
- StorageInstance& m_Storage;
- std::atomic<bool>& m_AbortFlag;
- std::atomic<bool>& m_PauseFlag;
- WorkerThreadPool& m_IOWorkerPool;
- WorkerThreadPool& m_NetworkPool;
- const Oid m_BuildId;
- const std::filesystem::path m_Path;
- const ChunkedFolderContent& m_LocalContent;
- const ChunkedContentLookup& m_LocalLookup;
- const ChunkedFolderContent& m_RemoteContent;
- const ChunkedContentLookup& m_RemoteLookup;
- const std::vector<ChunkBlockDescription>& m_BlockDescriptions;
- const std::vector<IoHash>& m_LooseChunkHashes;
- const Options m_Options;
- const std::filesystem::path m_CacheFolderPath;
- const std::filesystem::path m_TempDownloadFolderPath;
- const std::filesystem::path m_TempBlockFolderPath;
-
- std::atomic<uint64_t> m_ValidatedChunkByteCount;
-};
-
-struct FindBlocksStatistics
-{
- uint64_t FindBlockTimeMS = 0;
- uint64_t PotentialChunkCount = 0;
- uint64_t PotentialChunkByteCount = 0;
- uint64_t FoundBlockCount = 0;
- uint64_t FoundBlockChunkCount = 0;
- uint64_t FoundBlockByteCount = 0;
- uint64_t AcceptedBlockCount = 0;
- uint64_t NewBlocksCount = 0;
- uint64_t NewBlocksChunkCount = 0;
- uint64_t NewBlocksChunkByteCount = 0;
-
- FindBlocksStatistics& operator+=(const FindBlocksStatistics& Rhs)
- {
- FindBlockTimeMS += Rhs.FindBlockTimeMS;
- PotentialChunkCount += Rhs.PotentialChunkCount;
- PotentialChunkByteCount += Rhs.PotentialChunkByteCount;
- FoundBlockCount += Rhs.FoundBlockCount;
- FoundBlockChunkCount += Rhs.FoundBlockChunkCount;
- FoundBlockByteCount += Rhs.FoundBlockByteCount;
- AcceptedBlockCount += Rhs.AcceptedBlockCount;
- NewBlocksCount += Rhs.NewBlocksCount;
- NewBlocksChunkCount += Rhs.NewBlocksChunkCount;
- NewBlocksChunkByteCount += Rhs.NewBlocksChunkByteCount;
- return *this;
- }
-};
-
-struct UploadStatistics
-{
- std::atomic<uint64_t> BlockCount = 0;
- std::atomic<uint64_t> BlocksBytes = 0;
- std::atomic<uint64_t> ChunkCount = 0;
- std::atomic<uint64_t> ChunksBytes = 0;
- std::atomic<uint64_t> ReadFromDiskBytes = 0;
- std::atomic<uint64_t> MultipartAttachmentCount = 0;
- uint64_t ElapsedWallTimeUS = 0;
-
- UploadStatistics& operator+=(const UploadStatistics& Rhs)
- {
- BlockCount += Rhs.BlockCount;
- BlocksBytes += Rhs.BlocksBytes;
- ChunkCount += Rhs.ChunkCount;
- ChunksBytes += Rhs.ChunksBytes;
- ReadFromDiskBytes += Rhs.ReadFromDiskBytes;
- MultipartAttachmentCount += Rhs.MultipartAttachmentCount;
- ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS;
- return *this;
- }
-};
-
-struct LooseChunksStatistics
-{
- uint64_t ChunkCount = 0;
- uint64_t ChunkByteCount = 0;
- std::atomic<uint64_t> CompressedChunkCount = 0;
- std::atomic<uint64_t> CompressedChunkRawBytes = 0;
- std::atomic<uint64_t> CompressedChunkBytes = 0;
- uint64_t CompressChunksElapsedWallTimeUS = 0;
-
- LooseChunksStatistics& operator+=(const LooseChunksStatistics& Rhs)
- {
- ChunkCount += Rhs.ChunkCount;
- ChunkByteCount += Rhs.ChunkByteCount;
- CompressedChunkCount += Rhs.CompressedChunkCount;
- CompressedChunkRawBytes += Rhs.CompressedChunkRawBytes;
- CompressedChunkBytes += Rhs.CompressedChunkBytes;
- CompressChunksElapsedWallTimeUS += Rhs.CompressChunksElapsedWallTimeUS;
- return *this;
- }
-};
-
-struct GenerateBlocksStatistics
-{
- std::atomic<uint64_t> GeneratedBlockByteCount = 0;
- std::atomic<uint64_t> GeneratedBlockCount = 0;
- uint64_t GenerateBlocksElapsedWallTimeUS = 0;
-
- GenerateBlocksStatistics& operator+=(const GenerateBlocksStatistics& Rhs)
- {
- GeneratedBlockByteCount += Rhs.GeneratedBlockByteCount;
- GeneratedBlockCount += Rhs.GeneratedBlockCount;
- GenerateBlocksElapsedWallTimeUS += Rhs.GenerateBlocksElapsedWallTimeUS;
- return *this;
- }
-};
-
-static constexpr size_t DefaultMaxChunkBlockSize = 64u * 1024u * 1024u;
-static constexpr size_t DefaultMaxChunksPerChunkBlock = 4u * 1000u;
-static constexpr size_t DefaultMaxChunkBlockEmbedSize = 3u * 512u * 1024u;
-
-class BuildsOperationUploadFolder
-{
-public:
- struct ChunksBlockParameters
- {
- size_t MaxBlockSize = DefaultMaxChunkBlockSize;
- size_t MaxChunksPerBlock = DefaultMaxChunksPerChunkBlock;
- size_t MaxChunkEmbedSize = DefaultMaxChunkBlockEmbedSize;
- };
-
- struct Options
- {
- bool IsQuiet = false;
- bool IsVerbose = false;
- bool DoExtraContentValidation = false;
-
- const uint64_t FindBlockMaxCount = 10000;
- const uint8_t BlockReuseMinPercentLimit = 85;
- bool AllowMultiparts = true;
- bool IgnoreExistingBlocks = false;
- ChunksBlockParameters BlockParameters;
-
- uint32_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;
-
- const uint64_t MinimumSizeForCompressInBlock = 2u * 1024u;
-
- std::filesystem::path TempDir;
- std::vector<std::string> ExcludeFolders;
- std::vector<std::string> ExcludeExtensions;
- std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt";
-
- std::vector<std::string> NonCompressableExtensions;
-
- bool PopulateCache = true;
- };
- BuildsOperationUploadFolder(OperationLogOutput& OperationLogOutput,
- StorageInstance& Storage,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag,
- WorkerThreadPool& IOWorkerPool,
- WorkerThreadPool& NetworkPool,
- const Oid& BuildId,
- const std::filesystem::path& Path,
- bool CreateBuild,
- const CbObject& MetaData,
- const Options& Options);
-
- std::vector<std::pair<Oid, std::string>> Execute(const Oid& BuildPartId,
- const std::string_view BuildPartName,
- const std::filesystem::path& ManifestPath,
- ChunkingController& ChunkController,
- ChunkingCache& ChunkCache);
-
- DiskStatistics m_DiskStats;
- GetFolderContentStatistics m_LocalFolderScanStats;
- ChunkingStatistics m_ChunkingStats;
- FindBlocksStatistics m_FindBlocksStats;
- ReuseBlocksStatistics m_ReuseBlocksStats;
- UploadStatistics m_UploadStats;
- GenerateBlocksStatistics m_GenerateBlocksStats;
- LooseChunksStatistics m_LooseChunksStats;
-
-private:
- struct PrepareBuildResult
- {
- std::vector<ChunkBlockDescription> KnownBlocks;
- uint64_t PreferredMultipartChunkSize = 0;
- uint64_t PayloadSize = 0;
- uint64_t PrepareBuildTimeMs = 0;
- uint64_t FindBlocksTimeMs = 0;
- uint64_t ElapsedTimeMs = 0;
- };
-
- PrepareBuildResult PrepareBuild();
-
- struct UploadPart
- {
- Oid PartId = Oid::Zero;
- std::string PartName;
- FolderContent Content;
- uint64_t TotalRawSize = 0;
- GetFolderContentStatistics LocalFolderScanStats;
- };
-
- std::vector<BuildsOperationUploadFolder::UploadPart> ReadFolder();
- std::vector<UploadPart> ReadManifestParts(const std::filesystem::path& ManifestPath);
-
- bool IsAcceptedFolder(const std::string_view& RelativePath) const;
- bool IsAcceptedFile(const std::string_view& RelativePath) const;
-
- void ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content,
- const ChunkedContentLookup& Lookup,
- std::vector<uint32_t>& ChunkIndexes,
- std::vector<std::vector<uint32_t>>& OutBlocks);
- struct GeneratedBlocks
- {
- std::vector<ChunkBlockDescription> BlockDescriptions;
- std::vector<uint64_t> BlockSizes;
- std::vector<CompositeBuffer> BlockHeaders;
- std::vector<CbObject> BlockMetaDatas;
- std::vector<uint8_t>
- MetaDataHasBeenUploaded; // NOTE: Do not use std::vector<bool> here as this vector is modified by multiple threads
- tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockHashToBlockIndex;
- };
-
- void GenerateBuildBlocks(const ChunkedFolderContent& Content,
- const ChunkedContentLookup& Lookup,
- const std::vector<std::vector<uint32_t>>& NewBlockChunks,
- GeneratedBlocks& OutBlocks,
- GenerateBlocksStatistics& GenerateBlocksStats,
- UploadStatistics& UploadStats);
-
- std::vector<uint32_t> CalculateAbsoluteChunkOrders(const std::span<const IoHash> LocalChunkHashes,
- const std::span<const uint32_t> LocalChunkOrder,
- const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToLocalChunkIndex,
- const std::span<const uint32_t>& LooseChunkIndexes,
- const std::span<const ChunkBlockDescription>& BlockDescriptions);
-
- CompositeBuffer FetchChunk(const ChunkedFolderContent& Content,
- const ChunkedContentLookup& Lookup,
- const IoHash& ChunkHash,
- ReadFileCache& OpenFileCache);
-
- CompressedBuffer GenerateBlock(const ChunkedFolderContent& Content,
- const ChunkedContentLookup& Lookup,
- const std::vector<uint32_t>& ChunksInBlock,
- ChunkBlockDescription& OutBlockDescription);
-
- CompressedBuffer RebuildBlock(const ChunkedFolderContent& Content,
- const ChunkedContentLookup& Lookup,
- CompositeBuffer&& HeaderBuffer,
- const std::vector<uint32_t>& ChunksInBlock);
-
- enum class PartTaskSteps : uint32_t
- {
- ChunkPartContent = 0,
- CalculateDelta,
- GenerateBlocks,
- BuildPartManifest,
- UploadBuildPart,
- UploadAttachments,
- PutBuildPartStats,
- StepCount
- };
-
- void UploadBuildPart(ChunkingController& ChunkController,
- ChunkingCache& ChunkCache,
- uint32_t PartIndex,
- const UploadPart& Part,
- uint32_t PartStepOffset,
- uint32_t StepCount);
-
- void UploadPartBlobs(const ChunkedFolderContent& Content,
- const ChunkedContentLookup& Lookup,
- std::span<IoHash> RawHashes,
- const std::vector<std::vector<uint32_t>>& NewBlockChunks,
- GeneratedBlocks& NewBlocks,
- std::span<const uint32_t> LooseChunkIndexes,
- const std::uint64_t LargeAttachmentSize,
- UploadStatistics& TempUploadStats,
- LooseChunksStatistics& TempLooseChunksStats,
- std::vector<IoHash>& OutUnknownChunks);
-
- CompositeBuffer CompressChunk(const ChunkedFolderContent& Content,
- const ChunkedContentLookup& Lookup,
- uint32_t ChunkIndex,
- LooseChunksStatistics& TempLooseChunksStats);
-
- OperationLogOutput& m_LogOutput;
- StorageInstance& m_Storage;
- std::atomic<bool>& m_AbortFlag;
- std::atomic<bool>& m_PauseFlag;
- WorkerThreadPool& m_IOWorkerPool;
- WorkerThreadPool& m_NetworkPool;
- const Oid m_BuildId;
-
- const std::filesystem::path m_Path;
- const bool m_CreateBuild; // ?? Member?
- const CbObject m_MetaData; // ?? Member
- const Options m_Options;
-
- tsl::robin_set<uint32_t> m_NonCompressableExtensionHashes;
-
- std::future<PrepareBuildResult> m_PrepBuildResultFuture;
- std::vector<ChunkBlockDescription> m_KnownBlocks;
- uint64_t m_PreferredMultipartChunkSize = 0;
- uint64_t m_LargeAttachmentSize = 0;
-};
-
-struct ValidateStatistics
-{
- uint64_t BuildBlobSize = 0;
- uint64_t BuildPartSize = 0;
- uint64_t ChunkAttachmentCount = 0;
- uint64_t BlockAttachmentCount = 0;
- std::atomic<uint64_t> VerifiedAttachmentCount = 0;
- std::atomic<uint64_t> VerifiedByteCount = 0;
- uint64_t ElapsedWallTimeUS = 0;
-};
-
-class BuildsOperationValidateBuildPart
-{
-public:
- struct Options
- {
- bool IsQuiet = false;
- bool IsVerbose = false;
- };
- BuildsOperationValidateBuildPart(OperationLogOutput& OperationLogOutput,
- BuildStorageBase& Storage,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag,
- WorkerThreadPool& IOWorkerPool,
- WorkerThreadPool& NetworkPool,
- const Oid& BuildId,
- const Oid& BuildPartId,
- const std::string_view BuildPartName,
- const Options& Options);
-
- void Execute();
-
- ValidateStatistics m_ValidateStats;
- DownloadStatistics m_DownloadStats;
-
-private:
- ChunkBlockDescription ValidateChunkBlock(IoBuffer&& Payload,
- const IoHash& BlobHash,
- uint64_t& OutCompressedSize,
- uint64_t& OutDecompressedSize);
-
- OperationLogOutput& m_LogOutput;
- BuildStorageBase& m_Storage;
- std::atomic<bool>& m_AbortFlag;
- std::atomic<bool>& m_PauseFlag;
- WorkerThreadPool& m_IOWorkerPool;
- WorkerThreadPool& m_NetworkPool;
- const Oid m_BuildId;
- Oid m_BuildPartId;
- const std::string m_BuildPartName;
- const Options m_Options;
-};
-
-class BuildsOperationPrimeCache
-{
-public:
- struct Options
- {
- bool IsQuiet = false;
- bool IsVerbose = false;
- std::filesystem::path ZenFolderPath;
- std::uint64_t LargeAttachmentSize = 32u * 1024u * 1024u * 4u;
- std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;
- bool ForceUpload = false;
- };
-
- BuildsOperationPrimeCache(OperationLogOutput& OperationLogOutput,
- StorageInstance& Storage,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag,
- WorkerThreadPool& NetworkPool,
- const Oid& BuildId,
- std::span<const Oid> BuildPartIds,
- const Options& Options,
- BuildStorageCache::Statistics& StorageCacheStats);
-
- void Execute();
-
- DownloadStatistics m_DownloadStats;
-
-private:
- OperationLogOutput& m_LogOutput;
- StorageInstance& m_Storage;
- std::atomic<bool>& m_AbortFlag;
- std::atomic<bool>& m_PauseFlag;
- WorkerThreadPool& m_NetworkPool;
- const Oid m_BuildId;
- std::vector<Oid> m_BuildPartIds;
- Options m_Options;
- std::filesystem::path m_TempPath;
-
- BuildStorageCache::Statistics& m_StorageCacheStats;
-};
-
-CompositeBuffer ValidateBlob(std::atomic<bool>& AbortFlag,
- BuildStorageBase& Storage,
- const Oid& BuildId,
- const IoHash& BlobHash,
- uint64_t& OutCompressedSize,
- uint64_t& OutDecompressedSize);
-
-std::vector<std::pair<Oid, std::string>> ResolveBuildPartNames(CbObjectView BuildObject,
- const Oid& BuildId,
- const std::vector<Oid>& BuildPartIds,
- std::span<const std::string> BuildPartNames,
- std::uint64_t& OutPreferredMultipartChunkSize);
-
-struct BuildManifest;
-
-ChunkedFolderContent GetRemoteContent(OperationLogOutput& Output,
- StorageInstance& Storage,
- const Oid& BuildId,
- const std::vector<std::pair<Oid, std::string>>& BuildParts,
- const BuildManifest& Manifest,
- std::span<const std::string> IncludeWildcards,
- std::span<const std::string> ExcludeWildcards,
- std::unique_ptr<ChunkingController>& OutChunkController,
- std::vector<ChunkedFolderContent>& OutPartContents,
- std::vector<ChunkBlockDescription>& OutBlockDescriptions,
- std::vector<IoHash>& OutLooseChunkHashes,
- bool IsQuiet,
- bool IsVerbose,
- bool DoExtraContentVerify);
-
-std::string GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix);
-
-#if ZEN_WITH_TESTS
-void buildstorageoperations_forcelink();
-#endif // ZEN_WITH_TESTS
-
-} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageresolve.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageresolve.h
new file mode 100644
index 000000000..c964ad6cc
--- /dev/null
+++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageresolve.h
@@ -0,0 +1,46 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/logging.h>
+#include <zenhttp/httpclient.h>
+
+namespace zen {
+
// Result of resolving which build-storage endpoints to use: a primary (cloud)
// host and an optional cache host. Produced by ResolveBuildStorage().
struct BuildStorageResolveResult
{
	// Per-host protocol capabilities discovered during resolution.
	struct Capabilities
	{
		// Maximum number of byte ranges one ranged request may carry; 1 = single-range only.
		uint64_t MaxRangeCountPerRequest = 1;
	};
	// One resolved endpoint.
	struct Host
	{
		std::string	 Address;              // Endpoint address/URL
		std::string	 Name;                 // Display name (for logging/diagnostics)
		bool		 AssumeHttp2 = false;  // Presumably: skip negotiation and assume HTTP/2 -- TODO confirm
		double		 LatencySec = -1.0;    // Probed latency in seconds; negative means "not measured"
		Capabilities Caps;
	};
	Host Cloud;  // Primary build-storage endpoint
	Host Cache;  // Optional zen-cache endpoint (may be empty when no cache is used)
};
+
+//////////////////////////////////////////////////////////////////////////
+
// Controls how ResolveBuildStorage() resolves a zen-cache host.
enum class ZenCacheResolveMode
{
	Off,        // Do not resolve or use a cache host
	Discovery,  // Presumably resolve via discovery -- TODO confirm exact mechanism
	LocalHost,  // Presumably only consider a localhost cache -- TODO confirm
	All         // Presumably try every resolution mechanism -- TODO confirm
};
+
// Resolves the cloud build-storage host (OverrideHost takes precedence over Host --
// TODO confirm) and, per ZenResolveMode, an optional zen-cache host (ZenCacheHost).
// ClientSettings configures the HTTP client used during resolution; Verbose enables
// extra logging through InLog. Returns addresses, names, latencies and capabilities
// for both endpoints.
BuildStorageResolveResult ResolveBuildStorage(LoggerRef InLog,
											  const HttpClientSettings& ClientSettings,
											  std::string_view Host,
											  std::string_view OverrideHost,
											  std::string_view ZenCacheHost,
											  ZenCacheResolveMode ZenResolveMode,
											  bool Verbose);
+
+} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstoragestats.h b/src/zenremotestore/include/zenremotestore/builds/buildstoragestats.h
new file mode 100644
index 000000000..e0de9ed6b
--- /dev/null
+++ b/src/zenremotestore/include/zenremotestore/builds/buildstoragestats.h
@@ -0,0 +1,182 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+
+namespace zen {
+
// Counters for local disk activity. All members are atomic so concurrent
// worker threads can update them without external locking.
struct DiskStatistics
{
	std::atomic<uint64_t> OpenReadCount = 0;        // Files opened for reading
	std::atomic<uint64_t> OpenWriteCount = 0;       // Files opened for writing
	std::atomic<uint64_t> ReadCount = 0;            // Read operations issued
	std::atomic<uint64_t> ReadByteCount = 0;        // Bytes read
	std::atomic<uint64_t> WriteCount = 0;           // Write operations issued
	std::atomic<uint64_t> WriteByteCount = 0;       // Bytes written
	std::atomic<uint64_t> CloneCount = 0;           // Presumably copy-on-write file clones -- TODO confirm
	std::atomic<uint64_t> CloneByteCount = 0;       // Bytes covered by clone operations
	std::atomic<uint64_t> CurrentOpenFileCount = 0; // Gauge (not a monotonic counter): files currently open
};
+
// Counters for download activity, split by payload kind (loose chunks, full
// blocks, partial block ranges). Atomic so network workers can update concurrently.
struct DownloadStatistics
{
	std::atomic<uint64_t> RequestsCompleteCount = 0;  // Completed download requests

	std::atomic<uint64_t> DownloadedChunkCount = 0;       // Individually downloaded chunks
	std::atomic<uint64_t> DownloadedChunkByteCount = 0;
	std::atomic<uint64_t> MultipartAttachmentCount = 0;   // Attachments fetched via multipart requests

	std::atomic<uint64_t> DownloadedBlockCount = 0;       // Whole chunk blocks downloaded
	std::atomic<uint64_t> DownloadedBlockByteCount = 0;

	std::atomic<uint64_t> DownloadedPartialBlockCount = 0;  // Partial (ranged) block downloads
	std::atomic<uint64_t> DownloadedPartialBlockByteCount = 0;
};
+
// Statistics for mapping remote content onto data already available locally:
// the chunk cache, the local target folder, and scavenged sibling downloads.
// Plain (non-atomic) members; presumably filled by a single coordinating thread
// -- TODO confirm.
struct CacheMappingStatistics
{
	// Matches found in the chunk cache.
	uint64_t CacheChunkCount = 0;
	uint64_t CacheChunkByteCount = 0;

	uint64_t CacheBlockCount = 0;
	uint64_t CacheBlocksByteCount = 0;  // NOTE(review): plural "Blocks" is inconsistent with CacheBlockCount

	uint64_t CacheSequenceHashesCount = 0;
	uint64_t CacheSequenceHashesByteCount = 0;

	uint64_t CacheScanElapsedWallTimeUs = 0;  // Wall time spent scanning the cache, microseconds

	// Matches found in the local target folder.
	uint32_t LocalPathsMatchingSequencesCount = 0;
	uint64_t LocalPathsMatchingSequencesByteCount = 0;

	uint64_t LocalChunkMatchingRemoteCount = 0;
	uint64_t LocalChunkMatchingRemoteByteCount = 0;

	uint64_t LocalScanElapsedWallTimeUs = 0;  // Wall time spent scanning local content, microseconds

	// Matches found by scavenging other downloads.
	uint32_t ScavengedPathsMatchingSequencesCount = 0;
	uint64_t ScavengedPathsMatchingSequencesByteCount = 0;

	uint64_t ScavengedChunkMatchingRemoteCount = 0;
	uint64_t ScavengedChunkMatchingRemoteByteCount = 0;

	uint64_t ScavengeElapsedWallTimeUs = 0;  // Wall time spent scavenging, microseconds
};
+
// Timing breakdown for the chunk-writing phase, all in microseconds.
struct WriteChunkStatistics
{
	uint64_t DownloadTimeUs = 0;                 // Time attributed to downloading
	uint64_t WriteTimeUs = 0;                    // Time attributed to writing
	uint64_t WriteChunksElapsedWallTimeUs = 0;   // Total wall time of the phase
};
+
// Statistics for rebuilding the target folder's final state. The file counters
// are atomic (updated by parallel finalize work); the timings are plain values.
struct RebuildFolderStateStatistics
{
	uint64_t CleanFolderElapsedWallTimeUs = 0;             // Wall time cleaning the folder, microseconds
	std::atomic<uint32_t> FinalizeTreeFilesMovedCount = 0; // Files moved into place during finalize
	std::atomic<uint32_t> FinalizeTreeFilesCopiedCount = 0;// Files copied into place during finalize
	uint64_t FinalizeTreeElapsedWallTimeUs = 0;            // Wall time finalizing the tree, microseconds
};
+
// Statistics for the "find reusable chunk blocks" phase. Plain (non-atomic)
// counters; per-worker instances are merged with operator+=.
struct FindBlocksStatistics
{
	uint64_t FindBlockTimeMS = 0;         // Time spent searching, in milliseconds
	uint64_t PotentialChunkCount = 0;     // Chunks considered for block reuse
	uint64_t PotentialChunkByteCount = 0;
	uint64_t FoundBlockCount = 0;         // Existing blocks located
	uint64_t FoundBlockChunkCount = 0;
	uint64_t FoundBlockByteCount = 0;
	uint64_t AcceptedBlockCount = 0;      // Found blocks accepted for reuse
	uint64_t NewBlocksCount = 0;          // Blocks that must be newly created
	uint64_t NewBlocksChunkCount = 0;
	uint64_t NewBlocksChunkByteCount = 0;

	// Member-wise accumulation of Rhs into *this; returns *this for chaining.
	FindBlocksStatistics& operator+=(const FindBlocksStatistics& Rhs)
	{
		using Counter = uint64_t FindBlocksStatistics::*;
		// Every member is a uint64_t counter, so accumulate via a member-pointer table.
		static constexpr Counter AllCounters[] = {&FindBlocksStatistics::FindBlockTimeMS,
												  &FindBlocksStatistics::PotentialChunkCount,
												  &FindBlocksStatistics::PotentialChunkByteCount,
												  &FindBlocksStatistics::FoundBlockCount,
												  &FindBlocksStatistics::FoundBlockChunkCount,
												  &FindBlocksStatistics::FoundBlockByteCount,
												  &FindBlocksStatistics::AcceptedBlockCount,
												  &FindBlocksStatistics::NewBlocksCount,
												  &FindBlocksStatistics::NewBlocksChunkCount,
												  &FindBlocksStatistics::NewBlocksChunkByteCount};
		for (const Counter Field : AllCounters)
		{
			this->*Field += Rhs.*Field;
		}
		return *this;
	}
};
+
// Counters for the upload phase. The atomic members can be bumped concurrently
// by worker threads; ElapsedWallTimeUS is a plain (non-atomic) value.
struct UploadStatistics
{
	std::atomic<uint64_t> BlockCount = 0;                // Blocks uploaded
	std::atomic<uint64_t> BlocksBytes = 0;
	std::atomic<uint64_t> ChunkCount = 0;                // Chunks uploaded
	std::atomic<uint64_t> ChunksBytes = 0;
	std::atomic<uint64_t> ReadFromDiskBytes = 0;         // Bytes read from disk to feed uploads
	std::atomic<uint64_t> MultipartAttachmentCount = 0;  // Attachments sent via multipart requests
	uint64_t ElapsedWallTimeUS = 0;                      // Wall time of the phase, microseconds

	// Accumulates Rhs into *this field by field. Each atomic is read and added
	// individually, so the merge as a whole is not a single atomic operation.
	UploadStatistics& operator+=(const UploadStatistics& Rhs)
	{
		BlockCount.fetch_add(Rhs.BlockCount.load());
		BlocksBytes.fetch_add(Rhs.BlocksBytes.load());
		ChunkCount.fetch_add(Rhs.ChunkCount.load());
		ChunksBytes.fetch_add(Rhs.ChunksBytes.load());
		ReadFromDiskBytes.fetch_add(Rhs.ReadFromDiskBytes.load());
		MultipartAttachmentCount.fetch_add(Rhs.MultipartAttachmentCount.load());
		ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS;
		return *this;
	}
};
+
// Counters for loose (non-block) chunks. The compression counters are atomic
// (updated by parallel compression work); the rest are plain values, so
// operator+= is not safe to race with concurrent updates to the destination.
struct LooseChunksStatistics
{
	uint64_t ChunkCount = 0;          // Loose chunks handled
	uint64_t ChunkByteCount = 0;
	std::atomic<uint64_t> CompressedChunkCount = 0;     // Chunks that were compressed
	std::atomic<uint64_t> CompressedChunkRawBytes = 0;  // Input (raw) bytes of compressed chunks
	std::atomic<uint64_t> CompressedChunkBytes = 0;     // Output (compressed) bytes
	uint64_t CompressChunksElapsedWallTimeUS = 0;       // Wall time compressing, microseconds

	// Member-wise accumulation; returns *this for chaining.
	LooseChunksStatistics& operator+=(const LooseChunksStatistics& Rhs)
	{
		ChunkCount += Rhs.ChunkCount;
		ChunkByteCount += Rhs.ChunkByteCount;
		CompressedChunkCount += Rhs.CompressedChunkCount;
		CompressedChunkRawBytes += Rhs.CompressedChunkRawBytes;
		CompressedChunkBytes += Rhs.CompressedChunkBytes;
		CompressChunksElapsedWallTimeUS += Rhs.CompressChunksElapsedWallTimeUS;
		return *this;
	}
};
+
// Counters for the block-generation phase. The generated-block counters are
// atomic; the elapsed time is a plain value.
struct GenerateBlocksStatistics
{
	std::atomic<uint64_t> GeneratedBlockByteCount = 0;  // Bytes of newly generated blocks
	std::atomic<uint64_t> GeneratedBlockCount = 0;      // Newly generated blocks
	uint64_t GenerateBlocksElapsedWallTimeUS = 0;       // Wall time of the phase, microseconds

	// Member-wise accumulation; returns *this for chaining.
	GenerateBlocksStatistics& operator+=(const GenerateBlocksStatistics& Rhs)
	{
		GeneratedBlockByteCount += Rhs.GeneratedBlockByteCount;
		GeneratedBlockCount += Rhs.GeneratedBlockCount;
		GenerateBlocksElapsedWallTimeUS += Rhs.GenerateBlocksElapsedWallTimeUS;
		return *this;
	}
};
+
// Statistics for validating a build's attachments. The verified counters are
// atomic (updated by parallel verification); the sizes and timing are plain values.
struct ValidateStatistics
{
	uint64_t BuildBlobSize = 0;           // Size of the top-level build blob
	uint64_t BuildPartSize = 0;           // Combined size of build-part blobs -- presumed; confirm
	uint64_t ChunkAttachmentCount = 0;    // Chunk attachments referenced by the build
	uint64_t BlockAttachmentCount = 0;    // Block attachments referenced by the build
	std::atomic<uint64_t> VerifiedAttachmentCount = 0;  // Attachments actually verified
	std::atomic<uint64_t> VerifiedByteCount = 0;        // Bytes verified
	uint64_t ElapsedWallTimeUS = 0;       // Wall time of validation, microseconds
};
+
+} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h
index 4b85d8f1e..df35f65be 100644
--- a/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h
+++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageutil.h
@@ -3,49 +3,39 @@
#pragma once
#include <zencore/logging.h>
+#include <zencore/logging/sink.h>
#include <zenhttp/httpclient.h>
#include <zenremotestore/builds/buildstorage.h>
+#include <zenremotestore/builds/buildstorageresolve.h>
+#include <zenremotestore/chunking/chunkblock.h>
+#include <zenutil/sessionsclient.h>
+
+#include <atomic>
+#include <filesystem>
+#include <string>
+#include <vector>
namespace zen {
-class OperationLogOutput;
-class BuildStorageBase;
class BuildStorageCache;
+class ParallelWork;
+class WorkerThreadPool;
+struct ChunkedFolderContent;
+struct BuildManifest;
+class ChunkingController;
-struct BuildStorageResolveResult
-{
- std::string HostUrl;
- std::string HostName;
- bool HostAssumeHttp2 = false;
- double HostLatencySec = -1.0;
-
- std::string CacheUrl;
- std::string CacheName;
- bool CacheAssumeHttp2 = false;
- double CacheLatencySec = -1.0;
-};
-
-enum class ZenCacheResolveMode
-{
- Off,
- Discovery,
- LocalHost,
- All
-};
// Well-known metadata folder names that tools place alongside build content.
// These are excluded from content scans by default (see DefaultExcludeFolders).
inline const std::string ZenFolderName = ".zen";
inline const std::string UnsyncFolderName = ".unsync";
inline const std::string UGSFolderName = ".ugs";
// Legacy temp-folder name -- presumably retained so older downloads are still recognized; confirm.
inline const std::string LegacyZenTempFolderName = ".zen-tmp";
-BuildStorageResolveResult ResolveBuildStorage(OperationLogOutput& Output,
- const HttpClientSettings& ClientSettings,
- std::string_view Host,
- std::string_view OverrideHost,
- std::string_view ZenCacheHost,
- ZenCacheResolveMode ZenResolveMode,
- bool Verbose);
// Default exclusion sets applied when the caller does not provide its own
// (folders by name, files by extension).
inline const std::vector<std::string> DefaultExcludeFolders{UnsyncFolderName, ZenFolderName, UGSFolderName, LegacyZenTempFolderName};
inline const std::vector<std::string> DefaultExcludeExtensions{};
-std::vector<ChunkBlockDescription> GetBlockDescriptions(OperationLogOutput& Output,
+std::vector<ChunkBlockDescription> GetBlockDescriptions(LoggerRef InLog,
BuildStorageBase& Storage,
BuildStorageCache* OptionalCacheStorage,
const Oid& BuildId,
- const Oid& BuildPartId,
std::span<const IoHash> BlockRawHashes,
bool AttemptFallback,
bool IsQuiet,
@@ -53,14 +43,85 @@ std::vector<ChunkBlockDescription> GetBlockDescriptions(OperationLogOutput& Out
// Bundles everything needed to talk to one resolved storage target: the cloud
// build storage plus an optional cache (with its session client and log sink).
// Move-only (unique_ptr members; copy explicitly deleted).
struct StorageInstance
{
	~StorageInstance();

	StorageInstance() = default;
	StorageInstance(StorageInstance&&) = default;
	StorageInstance& operator=(StorageInstance&&) = default;
	StorageInstance(const StorageInstance&) = delete;
	StorageInstance& operator=(const StorageInstance&) = delete;

	// Cloud build-storage endpoint and the client/storage objects bound to it.
	BuildStorageResolveResult::Host BuildStorageHost;
	std::unique_ptr<HttpClient> BuildStorageHttp;
	std::unique_ptr<BuildStorageBase> BuildStorage;

	// Optional cache endpoint, plus its HTTP client, storage wrapper,
	// sessions-service client and dedicated log sink.
	BuildStorageResolveResult::Host CacheHost;
	std::unique_ptr<HttpClient> CacheHttp;
	std::unique_ptr<BuildStorageCache> CacheStorage;
	std::unique_ptr<SessionsServiceClient> CacheSession;
	logging::SinkPtr CacheLogSink;

	// Sets up CacheSession/CacheLogSink for the given target -- TODO confirm
	// exact semantics of Mode and SessionId.
	void SetupCacheSession(std::string_view TargetUrl, std::string_view Mode, const Oid& SessionId);
};
// Paths of the state file / temp folder inside a .zen folder.
std::filesystem::path ZenStateFilePath(const std::filesystem::path& ZenFolderPath);
std::filesystem::path ZenTempFolderPath(const std::filesystem::path& ZenFolderPath);

// Fetches the build object for BuildId from Storage. IsQuiet presumably
// suppresses log output -- TODO confirm.
CbObject GetBuild(BuildStorageBase& Storage, const Oid& BuildId, bool IsQuiet);

// Memory budget for in-flight buffers, derived from the maximum block size;
// BoostWorkerMemory presumably raises the budget -- TODO confirm.
uint64_t GetMaxMemoryBufferSize(size_t MaxBlockSize, bool BoostWorkerMemory);

// Downloads a large blob, splitting it into parts of PreferredMultipartChunkSize
// scheduled on NetworkPool via Work; updates the two counters as parts arrive and
// invokes OnDownloadComplete with the assembled payload. DownloadFolder is used
// for on-disk staging -- TODO confirm.
void DownloadLargeBlob(BuildStorageBase& Storage,
					   const std::filesystem::path& DownloadFolder,
					   const Oid& BuildId,
					   const IoHash& ChunkHash,
					   const std::uint64_t PreferredMultipartChunkSize,
					   ParallelWork& Work,
					   WorkerThreadPool& NetworkPool,
					   std::atomic<uint64_t>& DownloadedChunkByteCount,
					   std::atomic<uint64_t>& MultipartAttachmentCount,
					   std::function<void(IoBuffer&& Payload)>&& OnDownloadComplete);

// Validates an already-downloaded payload against BlobHash; reports compressed
// and decompressed sizes through the out-parameters. AbortFlag allows early exit.
CompositeBuffer ValidateBlob(std::atomic<bool>& AbortFlag,
							 IoBuffer&& Payload,
							 const IoHash& BlobHash,
							 uint64_t& OutCompressedSize,
							 uint64_t& OutDecompressedSize);

// Overload that fetches the blob from Storage first, then validates as above.
CompositeBuffer ValidateBlob(std::atomic<bool>& AbortFlag,
							 BuildStorageBase& Storage,
							 const Oid& BuildId,
							 const IoHash& BlobHash,
							 uint64_t& OutCompressedSize,
							 uint64_t& OutDecompressedSize);

// Resolves build-part ids/names against the build object, returning (id, name)
// pairs and the build's preferred multipart chunk size.
std::vector<std::pair<Oid, std::string>> ResolveBuildPartNames(CbObjectView BuildObject,
															   const Oid& BuildId,
															   const std::vector<Oid>& BuildPartIds,
															   std::span<const std::string> BuildPartNames,
															   std::uint64_t& OutPreferredMultipartChunkSize);

// Normalizes the user-supplied part selection in place; HelpText is presumably
// used in error messages -- TODO confirm.
void NormalizePartSelection(std::vector<Oid>& BuildPartIds, std::vector<std::string>& BuildPartNames, std::string_view HelpText);

// Builds the remote folder content view for the selected build parts, applying
// include/exclude wildcards; also returns the chunking controller, per-part
// contents, block descriptions and loose chunk hashes via out-parameters.
ChunkedFolderContent GetRemoteContent(LoggerRef InLog,
									  StorageInstance& Storage,
									  const Oid& BuildId,
									  const std::vector<std::pair<Oid, std::string>>& BuildParts,
									  const BuildManifest& Manifest,
									  std::span<const std::string> IncludeWildcards,
									  std::span<const std::string> ExcludeWildcards,
									  std::unique_ptr<ChunkingController>& OutChunkController,
									  std::vector<ChunkedFolderContent>& OutPartContents,
									  std::vector<ChunkBlockDescription>& OutBlockDescriptions,
									  std::vector<IoHash>& OutLooseChunkHashes,
									  bool IsQuiet,
									  bool IsVerbose,
									  bool DoExtraContentVerify);

// Pretty-prints a compact-binary object, wrapped in Prefix/Suffix.
std::string GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix);

#if ZEN_WITH_TESTS
// Force-link hook so the unit tests in this TU are not stripped by the linker.
void buildstorageutil_forcelink();
#endif // ZEN_WITH_TESTS
+
} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildupdatefolder.h b/src/zenremotestore/include/zenremotestore/builds/buildupdatefolder.h
new file mode 100644
index 000000000..c820f4dcb
--- /dev/null
+++ b/src/zenremotestore/include/zenremotestore/builds/buildupdatefolder.h
@@ -0,0 +1,529 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/iohash.h>
+#include <zencore/logging.h>
+#include <zencore/uid.h>
+#include <zencore/zencore.h>
+#include <zenremotestore/builds/buildsavedstate.h>
+#include <zenremotestore/builds/buildstoragecache.h>
+#include <zenremotestore/builds/buildstoragestats.h>
+#include <zenremotestore/builds/buildstorageutil.h>
+#include <zenremotestore/builds/builduploadfolder.h>
+#include <zenremotestore/chunking/chunkblock.h>
+#include <zenremotestore/chunking/chunkedcontent.h>
+#include <zenremotestore/partialblockrequestmode.h>
+#include <zenutil/bufferedwritefilecache.h>
+
+#include <filesystem>
+#include <span>
+#include <string>
+#include <vector>
+
+#include <atomic>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_set.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+namespace zen {
+
+class CloneQueryInterface;
+class FilteredRate;
+class ParallelWork;
+class ProgressBase;
+class WorkerThreadPool;
+
+//////////////////////////////////////////////////////////////////////////
+
// Implements the "update folder" operation: makes a local folder match a remote
// build. Chunks are sourced, in order of preference, from the local folder, the
// on-disk cache, scavenged sibling downloads, and finally the network (loose
// chunks, whole blocks, or partial block ranges). Construct with all inputs,
// call Execute() once; the public statistics members are filled during the run.
class BuildsOperationUpdateFolder
{
public:
	struct Options
	{
		bool IsQuiet = false;                // Suppress most output
		bool IsVerbose = false;              // Extra diagnostic output
		bool AllowFileClone = true;          // Permit file-clone (copy-on-write) where available
		bool UseSparseFiles = true;
		std::filesystem::path SystemRootDir;
		std::filesystem::path ZenFolderPath; // Location of the .zen metadata/cache folder
		// 128 MiB (32 MiB * 4) -- threshold for treating an attachment as "large"; confirm
		std::uint64_t LargeAttachmentSize = 32u * 1024u * 1024u * 4u;
		std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;  // 32 MiB
		EPartialBlockRequestMode PartialBlockRequestMode = EPartialBlockRequestMode::Mixed;
		bool WipeTargetFolder = false;             // Start from an empty target folder
		bool EnableOtherDownloadsScavenging = true; // Reuse chunks from other downloads on this machine
		bool EnableTargetFolderScavenging = true;   // Reuse chunks already present in the target folder
		bool ValidateCompletedSequences = true;     // Verify sequences after their chunks are written
		std::vector<std::string> ExcludeFolders;
		uint64_t MaximumInMemoryPayloadSize = 512u * 1024u;  // Larger payloads go via disk -- confirm
		bool PopulateCache = true;                  // Also write fetched data into the cache
	};

	// All reference parameters must outlive this object; it stores them as references.
	BuildsOperationUpdateFolder(LoggerRef Log,
								ProgressBase& Progress,
								StorageInstance& Storage,
								std::atomic<bool>& AbortFlag,
								std::atomic<bool>& PauseFlag,
								WorkerThreadPool& IOWorkerPool,
								WorkerThreadPool& NetworkPool,
								const Oid& BuildId,
								const std::filesystem::path& Path,
								const ChunkedFolderContent& LocalContent,
								const ChunkedContentLookup& LocalLookup,
								const ChunkedFolderContent& RemoteContent,
								const ChunkedContentLookup& RemoteLookup,
								const std::vector<ChunkBlockDescription>& BlockDescriptions,
								const std::vector<IoHash>& LooseChunkHashes,
								const Options& Options);

	// Runs the operation; on return OutLocalFolderState reflects the updated folder.
	void Execute(FolderContent& OutLocalFolderState);

	// Statistics populated during Execute(); readable afterwards.
	DiskStatistics m_DiskStats;
	CacheMappingStatistics m_CacheMappingStats;
	GetFolderContentStatistics m_ScavengedFolderScanStats;
	DownloadStatistics m_DownloadStats;
	WriteChunkStatistics m_WriteChunkStats;
	RebuildFolderStateStatistics m_RebuildFolderStateStats;
	std::atomic<uint64_t> m_WrittenChunkByteCount = 0;

private:
	// Chunk buffers extracted from a block plus the write targets they map to.
	struct BlockWriteOps
	{
		std::vector<CompositeBuffer> ChunkBuffers;
		struct WriteOpData
		{
			const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr;
			size_t ChunkBufferIndex = (size_t)-1;  // Index into ChunkBuffers; -1 = unset
		};
		std::vector<WriteOpData> WriteOps;
	};

	// A sibling download that can be scavenged: its saved state file and root path.
	struct ScavengeSource
	{
		std::filesystem::path StateFilePath;
		std::filesystem::path Path;
	};

	// Copy of a whole matching sequence from scavenged content into the cache.
	struct ScavengedSequenceCopyOperation
	{
		uint32_t ScavengedContentIndex = (uint32_t)-1;
		uint32_t ScavengedPathIndex = (uint32_t)-1;
		uint32_t RemoteSequenceIndex = (uint32_t)-1;
		uint64_t RawSize = (uint64_t)-1;
	};

	// Plan for copying one locally-available chunk to all of its write targets.
	struct CopyChunkData
	{
		uint32_t ScavengeSourceIndex = (uint32_t)-1;
		uint32_t SourceSequenceIndex = (uint32_t)-1;
		std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> TargetChunkLocationPtrs;
		struct ChunkTarget
		{
			uint32_t TargetChunkLocationCount = (uint32_t)-1;
			uint32_t RemoteChunkIndex = (uint32_t)-1;
			uint64_t CacheFileOffset = (uint64_t)-1;
		};
		std::vector<ChunkTarget> ChunkTargets;
	};

	// Result of asking storage which blobs already exist.
	struct BlobsExistsResult
	{
		tsl::robin_set<IoHash> ExistingBlobs;
		uint64_t ElapsedTimeMs = 0;
	};

	// Work item for downloading one loose chunk and writing it to its targets.
	struct LooseChunkHashWorkData
	{
		std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs;
		uint32_t RemoteChunkIndex = (uint32_t)-1;
	};

	// A remote path whose finished file must be finalized in the target folder.
	struct FinalizeTarget
	{
		IoHash RawHash;
		uint32_t RemotePathIndex;
	};

	// Classification of existing local paths against the remote content.
	struct LocalPathCategorization
	{
		std::vector<uint32_t> FilesToCache;             // Local files worth copying into the cache
		std::vector<uint32_t> RemoveLocalPathIndexes;   // Local files to delete
		tsl::robin_map<uint32_t, uint32_t> RemotePathIndexToLocalPathIndex;
		tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceHashToLocalPathIndex;
		uint64_t MatchCount = 0;
		uint64_t PathMismatchCount = 0;
		uint64_t HashMismatchCount = 0;
		uint64_t SkippedCount = 0;
		uint64_t DeleteCount = 0;
	};

	// Shared state threaded through the chunk-writing schedulers below.
	struct WriteChunksContext
	{
		ParallelWork& Work;
		BufferedWriteFileCache& WriteCache;
		std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters;
		std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags;
		std::atomic<uint64_t>& WritePartsComplete;
		uint64_t TotalPartWriteCount;
		uint64_t TotalRequestCount;
		const BlobsExistsResult& ExistsResult;
		FilteredRate& FilteredDownloadedBytesPerSecond;
		FilteredRate& FilteredWrittenBytesPerSecond;
	};

	// --- Scanning: discover what is already available locally ---

	void ScanCacheFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedChunkHashesFound,
						 tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedSequenceHashesFound);
	void ScanTempBlocksFolder(tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutCachedBlocksFound);
	std::vector<uint32_t> ScanTargetFolder(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedChunkHashesFound,
										   const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound);

	// --- Scavenging: reuse data from other downloads on this machine ---

	std::vector<ScavengeSource> FindScavengeSources();

	bool FindScavengeContent(const ScavengeSource& Source,
							 ChunkedFolderContent& OutScavengedLocalContent,
							 ChunkedContentLookup& OutScavengedLookup);

	void ScavengeSourceForChunks(uint32_t& InOutRemainingChunkCount,
								 std::vector<bool>& InOutRemoteChunkIndexNeedsCopyFromLocalFileFlags,
								 tsl::robin_map<IoHash, size_t, IoHash::Hasher>& InOutRawHashToCopyChunkDataIndex,
								 const std::vector<std::atomic<uint32_t>>& SequenceIndexChunksLeftToWriteCounters,
								 const ChunkedFolderContent& ScavengedContent,
								 const ChunkedContentLookup& ScavengedLookup,
								 std::vector<CopyChunkData>& InOutCopyChunkDatas,
								 uint32_t ScavengedContentIndex,
								 uint64_t& InOutChunkMatchingRemoteCount,
								 uint64_t& InOutChunkMatchingRemoteByteCount);

	std::filesystem::path FindDownloadedChunk(const IoHash& ChunkHash);

	// --- Bookkeeping of per-sequence remaining-chunk counters ---

	std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> GetRemainingChunkTargets(
		std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
		uint32_t ChunkIndex);

	uint64_t GetChunkWriteCount(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, uint32_t ChunkIndex);

	void CheckRequiredDiskSpace(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex);

	// --- Writing chunks into the cache from the various sources ---

	void WriteScavengedSequenceToCache(const std::filesystem::path& ScavengeRootPath,
									   const ChunkedFolderContent& ScavengedContent,
									   const ScavengedSequenceCopyOperation& ScavengeOp);

	void WriteLooseChunk(const uint32_t RemoteChunkIndex,
						 const BlobsExistsResult& ExistsResult,
						 std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
						 std::atomic<uint64_t>& WritePartsComplete,
						 std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs,
						 BufferedWriteFileCache& WriteCache,
						 ParallelWork& Work,
						 uint64_t TotalRequestCount,
						 uint64_t TotalPartWriteCount,
						 FilteredRate& FilteredDownloadedBytesPerSecond,
						 FilteredRate& FilteredWrittenBytesPerSecond);

	void DownloadBuildBlob(uint32_t RemoteChunkIndex,
						   const BlobsExistsResult& ExistsResult,
						   ParallelWork& Work,
						   uint64_t TotalRequestCount,
						   FilteredRate& FilteredDownloadedBytesPerSecond,
						   std::function<void(IoBuffer&& Payload)>&& OnDownloaded);

	void DownloadPartialBlock(std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRanges,
							  size_t BlockRangeIndex,
							  size_t BlockRangeCount,
							  const BlobsExistsResult& ExistsResult,
							  uint64_t TotalRequestCount,
							  FilteredRate& FilteredDownloadedBytesPerSecond,
							  std::function<void(IoBuffer&& InMemoryBuffer,
												 const std::filesystem::path& OnDiskPath,
												 size_t BlockRangeStartIndex,
												 std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>&& OnDownloaded);

	std::vector<uint32_t> WriteLocalChunkToCache(CloneQueryInterface* CloneQuery,
												 const CopyChunkData& CopyData,
												 const std::vector<ChunkedFolderContent>& ScavengedContents,
												 const std::vector<ChunkedContentLookup>& ScavengedLookups,
												 const std::vector<std::filesystem::path>& ScavengedPaths,
												 BufferedWriteFileCache& WriteCache);

	bool WriteCompressedChunkToCache(const IoHash& ChunkHash,
									 const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs,
									 BufferedWriteFileCache& WriteCache,
									 IoBuffer&& CompressedPart);

	void StreamDecompress(const IoHash& SequenceRawHash, CompositeBuffer&& CompressedPart);

	void WriteSequenceChunkToCache(BufferedWriteFileCache::Local& LocalWriter,
								   const CompositeBuffer& Chunk,
								   const uint32_t SequenceIndex,
								   const uint64_t FileOffset,
								   const uint32_t PathIndex);

	// --- Block handling: turn downloaded blocks into per-chunk cache writes ---

	bool GetBlockWriteOps(const IoHash& BlockRawHash,
						  std::span<const IoHash> ChunkRawHashes,
						  std::span<const uint32_t> ChunkCompressedLengths,
						  std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
						  std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
						  const MemoryView BlockView,
						  uint32_t FirstIncludedBlockChunkIndex,
						  uint32_t LastIncludedBlockChunkIndex,
						  BlockWriteOps& OutOps);

	void WriteBlockChunkOpsToCache(std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
								   const BlockWriteOps& Ops,
								   BufferedWriteFileCache& WriteCache,
								   ParallelWork& Work);

	bool WriteChunksBlockToCache(const ChunkBlockDescription& BlockDescription,
								 std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
								 ParallelWork& Work,
								 CompositeBuffer&& BlockBuffer,
								 std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
								 BufferedWriteFileCache& WriteCache);

	bool WritePartialBlockChunksToCache(const ChunkBlockDescription& BlockDescription,
										std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
										ParallelWork& Work,
										CompositeBuffer&& PartialBlockBuffer,
										uint32_t FirstIncludedBlockChunkIndex,
										uint32_t LastIncludedBlockChunkIndex,
										std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags,
										BufferedWriteFileCache& WriteCache);

	void AsyncWriteDownloadedChunk(uint32_t RemoteChunkIndex,
								   const BlobsExistsResult& ExistsResult,
								   std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs,
								   BufferedWriteFileCache& WriteCache,
								   ParallelWork& Work,
								   IoBuffer&& Payload,
								   std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters,
								   std::atomic<uint64_t>& WritePartsComplete,
								   const uint64_t TotalPartWriteCount,
								   FilteredRate& FilteredWrittenBytesPerSecond);

	// --- Sequence completion, verification and finalization ---

	void VerifyAndCompleteChunkSequencesAsync(std::span<const uint32_t> RemoteSequenceIndexes, ParallelWork& Work);
	bool CompleteSequenceChunk(uint32_t RemoteSequenceIndex, std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters);
	std::vector<uint32_t> CompleteChunkTargets(const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs,
											   std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters);
	void FinalizeChunkSequence(const IoHash& SequenceRawHash);
	void FinalizeChunkSequences(std::span<const uint32_t> RemoteSequenceIndexes);
	void VerifySequence(uint32_t RemoteSequenceIndex);

	// --- Planning: counters, matching, and work-list construction ---

	void InitializeSequenceCounters(std::vector<std::atomic<uint32_t>>& OutSequenceCounters,
									tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& OutSequencesLeftToFind,
									const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedChunkHashesFound,
									const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedSequenceHashesFound);

	void MatchScavengedSequencesToRemote(std::span<const ChunkedFolderContent> Contents,
										 std::span<const ChunkedContentLookup> Lookups,
										 std::span<const std::filesystem::path> Paths,
										 tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& InOutSequencesLeftToFind,
										 std::vector<std::atomic<uint32_t>>& InOutSequenceCounters,
										 std::vector<ScavengedSequenceCopyOperation>& OutCopyOperations,
										 uint64_t& OutScavengedPathsCount);

	uint64_t CalculateBytesToWriteAndFlagNeededChunks(std::span<const std::atomic<uint32_t>> SequenceCounters,
													  const std::vector<bool>& NeedsCopyFromLocalFileFlags,
													  std::span<std::atomic<bool>> OutNeedsCopyFromSourceFlags);

	void ClassifyCachedAndFetchBlocks(std::span<const ChunkBlockAnalyser::NeededBlock> NeededBlocks,
									  const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& CachedBlocksFound,
									  uint64_t& TotalPartWriteCount,
									  std::vector<uint32_t>& OutCachedChunkBlockIndexes,
									  std::vector<uint32_t>& OutFetchBlockIndexes);

	std::vector<uint32_t> DetermineNeededLooseChunkIndexes(std::span<const std::atomic<uint32_t>> SequenceCounters,
														   const std::vector<bool>& NeedsCopyFromLocalFileFlags,
														   std::span<std::atomic<bool>> NeedsCopyFromSourceFlags);

	BlobsExistsResult QueryBlobCacheExists(std::span<const uint32_t> NeededLooseChunkIndexes, std::span<const uint32_t> FetchBlockIndexes);

	std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> DeterminePartialDownloadModes(const BlobsExistsResult& ExistsResult);

	std::vector<LooseChunkHashWorkData> BuildLooseChunkHashWorks(std::span<const uint32_t> NeededLooseChunkIndexes,
																 std::span<const std::atomic<uint32_t>> SequenceCounters);

	void VerifyWriteChunksComplete(std::span<const std::atomic<uint32_t>> SequenceCounters,
								   uint64_t BytesToWrite,
								   uint64_t BytesToValidate);

	std::vector<FinalizeTarget> BuildSortedFinalizeTargets();

	void ScanScavengeSources(std::span<const ScavengeSource> Sources,
							 std::vector<ChunkedFolderContent>& OutContents,
							 std::vector<ChunkedContentLookup>& OutLookups,
							 std::vector<std::filesystem::path>& OutPaths);

	LocalPathCategorization CategorizeLocalPaths(const tsl::robin_map<std::string, uint32_t>& RemotePathToRemoteIndex);

	// --- Scheduling: enqueue the actual work on the thread pools ---

	void ScheduleLocalFileCaching(std::span<const uint32_t> FilesToCache,
								  std::atomic<uint64_t>& OutCachedCount,
								  std::atomic<uint64_t>& OutCachedByteCount);

	void ScheduleScavengedSequenceWrites(WriteChunksContext& Context,
										 std::span<const ScavengedSequenceCopyOperation> CopyOperations,
										 const std::vector<ChunkedFolderContent>& ScavengedContents,
										 const std::vector<std::filesystem::path>& ScavengedPaths);

	void ScheduleLooseChunkWrites(WriteChunksContext& Context, std::vector<LooseChunkHashWorkData>& LooseChunkHashWorks);

	void ScheduleLocalChunkCopies(WriteChunksContext& Context,
								  std::span<const CopyChunkData> CopyChunkDatas,
								  CloneQueryInterface* CloneQuery,
								  const std::vector<ChunkedFolderContent>& ScavengedContents,
								  const std::vector<ChunkedContentLookup>& ScavengedLookups,
								  const std::vector<std::filesystem::path>& ScavengedPaths);

	void ScheduleCachedBlockWrites(WriteChunksContext& Context, std::span<const uint32_t> CachedBlockIndexes);

	void SchedulePartialBlockDownloads(WriteChunksContext& Context, const ChunkBlockAnalyser::BlockResult& PartialBlocks);

	void WritePartialBlockToCache(WriteChunksContext& Context,
								  size_t BlockRangeStartIndex,
								  IoBuffer BlockPartialBuffer,
								  const std::filesystem::path& BlockChunkPath,
								  std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths,
								  const ChunkBlockAnalyser::BlockResult& PartialBlocks);

	void ScheduleFullBlockDownloads(WriteChunksContext& Context, std::span<const uint32_t> FullBlockIndexes);

	void WriteFullBlockToCache(WriteChunksContext& Context,
							   uint32_t BlockIndex,
							   IoBuffer BlockBuffer,
							   const std::filesystem::path& BlockChunkPath);

	void ScheduleLocalFileRemovals(ParallelWork& Work,
								   std::span<const uint32_t> RemoveLocalPathIndexes,
								   std::atomic<uint64_t>& DeletedCount);

	void ScheduleTargetFinalization(ParallelWork& Work,
									std::span<const FinalizeTarget> Targets,
									const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& SequenceHashToLocalPathIndex,
									const tsl::robin_map<uint32_t, uint32_t>& RemotePathIndexToLocalPathIndex,
									FolderContent& OutLocalFolderState,
									std::atomic<uint64_t>& TargetsComplete);

	void FinalizeTargetGroup(size_t BaseOffset,
							 size_t Count,
							 std::span<const FinalizeTarget> Targets,
							 const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& SequenceHashToLocalPathIndex,
							 const tsl::robin_map<uint32_t, uint32_t>& RemotePathIndexToLocalPathIndex,
							 FolderContent& OutLocalFolderState,
							 std::atomic<uint64_t>& TargetsComplete);

	LoggerRef Log() { return m_Log; }

	// Construction-time inputs, stored by reference (see constructor note).
	LoggerRef m_Log;
	ProgressBase& m_Progress;
	StorageInstance& m_Storage;
	std::atomic<bool>& m_AbortFlag;
	std::atomic<bool>& m_PauseFlag;
	WorkerThreadPool& m_IOWorkerPool;
	WorkerThreadPool& m_NetworkPool;
	const Oid m_BuildId;
	const std::filesystem::path m_Path;             // Target folder being updated
	const ChunkedFolderContent& m_LocalContent;
	const ChunkedContentLookup& m_LocalLookup;
	const ChunkedFolderContent& m_RemoteContent;
	const ChunkedContentLookup& m_RemoteLookup;
	const std::vector<ChunkBlockDescription>& m_BlockDescriptions;
	const std::vector<IoHash>& m_LooseChunkHashes;
	const Options m_Options;
	const std::filesystem::path m_CacheFolderPath;
	const std::filesystem::path m_TempDownloadFolderPath;
	const std::filesystem::path m_TempBlockFolderPath;

	std::atomic<uint64_t> m_ValidatedChunkByteCount = 0;
};
+
+//////////////////////////////////////////////////////////////////////////
+
+class TransferThreadWorkers;
+
// Counters for VerifyFolder(). The per-file counters are atomic (updated by
// parallel verification workers); the elapsed time is a plain value.
struct VerifyFolderStatistics
{
	std::atomic<uint64_t> FilesVerified = 0;   // Files that passed verification
	std::atomic<uint64_t> FilesFailed = 0;     // Files that failed verification
	std::atomic<uint64_t> ReadBytes = 0;       // Bytes read while verifying
	uint64_t VerifyElapsedWallTimeUs = 0;      // Wall time of the verify pass, microseconds
};
+
+//////////////////////////////////////////////////////////////////////////
+
// Returns the subset of Paths not already present in KnownPaths -- TODO confirm
// whether comparison is exact-path or normalized.
std::vector<std::filesystem::path> GetNewPaths(std::span<const std::filesystem::path> KnownPaths,
											   std::span<const std::filesystem::path> Paths);

// Builds a save-state for the given explicit set of paths under Path, chunking
// their content via ChunkController/ChunkCache; fills the scan and chunking stats.
BuildSaveState GetLocalStateFromPaths(ProgressBase& Progress,
									  std::atomic<bool>& AbortFlag,
									  std::atomic<bool>& PauseFlag,
									  TransferThreadWorkers& Workers,
									  GetFolderContentStatistics& LocalFolderScanStats,
									  ChunkingStatistics& ChunkingStats,
									  const std::filesystem::path& Path,
									  ChunkingController& ChunkController,
									  ChunkingCache& ChunkCache,
									  std::span<const std::filesystem::path> PathsToCheck);

// Builds the local content state for Path, presumably using StateFilePath to
// reuse a previously saved state -- TODO confirm.
BuildSaveState GetLocalContent(ProgressBase& Progress,
							   std::atomic<bool>& AbortFlag,
							   std::atomic<bool>& PauseFlag,
							   bool IsQuiet,
							   TransferThreadWorkers& Workers,
							   GetFolderContentStatistics& LocalFolderScanStats,
							   ChunkingStatistics& ChunkingStats,
							   const std::filesystem::path& Path,
							   const std::filesystem::path& StateFilePath,
							   ChunkingController& ChunkController,
							   ChunkingCache& ChunkCache);

// Verifies the files under Path against the expected Content/Lookup, skipping
// ExcludeFolders; VerifyFileHash additionally checks file hashes. Results are
// accumulated into VerifyFolderStats.
void VerifyFolder(ProgressBase& Progress,
				  std::atomic<bool>& AbortFlag,
				  std::atomic<bool>& PauseFlag,
				  TransferThreadWorkers& Workers,
				  const ChunkedFolderContent& Content,
				  const ChunkedContentLookup& Lookup,
				  const std::filesystem::path& Path,
				  const std::vector<std::string>& ExcludeFolders,
				  bool VerifyFileHash,
				  VerifyFolderStatistics& VerifyFolderStats);
+
+//////////////////////////////////////////////////////////////////////////
+
// Options for DownloadFolder(). Mirrors BuildsOperationUpdateFolder::Options
// where fields overlap, plus download-specific switches.
struct DownloadOptions
{
	std::filesystem::path SystemRootDir;
	std::filesystem::path ZenFolderPath;  // Location of the .zen metadata/cache folder
	bool AllowMultiparts = true;          // Permit multipart downloads of large blobs
	EPartialBlockRequestMode PartialBlockRequestMode = EPartialBlockRequestMode::Mixed;
	bool CleanTargetFolder = false;       // Start from an empty target folder
	bool PostDownloadVerify = false;      // Verify the folder after the download completes
	bool EnableOtherDownloadsScavenging = true;  // Reuse chunks from other downloads on this machine
	bool EnableTargetFolderScavenging = true;    // Reuse chunks already present in the target
	bool AllowFileClone = true;           // Permit file-clone (copy-on-write) where available
	std::vector<std::string> IncludeWildcards;   // Only download matching paths (empty = all)
	std::vector<std::string> ExcludeWildcards;   // Skip matching paths
	uint64_t MaximumInMemoryPayloadSize = 512u * 1024u;  // Larger payloads go via disk -- confirm
	bool PopulateCache = true;            // Also write fetched data into the cache
	bool AppendNewContent = false;
	bool IsQuiet = false;
	bool IsVerbose = false;
	// NOTE(review): defaults to false here but true in BuildsOperationUpdateFolder::Options -- confirm intended.
	bool UseSparseFiles = false;
	bool DoExtraContentVerify = false;
	std::vector<std::string> ExcludeFolders = DefaultExcludeFolders;
};
+
// Downloads the selected parts of BuildId into Path using the resolved Storage.
// BuildPartIds/BuildPartNames select parts (presumably empty = all parts --
// TODO confirm); DownloadSpecPath points to a download specification file --
// TODO confirm format. AbortFlag/PauseFlag allow cooperative cancellation and
// pausing; progress and logging go through Progress and InLog.
void DownloadFolder(LoggerRef InLog,
					ProgressBase& Progress,
					TransferThreadWorkers& Workers,
					StorageInstance& Storage,
					std::atomic<bool>& AbortFlag,
					std::atomic<bool>& PauseFlag,
					const BuildStorageCache::Statistics& StorageCacheStats,
					const Oid& BuildId,
					const std::vector<Oid>& BuildPartIds,
					std::span<const std::string> BuildPartNames,
					const std::filesystem::path& DownloadSpecPath,
					const std::filesystem::path& Path,
					const DownloadOptions& Options);
+
+} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/builduploadfolder.h b/src/zenremotestore/include/zenremotestore/builds/builduploadfolder.h
new file mode 100644
index 000000000..9ab80955a
--- /dev/null
+++ b/src/zenremotestore/include/zenremotestore/builds/builduploadfolder.h
@@ -0,0 +1,393 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/iohash.h>
+#include <zencore/logging.h>
+#include <zencore/uid.h>
+#include <zencore/zencore.h>
+#include <zenremotestore/builds/buildstoragestats.h>
+#include <zenremotestore/builds/buildstorageutil.h>
+#include <zenremotestore/chunking/chunkblock.h>
+#include <zenremotestore/chunking/chunkedcontent.h>
+
+#include <filesystem>
+#include <string>
+#include <vector>
+
+#include <atomic>
+#include <future>
+
+ZEN_THIRD_PARTY_INCLUDES_START
+#include <tsl/robin_set.h>
+ZEN_THIRD_PARTY_INCLUDES_END
+
+namespace zen {
+
+class FilteredRate;
+class ParallelWork;
+class ProgressBase;
+class ReadFileCache;
+class RwLock;
+class TransferThreadWorkers;
+class WorkerThreadPool;
+
+static constexpr size_t DefaultMaxChunkBlockSize = 64u * 1024u * 1024u;
+static constexpr size_t DefaultMaxChunksPerChunkBlock = 4u * 1000u;
+static constexpr size_t DefaultMaxChunkBlockEmbedSize = 3u * 512u * 1024u;
+
+//////////////////////////////////////////////////////////////////////////
+
+class BuildsOperationUploadFolder
+{
+public:
+ struct ChunksBlockParameters
+ {
+ size_t MaxBlockSize = DefaultMaxChunkBlockSize;
+ size_t MaxChunksPerBlock = DefaultMaxChunksPerChunkBlock;
+ size_t MaxChunkEmbedSize = DefaultMaxChunkBlockEmbedSize;
+ };
+
+ struct Options
+ {
+ bool IsQuiet = false;
+ bool IsVerbose = false;
+ bool DoExtraContentValidation = false;
+
+ const uint64_t FindBlockMaxCount = 10000;
+ const uint8_t BlockReuseMinPercentLimit = 85;
+ bool AllowMultiparts = true;
+ bool IgnoreExistingBlocks = false;
+ ChunksBlockParameters BlockParameters;
+
+ uint32_t PreferredMultipartChunkSize = 32u * 1024u * 1024u;
+
+ const uint64_t MinimumSizeForCompressInBlock = 2u * 1024u;
+
+ std::filesystem::path TempDir;
+ std::vector<std::string> ExcludeFolders;
+ std::vector<std::string> ExcludeExtensions;
+ std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt";
+
+ std::vector<std::string> NonCompressableExtensions;
+
+ bool PopulateCache = true;
+ };
+ BuildsOperationUploadFolder(LoggerRef Log,
+ ProgressBase& Progress,
+ StorageInstance& Storage,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ WorkerThreadPool& IOWorkerPool,
+ WorkerThreadPool& NetworkPool,
+ const Oid& BuildId,
+ const std::filesystem::path& Path,
+ bool CreateBuild,
+ const CbObject& MetaData,
+ const Options& Options);
+
+ std::vector<std::pair<Oid, std::string>> Execute(const Oid& BuildPartId,
+ const std::string_view BuildPartName,
+ const std::filesystem::path& ManifestPath,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache);
+
+ DiskStatistics m_DiskStats;
+ GetFolderContentStatistics m_LocalFolderScanStats;
+ ChunkingStatistics m_ChunkingStats;
+ FindBlocksStatistics m_FindBlocksStats;
+ ReuseBlocksStatistics m_ReuseBlocksStats;
+ UploadStatistics m_UploadStats;
+ GenerateBlocksStatistics m_GenerateBlocksStats;
+ LooseChunksStatistics m_LooseChunksStats;
+
+private:
+ struct PrepareBuildResult
+ {
+ std::vector<ChunkBlockDescription> KnownBlocks;
+ uint64_t PreferredMultipartChunkSize = 0;
+ uint64_t PayloadSize = 0;
+ uint64_t PrepareBuildTimeMs = 0;
+ uint64_t FindBlocksTimeMs = 0;
+ uint64_t ElapsedTimeMs = 0;
+ };
+
+ PrepareBuildResult PrepareBuild();
+
+ struct UploadPart
+ {
+ Oid PartId = Oid::Zero;
+ std::string PartName;
+ FolderContent Content;
+ uint64_t TotalRawSize = 0;
+ GetFolderContentStatistics LocalFolderScanStats;
+ };
+
+ std::vector<BuildsOperationUploadFolder::UploadPart> ReadFolder();
+ std::vector<UploadPart> ReadManifestParts(const std::filesystem::path& ManifestPath);
+
+ bool IsAcceptedFolder(const std::string_view& RelativePath) const;
+ bool IsAcceptedFile(const std::string_view& RelativePath) const;
+
+ void ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content,
+ const ChunkedContentLookup& Lookup,
+ std::vector<uint32_t>& ChunkIndexes,
+ std::vector<std::vector<uint32_t>>& OutBlocks);
+ struct GeneratedBlocks
+ {
+ std::vector<ChunkBlockDescription> BlockDescriptions;
+ std::vector<uint64_t> BlockSizes;
+ std::vector<CompositeBuffer> BlockHeaders;
+ std::vector<CbObject> BlockMetaDatas;
+ std::vector<uint8_t>
+ MetaDataHasBeenUploaded; // NOTE: Do not use std::vector<bool> here as this vector is modified by multiple threads
+ tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockHashToBlockIndex;
+ };
+
+ void GenerateBuildBlocks(const ChunkedFolderContent& Content,
+ const ChunkedContentLookup& Lookup,
+ const std::vector<std::vector<uint32_t>>& NewBlockChunks,
+ GeneratedBlocks& OutBlocks,
+ GenerateBlocksStatistics& GenerateBlocksStats,
+ UploadStatistics& UploadStats);
+
+ struct GenerateBuildBlocksContext
+ {
+ ParallelWork& Work;
+ WorkerThreadPool& GenerateBlobsPool;
+ WorkerThreadPool& UploadBlocksPool;
+ FilteredRate& FilteredGeneratedBytesPerSecond;
+ FilteredRate& FilteredUploadedBytesPerSecond;
+ std::atomic<uint64_t>& QueuedPendingBlocksForUpload;
+ RwLock& Lock;
+ GeneratedBlocks& OutBlocks;
+ GenerateBlocksStatistics& GenerateBlocksStats;
+ UploadStatistics& UploadStats;
+ size_t NewBlockCount;
+ };
+
+ void ScheduleBlockGeneration(GenerateBuildBlocksContext& Context,
+ const ChunkedFolderContent& Content,
+ const ChunkedContentLookup& Lookup,
+ const std::vector<std::vector<uint32_t>>& NewBlockChunks);
+
+ void UploadGeneratedBlock(GenerateBuildBlocksContext& Context, size_t BlockIndex, CompressedBuffer Payload);
+
+ std::vector<uint32_t> CalculateAbsoluteChunkOrders(const std::span<const IoHash> LocalChunkHashes,
+ const std::span<const uint32_t> LocalChunkOrder,
+ const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToLocalChunkIndex,
+ const std::span<const uint32_t>& LooseChunkIndexes,
+ const std::span<const ChunkBlockDescription>& BlockDescriptions);
+
+ CompositeBuffer FetchChunk(const ChunkedFolderContent& Content,
+ const ChunkedContentLookup& Lookup,
+ const IoHash& ChunkHash,
+ ReadFileCache& OpenFileCache);
+
+ CompressedBuffer GenerateBlock(const ChunkedFolderContent& Content,
+ const ChunkedContentLookup& Lookup,
+ const std::vector<uint32_t>& ChunksInBlock,
+ ChunkBlockDescription& OutBlockDescription);
+
+ CompressedBuffer RebuildBlock(const ChunkedFolderContent& Content,
+ const ChunkedContentLookup& Lookup,
+ CompositeBuffer&& HeaderBuffer,
+ const std::vector<uint32_t>& ChunksInBlock);
+
+ enum class PartTaskSteps : uint32_t
+ {
+ ChunkPartContent = 0,
+ CalculateDelta,
+ GenerateBlocks,
+ BuildPartManifest,
+ UploadBuildPart,
+ UploadAttachments,
+ PutBuildPartStats,
+ StepCount
+ };
+
+ void UploadBuildPart(ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
+ uint32_t PartIndex,
+ const UploadPart& Part,
+ uint32_t PartStepOffset,
+ uint32_t StepCount);
+
+ ChunkedFolderContent ScanPartContent(const UploadPart& Part,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
+ ChunkingStatistics& ChunkingStats);
+
+ void ConsumePrepareBuildResult();
+
+ void ClassifyChunksByBlockEligibility(const ChunkedFolderContent& LocalContent,
+ std::vector<uint32_t>& OutLooseChunkIndexes,
+ std::vector<uint32_t>& OutNewBlockChunkIndexes,
+ std::vector<size_t>& OutReuseBlockIndexes,
+ LooseChunksStatistics& LooseChunksStats,
+ FindBlocksStatistics& FindBlocksStats,
+ ReuseBlocksStatistics& ReuseBlocksStats);
+
+ struct BuiltPartManifest
+ {
+ CbObject PartManifest;
+ std::vector<ChunkBlockDescription> AllChunkBlockDescriptions;
+ std::vector<IoHash> AllChunkBlockHashes;
+ };
+
+ BuiltPartManifest BuildPartManifestObject(const ChunkedFolderContent& LocalContent,
+ const ChunkedContentLookup& LocalLookup,
+ ChunkingController& ChunkController,
+ std::span<const size_t> ReuseBlockIndexes,
+ const GeneratedBlocks& NewBlocks,
+ std::span<const uint32_t> LooseChunkIndexes);
+
+ void UploadAttachmentBatch(std::span<IoHash> RawHashes,
+ std::vector<IoHash>& OutUnknownChunks,
+ const ChunkedFolderContent& LocalContent,
+ const ChunkedContentLookup& LocalLookup,
+ const std::vector<std::vector<uint32_t>>& NewBlockChunks,
+ GeneratedBlocks& NewBlocks,
+ std::span<const uint32_t> LooseChunkIndexes,
+ UploadStatistics& UploadStats,
+ LooseChunksStatistics& LooseChunksStats);
+
+ void FinalizeBuildPartWithRetries(const UploadPart& Part,
+ const IoHash& PartHash,
+ std::vector<IoHash>& InOutUnknownChunks,
+ const ChunkedFolderContent& LocalContent,
+ const ChunkedContentLookup& LocalLookup,
+ const std::vector<std::vector<uint32_t>>& NewBlockChunks,
+ GeneratedBlocks& NewBlocks,
+ std::span<const uint32_t> LooseChunkIndexes,
+ UploadStatistics& UploadStats,
+ LooseChunksStatistics& LooseChunksStats);
+
+ void UploadMissingBlockMetadata(GeneratedBlocks& NewBlocks, UploadStatistics& UploadStats);
+
+ void UploadPartBlobs(const ChunkedFolderContent& Content,
+ const ChunkedContentLookup& Lookup,
+ std::span<IoHash> RawHashes,
+ const std::vector<std::vector<uint32_t>>& NewBlockChunks,
+ GeneratedBlocks& NewBlocks,
+ std::span<const uint32_t> LooseChunkIndexes,
+ const std::uint64_t LargeAttachmentSize,
+ UploadStatistics& TempUploadStats,
+ LooseChunksStatistics& TempLooseChunksStats,
+ std::vector<IoHash>& OutUnknownChunks);
+
+ struct UploadPartClassification
+ {
+ std::vector<size_t> BlockIndexes;
+ std::vector<uint32_t> LooseChunkOrderIndexes;
+ uint64_t TotalBlocksSize = 0;
+ uint64_t TotalLooseChunksSize = 0;
+ };
+
+ UploadPartClassification ClassifyUploadRawHashes(std::span<IoHash> RawHashes,
+ const ChunkedFolderContent& Content,
+ const ChunkedContentLookup& Lookup,
+ const GeneratedBlocks& NewBlocks,
+ std::span<const uint32_t> LooseChunkIndexes,
+ std::vector<IoHash>& OutUnknownChunks);
+
+ struct UploadPartBlobsContext
+ {
+ ParallelWork& Work;
+ WorkerThreadPool& ReadChunkPool;
+ WorkerThreadPool& UploadChunkPool;
+ FilteredRate& FilteredGenerateBlockBytesPerSecond;
+ FilteredRate& FilteredCompressedBytesPerSecond;
+ FilteredRate& FilteredUploadedBytesPerSecond;
+ std::atomic<size_t>& UploadedBlockSize;
+ std::atomic<size_t>& UploadedBlockCount;
+ std::atomic<size_t>& UploadedRawChunkSize;
+ std::atomic<size_t>& UploadedCompressedChunkSize;
+ std::atomic<uint32_t>& UploadedChunkCount;
+ std::atomic<uint64_t>& GeneratedBlockCount;
+ std::atomic<uint64_t>& GeneratedBlockByteCount;
+ std::atomic<uint64_t>& QueuedPendingInMemoryBlocksForUpload;
+ size_t UploadBlockCount;
+ uint32_t UploadChunkCount;
+ uint64_t LargeAttachmentSize;
+ GeneratedBlocks& NewBlocks;
+ const ChunkedFolderContent& Content;
+ const ChunkedContentLookup& Lookup;
+ const std::vector<std::vector<uint32_t>>& NewBlockChunks;
+ std::span<const uint32_t> LooseChunkIndexes;
+ UploadStatistics& TempUploadStats;
+ LooseChunksStatistics& TempLooseChunksStats;
+ };
+
+ void ScheduleBlockGenerationAndUpload(UploadPartBlobsContext& Context, std::span<const size_t> BlockIndexes);
+
+ void ScheduleLooseChunkCompressionAndUpload(UploadPartBlobsContext& Context, std::span<const uint32_t> LooseChunkOrderIndexes);
+
+ void UploadBlockPayload(UploadPartBlobsContext& Context, size_t BlockIndex, const IoHash& BlockHash, CompositeBuffer Payload);
+
+ void UploadLooseChunkPayload(UploadPartBlobsContext& Context, const IoHash& RawHash, uint64_t RawSize, CompositeBuffer Payload);
+
+ CompositeBuffer CompressChunk(const ChunkedFolderContent& Content,
+ const ChunkedContentLookup& Lookup,
+ uint32_t ChunkIndex,
+ LooseChunksStatistics& TempLooseChunksStats);
+
+ LoggerRef Log() { return m_Log; }
+
+ LoggerRef m_Log;
+ ProgressBase& m_Progress;
+ StorageInstance& m_Storage;
+ std::atomic<bool>& m_AbortFlag;
+ std::atomic<bool>& m_PauseFlag;
+ WorkerThreadPool& m_IOWorkerPool;
+ WorkerThreadPool& m_NetworkPool;
+ const Oid m_BuildId;
+
+ const std::filesystem::path m_Path;
+ const bool m_CreateBuild;
+ const CbObject m_MetaData;
+ const Options m_Options;
+
+ tsl::robin_set<uint32_t> m_NonCompressableExtensionHashes;
+
+ std::future<PrepareBuildResult> m_PrepBuildResultFuture;
+ std::vector<ChunkBlockDescription> m_KnownBlocks;
+ uint64_t m_PreferredMultipartChunkSize = 0;
+ uint64_t m_LargeAttachmentSize = 0;
+};
+
+//////////////////////////////////////////////////////////////////////////
+
+struct UploadFolderOptions
+{
+ std::filesystem::path TempDir;
+ uint64_t FindBlockMaxCount;
+ uint8_t BlockReuseMinPercentLimit;
+ bool AllowMultiparts;
+ bool CreateBuild;
+ bool IgnoreExistingBlocks;
+ bool UploadToZenCache;
+ bool IsQuiet = false;
+ bool IsVerbose = false;
+ bool DoExtraContentVerify = false;
+ const std::vector<std::string>& ExcludeFolders = DefaultExcludeFolders;
+ const std::vector<std::string>& ExcludeExtensions = DefaultExcludeExtensions;
+};
+
+std::vector<std::pair<Oid, std::string>> UploadFolder(LoggerRef Log,
+ ProgressBase& Progress,
+ TransferThreadWorkers& Workers,
+ StorageInstance& Storage,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ const Oid& BuildId,
+ const Oid& BuildPartId,
+ std::string_view BuildPartName,
+ const std::filesystem::path& Path,
+ const std::filesystem::path& ManifestPath,
+ const CbObject& MetaData,
+ ChunkingController& ChunkController,
+ ChunkingCache& ChunkCache,
+ const UploadFolderOptions& Options);
+
+} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildvalidatebuildpart.h b/src/zenremotestore/include/zenremotestore/builds/buildvalidatebuildpart.h
new file mode 100644
index 000000000..d9403c67b
--- /dev/null
+++ b/src/zenremotestore/include/zenremotestore/builds/buildvalidatebuildpart.h
@@ -0,0 +1,125 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/iohash.h>
+#include <zencore/logging.h>
+#include <zencore/uid.h>
+#include <zenremotestore/builds/buildstoragestats.h>
+#include <zenremotestore/chunking/chunkblock.h>
+
+#include <atomic>
+#include <filesystem>
+
+namespace zen {
+
+class BuildStorageBase;
+class FilteredRate;
+class ParallelWork;
+class ProgressBase;
+class TransferThreadWorkers;
+class WorkerThreadPool;
+
+//////////////////////////////////////////////////////////////////////////
+
+class BuildsOperationValidateBuildPart
+{
+public:
+ struct Options
+ {
+ // Scratch area for streaming large chunk downloads. Must be non-empty.
+ std::filesystem::path TempFolder;
+ bool IsQuiet = false;
+ bool IsVerbose = false;
+ };
+ BuildsOperationValidateBuildPart(LoggerRef Log,
+ ProgressBase& Progress,
+ BuildStorageBase& Storage,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ WorkerThreadPool& IOWorkerPool,
+ WorkerThreadPool& NetworkPool,
+ const Oid& BuildId,
+ const Oid& BuildPartId,
+ const std::string_view BuildPartName,
+ const Options& Options);
+
+ void Execute();
+
+ ValidateStatistics m_ValidateStats;
+ DownloadStatistics m_DownloadStats;
+
+private:
+ enum class TaskSteps : uint32_t
+ {
+ FetchBuild,
+ FetchBuildPart,
+ ValidateBlobs,
+ Cleanup,
+ StepCount
+ };
+
+ ChunkBlockDescription ValidateChunkBlock(IoBuffer&& Payload,
+ const IoHash& BlobHash,
+ uint64_t& OutCompressedSize,
+ uint64_t& OutDecompressedSize);
+
+ struct ValidateBlobsContext
+ {
+ ParallelWork& Work;
+ uint64_t AttachmentsToVerifyCount;
+ FilteredRate& FilteredDownloadedBytesPerSecond;
+ FilteredRate& FilteredVerifiedBytesPerSecond;
+ };
+
+ struct ResolvedBuildPart
+ {
+ std::vector<IoHash> ChunkAttachments;
+ std::vector<IoHash> BlockAttachments;
+ uint64_t PreferredMultipartChunkSize = 0;
+ };
+
+ ResolvedBuildPart ResolveBuildPart();
+
+ void ScheduleChunkAttachmentValidation(ValidateBlobsContext& Context,
+ std::span<const IoHash> ChunkAttachments,
+ const std::filesystem::path& TempFolder,
+ uint64_t PreferredMultipartChunkSize);
+
+ void ScheduleBlockAttachmentValidation(ValidateBlobsContext& Context, std::span<const IoHash> BlockAttachments);
+
+ void ValidateDownloadedChunk(ValidateBlobsContext& Context, const IoHash& ChunkHash, IoBuffer Payload);
+
+ void ValidateDownloadedBlock(ValidateBlobsContext& Context, const IoHash& BlockAttachment, IoBuffer Payload);
+
+ LoggerRef Log() { return m_Log; }
+
+ LoggerRef m_Log;
+ ProgressBase& m_Progress;
+ BuildStorageBase& m_Storage;
+ std::atomic<bool>& m_AbortFlag;
+ std::atomic<bool>& m_PauseFlag;
+ WorkerThreadPool& m_IOWorkerPool;
+ WorkerThreadPool& m_NetworkPool;
+ const Oid m_BuildId;
+ Oid m_BuildPartId;
+ const std::string m_BuildPartName;
+ const Options m_Options;
+};
+
+//////////////////////////////////////////////////////////////////////////
+
+void ValidateBuildPart(LoggerRef Log,
+ ProgressBase& Progress,
+ std::atomic<bool>& AbortFlag,
+ std::atomic<bool>& PauseFlag,
+ bool IsQuiet,
+ bool IsVerbose,
+ TransferThreadWorkers& Workers,
+ BuildStorageBase& Storage,
+ const std::filesystem::path& TempFolder,
+ const Oid& BuildId,
+ const Oid& BuildPartId,
+ std::string_view BuildPartName);
+
+} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h b/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h
index 888ec8ead..270835521 100644
--- a/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h
+++ b/src/zenremotestore/include/zenremotestore/builds/jupiterbuildstorage.h
@@ -22,4 +22,6 @@ bool ParseBuildStorageUrl(std::string_view InUrl,
std::string& OutBucket,
std::string& OutBuildId);
+void jupiterbuildstorage_forcelink();
+
} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h b/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h
index 5a17ef79c..73d037542 100644
--- a/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h
+++ b/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h
@@ -3,6 +3,7 @@
#pragma once
#include <zencore/iohash.h>
+#include <zencore/logbase.h>
#include <zencore/compactbinary.h>
#include <zencore/compress.h>
@@ -11,9 +12,6 @@ ZEN_THIRD_PARTY_INCLUDES_START
#include <tsl/robin_map.h>
ZEN_THIRD_PARTY_INCLUDES_END
-#include <optional>
-#include <vector>
-
namespace zen {
struct ThinChunkBlockDescription
@@ -24,16 +22,17 @@ struct ThinChunkBlockDescription
struct ChunkBlockDescription : public ThinChunkBlockDescription
{
- uint64_t HeaderSize;
+ uint64_t HeaderSize = 0;
std::vector<uint32_t> ChunkRawLengths;
std::vector<uint32_t> ChunkCompressedLengths;
};
std::vector<ChunkBlockDescription> ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject);
ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject);
+std::vector<ChunkBlockDescription> ParseBlockMetadatas(std::span<const CbObject> BlockMetadatas);
CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData);
ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash);
-typedef std::function<std::pair<uint64_t, CompressedBuffer>(const IoHash& RawHash)> FetchChunkFunc;
+typedef std::function<std::pair<uint64_t, CompositeBuffer>(const IoHash& RawHash)> FetchChunkFunc;
CompressedBuffer GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock);
bool IterateChunkBlock(const SharedBuffer& BlockPayload,
@@ -66,9 +65,7 @@ struct ReuseBlocksStatistics
}
};
-class OperationLogOutput;
-
-std::vector<size_t> FindReuseBlocks(OperationLogOutput& Output,
+std::vector<size_t> FindReuseBlocks(LoggerRef InLog,
const uint8_t BlockReuseMinPercentLimit,
const bool IsVerbose,
ReuseBlocksStatistics& Stats,
@@ -82,15 +79,18 @@ class ChunkBlockAnalyser
public:
struct Options
{
- bool IsQuiet = false;
- bool IsVerbose = false;
- double HostLatencySec = -1.0;
- double HostHighSpeedLatencySec = -1.0;
- uint64_t HostSpeedBytesPerSec = (1u * 1024u * 1024u * 1024u) / 8u; // 1GBit
- uint64_t HostHighSpeedBytesPerSec = (2u * 1024u * 1024u * 1024u) / 8u; // 2GBit
+ bool IsQuiet = false;
+ bool IsVerbose = false;
+ double HostLatencySec = -1.0;
+ double HostHighSpeedLatencySec = -1.0;
+ uint64_t HostSpeedBytesPerSec = (1u * 1024u * 1024u * 1024u) / 8u; // 1GBit
+ uint64_t HostHighSpeedBytesPerSec = (2u * 1024u * 1024u * 1024u) / 8u; // 2GBit
+ uint64_t HostMaxRangeCountPerRequest = (uint64_t)-1;
+ uint64_t HostHighSpeedMaxRangeCountPerRequest = (uint64_t)-1; // No limit
+ uint64_t MaxRangesPerBlock = 1024u;
};
- ChunkBlockAnalyser(OperationLogOutput& LogOutput, std::span<const ChunkBlockDescription> BlockDescriptions, const Options& Options);
+ ChunkBlockAnalyser(LoggerRef Log, std::span<const ChunkBlockDescription> BlockDescriptions, const Options& Options);
struct BlockRangeDescriptor
{
@@ -110,7 +110,7 @@ public:
std::vector<NeededBlock> GetNeeded(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
std::function<bool(uint32_t ChunkIndex)>&& NeedsBlockChunk);
- enum EPartialBlockDownloadMode
+ enum class EPartialBlockDownloadMode
{
Off,
SingleRange,
@@ -129,49 +129,17 @@ public:
std::span<const EPartialBlockDownloadMode> BlockPartialDownloadModes);
private:
- struct BlockRangeLimit
- {
- uint16_t SizePercent;
- uint16_t MaxRangeCount;
- };
+ LoggerRef Log() { return m_Log; }
- static constexpr uint16_t FullBlockRangePercentLimit = 98;
-
- static constexpr BlockRangeLimit ForceMergeLimits[] = {{.SizePercent = FullBlockRangePercentLimit, .MaxRangeCount = 1},
- {.SizePercent = 90, .MaxRangeCount = 4},
- {.SizePercent = 85, .MaxRangeCount = 16},
- {.SizePercent = 80, .MaxRangeCount = 32},
- {.SizePercent = 75, .MaxRangeCount = 48},
- {.SizePercent = 70, .MaxRangeCount = 64},
- {.SizePercent = 4, .MaxRangeCount = 82},
- {.SizePercent = 0, .MaxRangeCount = 96}};
-
- BlockRangeDescriptor MergeBlockRanges(std::span<const BlockRangeDescriptor> Ranges);
- std::optional<std::vector<BlockRangeDescriptor>> MakeOptionalBlockRangeVector(uint64_t TotalBlockSize,
- const BlockRangeDescriptor& Range);
- const BlockRangeLimit* GetBlockRangeLimitForRange(std::span<const BlockRangeLimit> Limits,
- uint64_t TotalBlockSize,
- std::span<const BlockRangeDescriptor> Ranges);
- std::vector<BlockRangeDescriptor> CollapseBlockRanges(const uint64_t AlwaysAcceptableGap,
- std::span<const BlockRangeDescriptor> BlockRanges);
- uint64_t CalculateNextGap(const uint64_t AlwaysAcceptableGap, std::span<const BlockRangeDescriptor> BlockRanges);
- std::optional<std::vector<BlockRangeDescriptor>> CalculateBlockRanges(uint32_t BlockIndex,
- const ChunkBlockDescription& BlockDescription,
- std::span<const uint32_t> BlockChunkIndexNeeded,
- EPartialBlockDownloadMode PartialBlockDownloadMode,
- const uint64_t ChunkStartOffsetInBlock,
- const uint64_t TotalBlockSize,
- uint64_t& OutTotalWantedChunksSize);
-
- OperationLogOutput& m_LogOutput;
+ LoggerRef m_Log;
const std::span<const ChunkBlockDescription> m_BlockDescriptions;
const Options m_Options;
};
#if ZEN_WITH_TESTS
-class CbWriter;
void chunkblock_forcelink();
+
#endif // ZEN_WITH_TESTS
} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
index d402bd3f0..f374211f2 100644
--- a/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
+++ b/src/zenremotestore/include/zenremotestore/chunking/chunkedcontent.h
@@ -3,7 +3,6 @@
#pragma once
#include <zencore/compactbinary.h>
-#include <zencore/compactbinarybuilder.h>
#include <zencore/iohash.h>
#include <filesystem>
@@ -231,7 +230,7 @@ GetSequenceIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& Raw
inline uint32_t
GetChunkIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash)
{
- return Lookup.RawHashToSequenceIndex.at(RawHash);
+ return Lookup.ChunkHashToChunkIndex.at(RawHash);
}
inline uint32_t
diff --git a/src/zenremotestore/include/zenremotestore/filesystemutils.h b/src/zenremotestore/include/zenremotestore/filesystemutils.h
deleted file mode 100644
index cb2d718f7..000000000
--- a/src/zenremotestore/include/zenremotestore/filesystemutils.h
+++ /dev/null
@@ -1,121 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#include <zencore/basicfile.h>
-#include <zenremotestore/chunking/chunkedcontent.h>
-
-namespace zen {
-
-class CompositeBuffer;
-
-class BufferedOpenFile
-{
-public:
- static constexpr uint64_t BlockSize = 256u * 1024u;
-
- BufferedOpenFile(const std::filesystem::path Path,
- std::atomic<uint64_t>& OpenReadCount,
- std::atomic<uint64_t>& CurrentOpenFileCount,
- std::atomic<uint64_t>& ReadCount,
- std::atomic<uint64_t>& ReadByteCount);
- ~BufferedOpenFile();
- BufferedOpenFile() = delete;
- BufferedOpenFile(const BufferedOpenFile&) = delete;
- BufferedOpenFile(BufferedOpenFile&&) = delete;
- BufferedOpenFile& operator=(BufferedOpenFile&&) = delete;
- BufferedOpenFile& operator=(const BufferedOpenFile&) = delete;
-
- CompositeBuffer GetRange(uint64_t Offset, uint64_t Size);
-
-public:
- void* Handle() { return m_Source.Handle(); }
-
-private:
- BasicFile m_Source;
- const uint64_t m_SourceSize;
- std::atomic<uint64_t>& m_OpenReadCount;
- std::atomic<uint64_t>& m_CurrentOpenFileCount;
- std::atomic<uint64_t>& m_ReadCount;
- std::atomic<uint64_t>& m_ReadByteCount;
- uint64_t m_CacheBlockIndex = (uint64_t)-1;
- IoBuffer m_Cache;
-};
-
-class ReadFileCache
-{
-public:
- // A buffered file reader that provides CompositeBuffer where the buffers are owned and the memory never overwritten
- ReadFileCache(std::atomic<uint64_t>& OpenReadCount,
- std::atomic<uint64_t>& CurrentOpenFileCount,
- std::atomic<uint64_t>& ReadCount,
- std::atomic<uint64_t>& ReadByteCount,
- const std::filesystem::path& Path,
- const ChunkedFolderContent& LocalContent,
- const ChunkedContentLookup& LocalLookup,
- size_t MaxOpenFileCount);
- ~ReadFileCache();
-
- CompositeBuffer GetRange(uint32_t SequenceIndex, uint64_t Offset, uint64_t Size);
-
-private:
- const std::filesystem::path m_Path;
- const ChunkedFolderContent& m_LocalContent;
- const ChunkedContentLookup& m_LocalLookup;
- std::vector<std::pair<uint32_t, std::unique_ptr<BufferedOpenFile>>> m_OpenFiles;
- std::atomic<uint64_t>& m_OpenReadCount;
- std::atomic<uint64_t>& m_CurrentOpenFileCount;
- std::atomic<uint64_t>& m_ReadCount;
- std::atomic<uint64_t>& m_ReadByteCount;
-};
-
-uint32_t SetNativeFileAttributes(const std::filesystem::path FilePath, SourcePlatform SourcePlatform, uint32_t Attributes);
-
-uint32_t GetNativeFileAttributes(const std::filesystem::path FilePath);
-
-bool IsFileWithRetry(const std::filesystem::path& Path);
-
-bool SetFileReadOnlyWithRetry(const std::filesystem::path& Path, bool ReadOnly);
-
-std::error_code RenameFileWithRetry(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath);
-
-std::error_code TryRemoveFile(const std::filesystem::path& Path);
-
-void RemoveFileWithRetry(const std::filesystem::path& Path);
-
-void FastCopyFile(bool AllowFileClone,
- bool UseSparseFiles,
- const std::filesystem::path& SourceFilePath,
- const std::filesystem::path& TargetFilePath,
- uint64_t RawSize,
- std::atomic<uint64_t>& WriteCount,
- std::atomic<uint64_t>& WriteByteCount,
- std::atomic<uint64_t>& CloneCount,
- std::atomic<uint64_t>& CloneByteCount);
-
-struct CleanDirectoryResult
-{
- uint64_t FoundCount = 0;
- uint64_t DeletedCount = 0;
- uint64_t DeletedByteCount = 0;
- std::vector<std::pair<std::filesystem::path, std::error_code>> FailedRemovePaths;
-};
-
-CleanDirectoryResult CleanDirectory(
- WorkerThreadPool& IOWorkerPool,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag,
- const std::filesystem::path& Path,
- std::span<const std::string> ExcludeDirectories,
- std::function<void(const std::string_view Details, uint64_t TotalCount, uint64_t RemainingCount, bool IsPaused, bool IsAborted)>&&
- ProgressFunc,
- uint32_t ProgressUpdateDelayMS);
-
-bool CleanAndRemoveDirectory(WorkerThreadPool& WorkerPool,
- std::atomic<bool>& AbortFlag,
- std::atomic<bool>& PauseFlag,
- const std::filesystem::path& Directory);
-
-void filesystemutils_forcelink(); // internal
-
-} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h b/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h
index 7bbf40dfa..caf7ecd28 100644
--- a/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h
+++ b/src/zenremotestore/include/zenremotestore/jupiter/jupiterhost.h
@@ -2,6 +2,7 @@
#pragma once
+#include <cstdint>
#include <string>
#include <string_view>
#include <vector>
@@ -28,7 +29,8 @@ struct JupiterEndpointTestResult
{
bool Success = false;
std::string FailureReason;
- double LatencySeconds = -1.0;
+ double LatencySeconds = -1.0;
+ uint64_t MaxRangeCountPerRequest = 1;
};
JupiterEndpointTestResult TestJupiterEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool HttpVerbose);
diff --git a/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h b/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h
index eaf6962fd..8721bc37f 100644
--- a/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h
+++ b/src/zenremotestore/include/zenremotestore/jupiter/jupitersession.h
@@ -56,6 +56,11 @@ struct FinalizeBuildPartResult : JupiterResult
std::vector<IoHash> Needs;
};
+struct BuildBlobRangesResult : JupiterResult
+{
+ std::vector<std::pair<uint64_t, uint64_t>> Ranges;
+};
+
/**
* Context for performing Jupiter operations
*
@@ -135,6 +140,13 @@ public:
uint64_t Offset = 0,
uint64_t Size = (uint64_t)-1);
+ BuildBlobRangesResult GetBuildBlob(std::string_view Namespace,
+ std::string_view BucketId,
+ const Oid& BuildId,
+ const IoHash& Hash,
+ std::filesystem::path TempFolderPath,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges);
+
JupiterResult PutMultipartBuildBlob(std::string_view Namespace,
std::string_view BucketId,
const Oid& BuildId,
diff --git a/src/zenremotestore/include/zenremotestore/operationlogoutput.h b/src/zenremotestore/include/zenremotestore/operationlogoutput.h
deleted file mode 100644
index 6f10ab156..000000000
--- a/src/zenremotestore/include/zenremotestore/operationlogoutput.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#pragma once
-
-#include <zencore/fmtutils.h>
-#include <zencore/logbase.h>
-
-namespace zen {
-
-class OperationLogOutput
-{
-public:
- virtual ~OperationLogOutput() {}
- virtual void EmitLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args) = 0;
-
- virtual void SetLogOperationName(std::string_view Name) = 0;
- virtual void SetLogOperationProgress(uint32_t StepIndex, uint32_t StepCount) = 0;
- virtual uint32_t GetProgressUpdateDelayMS() = 0;
-
- class ProgressBar
- {
- public:
- struct State
- {
- bool operator==(const State&) const = default;
- std::string Task;
- std::string Details;
- uint64_t TotalCount = 0;
- uint64_t RemainingCount = 0;
- enum class EStatus
- {
- Running,
- Aborted,
- Paused
- };
- EStatus Status = EStatus::Running;
-
- static EStatus CalculateStatus(bool IsAborted, bool IsPaused)
- {
- if (IsAborted)
- {
- return EStatus::Aborted;
- }
- if (IsPaused)
- {
- return EStatus::Paused;
- }
- return EStatus::Running;
- }
- };
-
- virtual ~ProgressBar() {}
-
- virtual void UpdateState(const State& NewState, bool DoLinebreak) = 0;
- virtual void Finish() = 0;
- };
-
- virtual ProgressBar* CreateProgressBar(std::string_view InSubTask) = 0;
-};
-
-OperationLogOutput* CreateStandardLogOutput(LoggerRef Log);
-
-#define ZEN_OPERATION_LOG(OutputTarget, InLevel, fmtstr, ...) \
- do \
- { \
- using namespace std::literals; \
- ZEN_CHECK_FORMAT_STRING(fmtstr##sv, ##__VA_ARGS__); \
- OutputTarget.EmitLogMessage(InLevel, fmtstr, zen::logging::LogCaptureArguments(__VA_ARGS__)); \
- } while (false)
-
-#define ZEN_OPERATION_LOG_INFO(OutputTarget, fmtstr, ...) \
- ZEN_OPERATION_LOG((OutputTarget), zen::logging::level::Info, fmtstr, ##__VA_ARGS__)
-#define ZEN_OPERATION_LOG_DEBUG(OutputTarget, fmtstr, ...) \
- ZEN_OPERATION_LOG((OutputTarget), zen::logging::level::Debug, fmtstr, ##__VA_ARGS__)
-#define ZEN_OPERATION_LOG_WARN(OutputTarget, fmtstr, ...) \
- ZEN_OPERATION_LOG((OutputTarget), zen::logging::level::Warn, fmtstr, ##__VA_ARGS__)
-
-} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h b/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h
index 66dfcc62d..c058e1c1f 100644
--- a/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h
+++ b/src/zenremotestore/include/zenremotestore/projectstore/buildsremoteprojectstore.h
@@ -2,6 +2,7 @@
#pragma once
+#include <zenhttp/httpclient.h>
#include <zenremotestore/projectstore/remoteprojectstore.h>
namespace zen {
@@ -10,9 +11,6 @@ class AuthMgr;
struct BuildsRemoteStoreOptions : RemoteStoreOptions
{
- std::string Host;
- std::string OverrideHost;
- std::string ZenHost;
std::string Namespace;
std::string Bucket;
Oid BuildId;
@@ -22,20 +20,16 @@ struct BuildsRemoteStoreOptions : RemoteStoreOptions
std::filesystem::path OidcExePath;
bool ForceDisableBlocks = false;
bool ForceDisableTempBlocks = false;
- bool AssumeHttp2 = false;
- bool PopulateCache = true;
IoBuffer MetaData;
size_t MaximumInMemoryDownloadSize = 1024u * 1024u;
};
-std::shared_ptr<RemoteProjectStore> CreateJupiterBuildsRemoteStore(LoggerRef InLog,
- const BuildsRemoteStoreOptions& Options,
- const std::filesystem::path& TempFilePath,
- bool Quiet,
- bool Unattended,
- bool Hidden,
- WorkerThreadPool& CacheBackgroundWorkerPool,
- double& OutHostLatencySec,
- double& OutCacheLatencySec);
+struct BuildStorageResolveResult;
+
+std::shared_ptr<RemoteProjectStore> CreateJupiterBuildsRemoteStore(LoggerRef InLog,
+ const BuildStorageResolveResult& ResolveResult,
+ std::function<HttpClientAccessToken()>&& TokenProvider,
+ const BuildsRemoteStoreOptions& Options,
+ const std::filesystem::path& TempFilePath);
} // namespace zen
diff --git a/src/zenremotestore/include/zenremotestore/projectstore/projectstoreoperations.h b/src/zenremotestore/include/zenremotestore/projectstore/projectstoreoperations.h
index a07ede6f6..db5b27d3f 100644
--- a/src/zenremotestore/include/zenremotestore/projectstore/projectstoreoperations.h
+++ b/src/zenremotestore/include/zenremotestore/projectstore/projectstoreoperations.h
@@ -20,7 +20,7 @@ ZEN_THIRD_PARTY_INCLUDES_END
namespace zen {
class BuildStorageBase;
-class OperationLogOutput;
+class ProgressBase;
struct StorageInstance;
class ProjectStoreOperationOplogState
@@ -34,10 +34,7 @@ public:
std::filesystem::path TempFolderPath;
};
- ProjectStoreOperationOplogState(OperationLogOutput& OperationLogOutput,
- StorageInstance& Storage,
- const Oid& BuildId,
- const Options& Options);
+ ProjectStoreOperationOplogState(LoggerRef Log, StorageInstance& Storage, const Oid& BuildId, const Options& Options);
CbObjectView LoadBuildObject();
CbObjectView LoadBuildPartsObject();
@@ -51,10 +48,12 @@ public:
const Oid& GetBuildPartId();
private:
- OperationLogOutput& m_LogOutput;
- StorageInstance& m_Storage;
- const Oid m_BuildId;
- const Options m_Options;
+ LoggerRef Log() { return m_Log; }
+
+ LoggerRef m_Log;
+ StorageInstance& m_Storage;
+ const Oid m_BuildId;
+ const Options m_Options;
Oid m_BuildPartId = Oid::Zero;
CbObject m_BuildObject;
@@ -79,7 +78,8 @@ public:
bool PopulateCache = true;
};
- ProjectStoreOperationDownloadAttachments(OperationLogOutput& OperationLogOutput,
+ ProjectStoreOperationDownloadAttachments(LoggerRef Log,
+ ProgressBase& Progress,
StorageInstance& Storage,
std::atomic<bool>& AbortFlag,
std::atomic<bool>& PauseFlag,
@@ -92,12 +92,15 @@ public:
void Execute();
private:
- OperationLogOutput& m_LogOutput;
- StorageInstance& m_Storage;
- std::atomic<bool>& m_AbortFlag;
- std::atomic<bool>& m_PauseFlag;
- WorkerThreadPool& m_IOWorkerPool;
- WorkerThreadPool& m_NetworkPool;
+ LoggerRef Log() { return m_Log; }
+
+ LoggerRef m_Log;
+ ProgressBase& m_Progress;
+ StorageInstance& m_Storage;
+ std::atomic<bool>& m_AbortFlag;
+ std::atomic<bool>& m_PauseFlag;
+ WorkerThreadPool& m_IOWorkerPool;
+ WorkerThreadPool& m_NetworkPool;
ProjectStoreOperationOplogState& m_State;
const tsl::robin_set<IoHash, IoHash::Hasher> m_AttachmentHashes;
diff --git a/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h b/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h
index 152c02ee2..b81708341 100644
--- a/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h
+++ b/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h
@@ -5,6 +5,7 @@
#include <zencore/jobqueue.h>
#include <zenstore/projectstore.h>
+#include <zenremotestore/builds/buildstoragecache.h>
#include <zenremotestore/chunking/chunkblock.h>
#include <zenremotestore/partialblockrequestmode.h>
@@ -79,29 +80,30 @@ public:
std::vector<ChunkBlockDescription> Blocks;
};
- struct AttachmentExistsInCacheResult : public Result
+ struct LoadAttachmentRangesResult : public Result
{
- std::vector<bool> HasBody;
+ IoBuffer Bytes;
+ std::vector<std::pair<uint64_t, uint64_t>> Ranges;
};
struct RemoteStoreInfo
{
- bool CreateBlocks;
- bool UseTempBlockFiles;
- bool AllowChunking;
+ bool CreateBlocks = false;
+ bool UseTempBlockFiles = false;
+ bool AllowChunking = false;
std::string ContainerName;
std::string Description;
};
struct Stats
{
- std::uint64_t m_SentBytes;
- std::uint64_t m_ReceivedBytes;
- std::uint64_t m_RequestTimeNS;
- std::uint64_t m_RequestCount;
- std::uint64_t m_PeakSentBytes;
- std::uint64_t m_PeakReceivedBytes;
- std::uint64_t m_PeakBytesPerSec;
+ std::uint64_t m_SentBytes = 0;
+ std::uint64_t m_ReceivedBytes = 0;
+ std::uint64_t m_RequestTimeNS = 0;
+ std::uint64_t m_RequestCount = 0;
+ std::uint64_t m_PeakSentBytes = 0;
+ std::uint64_t m_PeakReceivedBytes = 0;
+ std::uint64_t m_PeakBytesPerSec = 0;
};
struct ExtendedStats
@@ -122,22 +124,17 @@ public:
virtual FinalizeResult FinalizeContainer(const IoHash& RawHash) = 0;
virtual SaveAttachmentsResult SaveAttachments(const std::vector<SharedBuffer>& Payloads) = 0;
- virtual LoadContainerResult LoadContainer() = 0;
- virtual GetKnownBlocksResult GetKnownBlocks() = 0;
- virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes) = 0;
- virtual AttachmentExistsInCacheResult AttachmentExistsInCache(std::span<const IoHash> RawHashes) = 0;
+ virtual LoadContainerResult LoadContainer() = 0;
+ virtual GetKnownBlocksResult GetKnownBlocks() = 0;
+ virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes,
+ BuildStorageCache* OptionalCache,
+ const Oid& CacheBuildId) = 0;
- struct AttachmentRange
- {
- uint64_t Offset = 0;
- uint64_t Bytes = (uint64_t)-1;
-
- inline operator bool() const { return Offset != 0 || Bytes != (uint64_t)-1; }
- };
- virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash, const AttachmentRange& Range) = 0;
- virtual LoadAttachmentsResult LoadAttachments(const std::vector<IoHash>& RawHashes) = 0;
+ virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) = 0;
- virtual void Flush() = 0;
+ virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges) = 0;
+ virtual LoadAttachmentsResult LoadAttachments(const std::vector<IoHash>& RawHashes) = 0;
};
struct RemoteStoreOptions
@@ -153,27 +150,52 @@ struct RemoteStoreOptions
size_t ChunkFileSizeLimit = DefaultChunkFileSizeLimit;
};
-typedef std::function<IoBuffer(const IoHash& AttachmentHash)> TGetAttachmentBufferFunc;
-
-RemoteProjectStore::LoadContainerResult BuildContainer(
- CidStore& ChunkStore,
- ProjectStore::Project& Project,
- ProjectStore::Oplog& Oplog,
- WorkerThreadPool& WorkerPool,
- size_t MaxBlockSize,
- size_t MaxChunksPerBlock,
- size_t MaxChunkEmbedSize,
- size_t ChunkFileSizeLimit,
- bool BuildBlocks,
- bool IgnoreMissingAttachments,
- bool AllowChunking,
- const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock,
- const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment,
- const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks,
- bool EmbedLooseFiles);
+typedef std::function<CompositeBuffer(const IoHash& AttachmentHash)> TGetAttachmentBufferFunc;
+
+CbObject BuildContainer(LoggerRef InLog,
+ CidStore& ChunkStore,
+ ProjectStore::Project& Project,
+ ProjectStore::Oplog& Oplog,
+ WorkerThreadPool& WorkerPool,
+ size_t MaxBlockSize,
+ size_t MaxChunksPerBlock,
+ size_t MaxChunkEmbedSize,
+ size_t ChunkFileSizeLimit,
+ bool BuildBlocks,
+ bool IgnoreMissingAttachments,
+ bool AllowChunking,
+ const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock,
+ const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment,
+ const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks,
+ bool EmbedLooseFiles);
class JobContext;
+class RemoteStoreError : public std::runtime_error
+{
+public:
+ RemoteStoreError(const std::string& Message, int32_t ErrorCode, std::string_view Text)
+ : std::runtime_error(Message)
+ , m_ErrorCode(ErrorCode)
+ , m_Text(Text)
+ {
+ }
+
+ RemoteStoreError(const char* Message, int32_t ErrorCode, std::string_view Text)
+ : std::runtime_error(Message)
+ , m_ErrorCode(ErrorCode)
+ , m_Text(Text)
+ {
+ }
+
+ inline int32_t GetErrorCode() const { return m_ErrorCode; }
+ inline std::string_view GetText() const { return m_Text; }
+
+private:
+ int32_t m_ErrorCode = 0;
+ std::string m_Text;
+};
+
RemoteProjectStore::Result SaveOplogContainer(
ProjectStore::Oplog& Oplog,
const CbObject& ContainerObject,
@@ -184,33 +206,46 @@ RemoteProjectStore::Result SaveOplogContainer(
const std::function<void(const ChunkedInfo& Chunked)>& OnChunkedAttachment,
JobContext* OptionalContext);
-RemoteProjectStore::Result SaveOplog(CidStore& ChunkStore,
- RemoteProjectStore& RemoteStore,
- ProjectStore::Project& Project,
- ProjectStore::Oplog& Oplog,
- WorkerThreadPool& NetworkWorkerPool,
- WorkerThreadPool& WorkerPool,
- size_t MaxBlockSize,
- size_t MaxChunksPerBlock,
- size_t MaxChunkEmbedSize,
- size_t ChunkFileSizeLimit,
- bool EmbedLooseFiles,
- bool ForceUpload,
- bool IgnoreMissingAttachments,
- JobContext* OptionalContext);
-
-RemoteProjectStore::Result LoadOplog(CidStore& ChunkStore,
- RemoteProjectStore& RemoteStore,
- ProjectStore::Oplog& Oplog,
- WorkerThreadPool& NetworkWorkerPool,
- WorkerThreadPool& WorkerPool,
- bool ForceDownload,
- bool IgnoreMissingAttachments,
- bool CleanOplog,
- EPartialBlockRequestMode PartialBlockRequestMode,
- double HostLatencySec,
- double CacheLatencySec,
- JobContext* OptionalContext);
+void SaveOplog(LoggerRef InLog,
+ CidStore& ChunkStore,
+ RemoteProjectStore& RemoteStore,
+ ProjectStore::Project& Project,
+ ProjectStore::Oplog& Oplog,
+ WorkerThreadPool& NetworkWorkerPool,
+ WorkerThreadPool& WorkerPool,
+ size_t MaxBlockSize,
+ size_t MaxChunksPerBlock,
+ size_t MaxChunkEmbedSize,
+ size_t ChunkFileSizeLimit,
+ bool EmbedLooseFiles,
+ bool ForceUpload,
+ bool IgnoreMissingAttachments,
+ JobContext* OptionalContext);
+
+struct LoadOplogContext
+{
+ LoggerRef Log;
+ CidStore& ChunkStore;
+ RemoteProjectStore& RemoteStore;
+ BuildStorageCache* OptionalCache = nullptr;
+ Oid CacheBuildId = Oid::Zero;
+ BuildStorageCache::Statistics* OptionalCacheStats = nullptr;
+ ProjectStore::Oplog& Oplog;
+ WorkerThreadPool& NetworkWorkerPool;
+ WorkerThreadPool& WorkerPool;
+ bool ForceDownload = false;
+ bool IgnoreMissingAttachments = false;
+ bool CleanOplog = false;
+ EPartialBlockRequestMode PartialBlockRequestMode = EPartialBlockRequestMode::All;
+ bool PopulateCache = false;
+ double StoreLatencySec = -1.0;
+ uint64_t StoreMaxRangeCountPerRequest = 1;
+ double CacheLatencySec = -1.0;
+ uint64_t CacheMaxRangeCountPerRequest = 1;
+ JobContext* OptionalJobContext = nullptr;
+};
+
+void LoadOplog(LoadOplogContext&& Context);
std::vector<IoHash> GetBlockHashesFromOplog(CbObjectView ContainerObject);
std::vector<ThinChunkBlockDescription> GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> IncludeBlockHashes);
diff --git a/src/zenremotestore/include/zenremotestore/transferthreadworkers.h b/src/zenremotestore/include/zenremotestore/transferthreadworkers.h
index a7faacfd5..6b6584614 100644
--- a/src/zenremotestore/include/zenremotestore/transferthreadworkers.h
+++ b/src/zenremotestore/include/zenremotestore/transferthreadworkers.h
@@ -3,7 +3,6 @@
#pragma once
#include <zenbase/refcount.h>
-#include <zencore/timer.h>
#include <zencore/zencore.h>
#include <memory>
diff --git a/src/zenremotestore/jupiter/jupiterhost.cpp b/src/zenremotestore/jupiter/jupiterhost.cpp
index 2583cfc84..314aafc78 100644
--- a/src/zenremotestore/jupiter/jupiterhost.cpp
+++ b/src/zenremotestore/jupiter/jupiterhost.cpp
@@ -59,13 +59,22 @@ TestJupiterEndpoint(std::string_view BaseUrl, const bool AssumeHttp2, const bool
HttpClient::Response TestResponse = TestHttpClient.Get("/health/live");
if (TestResponse.IsSuccess())
{
+        // TODO: dan.engelbrecht 20260305 - replace this naive nginx detection with a proper capabilities endpoint once it exists in Jupiter
+ uint64_t MaxRangeCountPerRequest = 1;
+ if (auto It = TestResponse.Header.Entries.find("Server"); It != TestResponse.Header.Entries.end())
+ {
+ if (StrCaseCompare(It->second.c_str(), "nginx", 5) == 0)
+ {
+ MaxRangeCountPerRequest = 128u; // This leaves more than 2k header space for auth token etc
+ }
+ }
LatencyTestResult LatencyResult = MeasureLatency(TestHttpClient, "/health/ready");
if (!LatencyResult.Success)
{
return {.Success = false, .FailureReason = LatencyResult.FailureReason};
}
- return {.Success = true, .LatencySeconds = LatencyResult.LatencySeconds};
+ return {.Success = true, .LatencySeconds = LatencyResult.LatencySeconds, .MaxRangeCountPerRequest = MaxRangeCountPerRequest};
}
return {.Success = false, .FailureReason = TestResponse.ErrorMessage("")};
}
diff --git a/src/zenremotestore/jupiter/jupitersession.cpp b/src/zenremotestore/jupiter/jupitersession.cpp
index 1bc6564ce..d610d1fc8 100644
--- a/src/zenremotestore/jupiter/jupitersession.cpp
+++ b/src/zenremotestore/jupiter/jupitersession.cpp
@@ -68,7 +68,7 @@ namespace detail {
return {.SentBytes = gsl::narrow<uint64_t>(Response.UploadedBytes),
.ReceivedBytes = gsl::narrow<uint64_t>(Response.DownloadedBytes),
.ElapsedSeconds = Response.ElapsedSeconds,
- .ErrorCode = Response.Error.value().ErrorCode,
+ .ErrorCode = static_cast<int32_t>(Response.Error.value().ErrorCode),
.Reason = Response.ErrorMessage(ErrorPrefix),
.Success = false};
}
@@ -673,7 +673,7 @@ JupiterSession::PutMultipartBuildBlob(std::string_view Namespace,
size_t RetryPartIndex = PartNameToIndex.at(RetryPartId);
const MultipartUploadResponse::Part& RetryPart = Workload->PartDescription.Parts[RetryPartIndex];
IoBuffer RetryPartPayload =
- Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte - 1);
+ Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte);
std::string RetryMultipartUploadResponseRequestString =
fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/uploadMultipart{}&supportsRedirect={}",
Namespace,
@@ -852,6 +852,72 @@ JupiterSession::GetBuildBlob(std::string_view Namespace,
return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv);
}
+BuildBlobRangesResult
+JupiterSession::GetBuildBlob(std::string_view Namespace,
+ std::string_view BucketId,
+ const Oid& BuildId,
+ const IoHash& Hash,
+ std::filesystem::path TempFolderPath,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges)
+{
+ HttpClient::KeyValueMap Headers;
+ if (!Ranges.empty())
+ {
+ ExtendableStringBuilder<512> SB;
+ for (const std::pair<uint64_t, uint64_t>& R : Ranges)
+ {
+ if (SB.Size() > 0)
+ {
+ SB << ", ";
+ }
+ SB << R.first << "-" << R.first + R.second - 1;
+ }
+ Headers.Entries.insert({"Range", fmt::format("bytes={}", SB.ToView())});
+ }
+ std::string Url = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}?supportsRedirect={}",
+ Namespace,
+ BucketId,
+ BuildId,
+ Hash.ToHexString(),
+ m_AllowRedirect ? "true"sv : "false"sv);
+
+ HttpClient::Response Response = m_HttpClient.Download(Url, TempFolderPath, Headers);
+ if ((Response.StatusCode == HttpResponseCode::RangeNotSatisfiable || Response.StatusCode == HttpResponseCode::NotImplemented) &&
+ Ranges.size() > 1)
+ {
+        // Requests to Jupiter that are not served via nginx (content not stored locally in the file system) cannot serve multi-range
+        // requests (an asp.net limitation). This rejection is not implemented as of 2026-03-02; it is in the backlog (@joakim.lindqvist).
+ // If we encounter this error we fall back to a single range which covers all the requested ranges
+ uint64_t RangeStart = Ranges.front().first;
+ uint64_t RangeEnd = Ranges.back().first + Ranges.back().second - 1;
+ Headers.Entries.insert_or_assign("Range", fmt::format("bytes={}-{}", RangeStart, RangeEnd));
+ Response = m_HttpClient.Download(Url, TempFolderPath, Headers);
+ }
+ if (Response.IsSuccess())
+ {
+        // If we get a redirect to S3 or a non-Jupiter endpoint the content type will not be correct; validate it and set it
+ if (m_AllowRedirect && (Response.ResponsePayload.GetContentType() == HttpContentType::kBinary))
+ {
+ IoHash ValidateRawHash;
+ uint64_t ValidateRawSize = 0;
+ if (!Headers.Entries.contains("Range"))
+ {
+ ZEN_ASSERT_SLOW(CompressedBuffer::ValidateCompressedHeader(Response.ResponsePayload,
+ ValidateRawHash,
+ ValidateRawSize,
+ /*OutOptionalTotalCompressedSize*/ nullptr));
+ ZEN_ASSERT_SLOW(ValidateRawHash == Hash);
+ ZEN_ASSERT_SLOW(ValidateRawSize > 0);
+ ZEN_UNUSED(ValidateRawHash, ValidateRawSize);
+ Response.ResponsePayload.SetContentType(ZenContentType::kCompressedBinary);
+ }
+ }
+ }
+ BuildBlobRangesResult Result = {detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv)};
+ Result.Ranges = Response.GetRanges(Ranges);
+ return Result;
+}
+
JupiterResult
JupiterSession::PutBlockMetadata(std::string_view Namespace,
std::string_view BucketId,
diff --git a/src/zenremotestore/operationlogoutput.cpp b/src/zenremotestore/operationlogoutput.cpp
deleted file mode 100644
index 7ed93c947..000000000
--- a/src/zenremotestore/operationlogoutput.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright Epic Games, Inc. All Rights Reserved.
-
-#include <zenremotestore/operationlogoutput.h>
-
-#include <zencore/logging.h>
-
-ZEN_THIRD_PARTY_INCLUDES_START
-#include <gsl/gsl-lite.hpp>
-ZEN_THIRD_PARTY_INCLUDES_END
-
-namespace zen {
-
-class StandardLogOutput;
-
-class StandardLogOutputProgressBar : public OperationLogOutput::ProgressBar
-{
-public:
- StandardLogOutputProgressBar(StandardLogOutput& Output, std::string_view InSubTask) : m_Output(Output), m_SubTask(InSubTask) {}
-
- virtual void UpdateState(const State& NewState, bool DoLinebreak) override;
- virtual void Finish() override;
-
-private:
- StandardLogOutput& m_Output;
- std::string m_SubTask;
- State m_State;
-};
-
-class StandardLogOutput : public OperationLogOutput
-{
-public:
- StandardLogOutput(LoggerRef& Log) : m_Log(Log) {}
- virtual void EmitLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args) override
- {
- if (m_Log.ShouldLog(LogLevel))
- {
- fmt::basic_memory_buffer<char, 250> MessageBuffer;
- fmt::vformat_to(fmt::appender(MessageBuffer), Format, Args);
- ZEN_LOG(m_Log, LogLevel, "{}", std::string_view(MessageBuffer.data(), MessageBuffer.size()));
- }
- }
-
- virtual void SetLogOperationName(std::string_view Name) override
- {
- m_LogOperationName = Name;
- ZEN_OPERATION_LOG_INFO(*this, "{}", m_LogOperationName);
- }
- virtual void SetLogOperationProgress(uint32_t StepIndex, uint32_t StepCount) override
- {
- const size_t PercentDone = StepCount > 0u ? gsl::narrow<uint8_t>((100 * StepIndex) / StepCount) : 0u;
- ZEN_OPERATION_LOG_INFO(*this, "{}: {}%", m_LogOperationName, PercentDone);
- }
- virtual uint32_t GetProgressUpdateDelayMS() override { return 2000; }
- virtual ProgressBar* CreateProgressBar(std::string_view InSubTask) override
- {
- return new StandardLogOutputProgressBar(*this, InSubTask);
- }
-
-private:
- LoggerRef m_Log;
- std::string m_LogOperationName;
-};
-
-void
-StandardLogOutputProgressBar::UpdateState(const State& NewState, bool DoLinebreak)
-{
- ZEN_UNUSED(DoLinebreak);
- const size_t PercentDone =
- NewState.TotalCount > 0u ? gsl::narrow<uint8_t>((100 * (NewState.TotalCount - NewState.RemainingCount)) / NewState.TotalCount) : 0u;
- std::string Task = NewState.Task;
- switch (NewState.Status)
- {
- case State::EStatus::Aborted:
- Task = "Aborting";
- break;
- case State::EStatus::Paused:
- Task = "Paused";
- break;
- default:
- break;
- }
- ZEN_OPERATION_LOG_INFO(m_Output, "{}: {}%{}", Task, PercentDone, NewState.Details.empty() ? "" : fmt::format(" {}", NewState.Details));
- m_State = NewState;
-}
-void
-StandardLogOutputProgressBar::Finish()
-{
- if (m_State.RemainingCount > 0)
- {
- State NewState = m_State;
- NewState.RemainingCount = 0;
- NewState.Details = "";
- UpdateState(NewState, /*DoLinebreak*/ true);
- }
-}
-
-OperationLogOutput*
-CreateStandardLogOutput(LoggerRef Log)
-{
- return new StandardLogOutput(Log);
-}
-
-} // namespace zen
diff --git a/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp b/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp
index c42373e4d..d7596263b 100644
--- a/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp
+++ b/src/zenremotestore/projectstore/buildsremoteprojectstore.cpp
@@ -7,11 +7,8 @@
#include <zencore/fmtutils.h>
#include <zencore/scopeguard.h>
-#include <zenhttp/httpclientauth.h>
-#include <zenremotestore/builds/buildstoragecache.h>
#include <zenremotestore/builds/buildstorageutil.h>
#include <zenremotestore/builds/jupiterbuildstorage.h>
-#include <zenremotestore/operationlogoutput.h>
#include <numeric>
@@ -26,18 +23,14 @@ class BuildsRemoteStore : public RemoteProjectStore
public:
BuildsRemoteStore(LoggerRef InLog,
const HttpClientSettings& ClientSettings,
- HttpClientSettings* OptionalCacheClientSettings,
std::string_view HostUrl,
- std::string_view CacheUrl,
const std::filesystem::path& TempFilePath,
- WorkerThreadPool& CacheBackgroundWorkerPool,
std::string_view Namespace,
std::string_view Bucket,
const Oid& BuildId,
const IoBuffer& MetaData,
bool ForceDisableBlocks,
- bool ForceDisableTempBlocks,
- bool PopulateCache)
+ bool ForceDisableTempBlocks)
: m_Log(InLog)
, m_BuildStorageHttp(HostUrl, ClientSettings)
, m_BuildStorage(CreateJupiterBuildStorage(Log(),
@@ -53,20 +46,8 @@ public:
, m_MetaData(MetaData)
, m_EnableBlocks(!ForceDisableBlocks)
, m_UseTempBlocks(!ForceDisableTempBlocks)
- , m_PopulateCache(PopulateCache)
{
m_MetaData.MakeOwned();
- if (OptionalCacheClientSettings)
- {
- ZEN_ASSERT(!CacheUrl.empty());
- m_BuildCacheStorageHttp = std::make_unique<HttpClient>(CacheUrl, *OptionalCacheClientSettings);
- m_BuildCacheStorage = CreateZenBuildStorageCache(*m_BuildCacheStorageHttp,
- m_StorageCacheStats,
- Namespace,
- Bucket,
- TempFilePath,
- CacheBackgroundWorkerPool);
- }
}
virtual RemoteStoreInfo GetInfo() const override
@@ -75,9 +56,8 @@ public:
.UseTempBlockFiles = m_UseTempBlocks,
.AllowChunking = true,
.ContainerName = fmt::format("{}/{}/{}", m_Namespace, m_Bucket, m_BuildId),
- .Description = fmt::format("[cloud] {}{}. SessionId: {}. {}/{}/{}"sv,
+ .Description = fmt::format("[cloud] {}. SessionId: {}. {}/{}/{}"sv,
m_BuildStorageHttp.GetBaseUri(),
- m_BuildCacheStorage ? fmt::format(" (Cache: {})", m_BuildCacheStorageHttp->GetBaseUri()) : ""sv,
m_BuildStorageHttp.GetSessionId(),
m_Namespace,
m_Bucket,
@@ -86,15 +66,13 @@ public:
virtual Stats GetStats() const override
{
- return {
- .m_SentBytes = m_BuildStorageStats.TotalBytesWritten.load() + m_StorageCacheStats.TotalBytesWritten.load(),
- .m_ReceivedBytes = m_BuildStorageStats.TotalBytesRead.load() + m_StorageCacheStats.TotalBytesRead.load(),
- .m_RequestTimeNS = m_BuildStorageStats.TotalRequestTimeUs.load() * 1000 + m_StorageCacheStats.TotalRequestTimeUs.load() * 1000,
- .m_RequestCount = m_BuildStorageStats.TotalRequestCount.load() + m_StorageCacheStats.TotalRequestCount.load(),
- .m_PeakSentBytes = Max(m_BuildStorageStats.PeakSentBytes.load(), m_StorageCacheStats.PeakSentBytes.load()),
- .m_PeakReceivedBytes = Max(m_BuildStorageStats.PeakReceivedBytes.load(), m_StorageCacheStats.PeakReceivedBytes.load()),
- .m_PeakBytesPerSec = Max(m_BuildStorageStats.PeakBytesPerSec.load(), m_StorageCacheStats.PeakBytesPerSec.load()),
- };
+ return {.m_SentBytes = m_BuildStorageStats.TotalBytesWritten.load(),
+ .m_ReceivedBytes = m_BuildStorageStats.TotalBytesRead.load(),
+ .m_RequestTimeNS = m_BuildStorageStats.TotalRequestTimeUs.load() * 1000,
+ .m_RequestCount = m_BuildStorageStats.TotalRequestCount.load(),
+ .m_PeakSentBytes = m_BuildStorageStats.PeakSentBytes.load(),
+ .m_PeakReceivedBytes = m_BuildStorageStats.PeakReceivedBytes.load(),
+ .m_PeakBytesPerSec = m_BuildStorageStats.PeakBytesPerSec.load()};
}
virtual bool GetExtendedStats(ExtendedStats& OutStats) const override
@@ -109,11 +87,6 @@ public:
}
Result = true;
}
- if (m_BuildCacheStorage)
- {
- OutStats.m_ReceivedBytesPerSource.insert_or_assign("Cache", m_StorageCacheStats.TotalBytesRead);
- Result = true;
- }
return Result;
}
@@ -310,9 +283,7 @@ public:
}
catch (const HttpClientError& Ex)
{
- Result.ErrorCode = Ex.GetInternalErrorCode() != 0 ? Ex.GetInternalErrorCode()
- : Ex.GetHttpResponseCode() != HttpResponseCode::ImATeapot ? (int)Ex.GetHttpResponseCode()
- : 0;
+ Result.ErrorCode = MakeErrorCode(Ex);
Result.Reason = fmt::format("Failed finalizing oplog container build part to {}/{}/{}/{}/{}. Reason: '{}'",
m_BuildStorageHttp.GetBaseUri(),
m_Namespace,
@@ -341,9 +312,7 @@ public:
}
catch (const HttpClientError& Ex)
{
- Result.ErrorCode = Ex.GetInternalErrorCode() != 0 ? Ex.GetInternalErrorCode()
- : Ex.GetHttpResponseCode() != HttpResponseCode::ImATeapot ? (int)Ex.GetHttpResponseCode()
- : 0;
+ Result.ErrorCode = MakeErrorCode(Ex);
Result.Reason = fmt::format("Failed finalizing oplog container build to {}/{}/{}/{}. Reason: '{}'",
m_BuildStorageHttp.GetBaseUri(),
m_Namespace,
@@ -462,11 +431,12 @@ public:
return Result;
}
- virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes) override
+ virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes,
+ BuildStorageCache* OptionalCache,
+ const Oid& CacheBuildId) override
{
- std::unique_ptr<OperationLogOutput> Output(CreateStandardLogOutput(Log()));
-
ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero);
+ ZEN_ASSERT(OptionalCache == nullptr || CacheBuildId == m_BuildId);
GetBlockDescriptionsResult Result;
Stopwatch Timer;
@@ -474,11 +444,10 @@ public:
try
{
- Result.Blocks = zen::GetBlockDescriptions(*Output,
+ Result.Blocks = zen::GetBlockDescriptions(Log(),
*m_BuildStorage,
- m_BuildCacheStorage.get(),
+ OptionalCache,
m_BuildId,
- m_OplogBuildPartId,
BlockHashes,
/*AttemptFallback*/ false,
/*IsQuiet*/ false,
@@ -507,99 +476,83 @@ public:
return Result;
}
- virtual AttachmentExistsInCacheResult AttachmentExistsInCache(std::span<const IoHash> RawHashes) override
+ virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override
{
- AttachmentExistsInCacheResult Result;
- Stopwatch Timer;
- auto _ = MakeGuard([&Timer, &Result]() { Result.ElapsedSeconds = Timer.GetElapsedTimeUs() / 1000000.0; });
+ ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero);
+
+ LoadAttachmentResult Result;
+ Stopwatch Timer;
+ auto _ = MakeGuard([&Timer, &Result]() { Result.ElapsedSeconds = Timer.GetElapsedTimeUs() / 1000000.0; });
+
try
{
- const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult =
- m_BuildCacheStorage->BlobsExists(m_BuildId, RawHashes);
-
- if (CacheExistsResult.size() == RawHashes.size())
- {
- Result.HasBody.reserve(CacheExistsResult.size());
- for (size_t BlobIndex = 0; BlobIndex < CacheExistsResult.size(); BlobIndex++)
- {
- Result.HasBody.push_back(CacheExistsResult[BlobIndex].HasBody);
- }
- }
+ Result.Bytes = m_BuildStorage->GetBuildBlob(m_BuildId, RawHash);
}
catch (const HttpClientError& Ex)
{
Result.ErrorCode = MakeErrorCode(Ex);
- Result.Reason = fmt::format("Remote cache: Failed finding known blobs for {}/{}/{}/{}. Reason: '{}'",
+ Result.Reason = fmt::format("Failed getting blob {}/{}/{}/{}/{}. Reason: '{}'",
m_BuildStorageHttp.GetBaseUri(),
m_Namespace,
m_Bucket,
m_BuildId,
+ RawHash,
Ex.what());
}
catch (const std::exception& Ex)
{
Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError);
- Result.Reason = fmt::format("Remote cache: Failed finding known blobs for {}/{}/{}/{}. Reason: '{}'",
+ Result.Reason = fmt::format("Failed getting blob {}/{}/{}/{}/{}. Reason: '{}'",
m_BuildStorageHttp.GetBaseUri(),
m_Namespace,
m_Bucket,
m_BuildId,
+ RawHash,
Ex.what());
}
+
return Result;
}
- virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash, const AttachmentRange& Range) override
+ virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges) override
{
- ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero);
-
- LoadAttachmentResult Result;
- Stopwatch Timer;
- auto _ = MakeGuard([&Timer, &Result]() { Result.ElapsedSeconds = Timer.GetElapsedTimeUs() / 1000000.0; });
+ ZEN_ASSERT(!Ranges.empty());
+ LoadAttachmentRangesResult Result;
+ Stopwatch Timer;
+ auto _ = MakeGuard([&Timer, &Result]() { Result.ElapsedSeconds = Timer.GetElapsedTimeUs() / 1000000.0; });
try
{
- if (m_BuildCacheStorage)
+ BuildStorageBase::BuildBlobRanges BlobRanges = m_BuildStorage->GetBuildBlobRanges(m_BuildId, RawHash, Ranges);
+ if (BlobRanges.PayloadBuffer)
{
- IoBuffer CachedBlob = m_BuildCacheStorage->GetBuildBlob(m_BuildId, RawHash, Range.Offset, Range.Bytes);
- if (CachedBlob)
- {
- Result.Bytes = std::move(CachedBlob);
- }
- }
- if (!Result.Bytes)
- {
- Result.Bytes = m_BuildStorage->GetBuildBlob(m_BuildId, RawHash, Range.Offset, Range.Bytes);
- if (m_BuildCacheStorage && Result.Bytes && m_PopulateCache)
- {
- if (!Range)
- {
- m_BuildCacheStorage->PutBuildBlob(m_BuildId,
- RawHash,
- Result.Bytes.GetContentType(),
- CompositeBuffer(SharedBuffer(Result.Bytes)));
- }
- }
+ Result.Bytes = std::move(BlobRanges.PayloadBuffer);
+ Result.Ranges = std::move(BlobRanges.Ranges);
}
}
catch (const HttpClientError& Ex)
{
Result.ErrorCode = MakeErrorCode(Ex);
- Result.Reason = fmt::format("Failed listing known blocks for {}/{}/{}/{}. Reason: '{}'",
+ Result.Reason = fmt::format("Failed getting {} ranges for blob {}/{}/{}/{}/{}. Reason: '{}'",
+ Ranges.size(),
m_BuildStorageHttp.GetBaseUri(),
m_Namespace,
m_Bucket,
m_BuildId,
+ RawHash,
Ex.what());
}
catch (const std::exception& Ex)
{
Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError);
- Result.Reason = fmt::format("Failed listing known blocks for {}/{}/{}/{}. Reason: '{}'",
+ Result.Reason = fmt::format("Failed getting {} ranges for blob {}/{}/{}/{}/{}. Reason: '{}'",
+ Ranges.size(),
m_BuildStorageHttp.GetBaseUri(),
m_Namespace,
m_Bucket,
m_BuildId,
+ RawHash,
Ex.what());
}
@@ -614,72 +567,25 @@ public:
std::vector<IoHash> AttachmentsLeftToFind = RawHashes;
- if (m_BuildCacheStorage)
- {
- std::vector<BuildStorageCache::BlobExistsResult> ExistCheck = m_BuildCacheStorage->BlobsExists(m_BuildId, RawHashes);
- if (ExistCheck.size() == RawHashes.size())
- {
- AttachmentsLeftToFind.clear();
- for (size_t BlobIndex = 0; BlobIndex < RawHashes.size(); BlobIndex++)
- {
- const IoHash& Hash = RawHashes[BlobIndex];
- const BuildStorageCache::BlobExistsResult& BlobExists = ExistCheck[BlobIndex];
- if (BlobExists.HasBody)
- {
- IoBuffer CachedPayload = m_BuildCacheStorage->GetBuildBlob(m_BuildId, Hash);
- if (CachedPayload)
- {
- Result.Chunks.emplace_back(
- std::pair<IoHash, CompressedBuffer>{Hash,
- CompressedBuffer::FromCompressedNoValidate(std::move(CachedPayload))});
- }
- else
- {
- AttachmentsLeftToFind.push_back(Hash);
- }
- }
- else
- {
- AttachmentsLeftToFind.push_back(Hash);
- }
- }
- }
- }
-
for (const IoHash& Hash : AttachmentsLeftToFind)
{
- LoadAttachmentResult ChunkResult = LoadAttachment(Hash, {});
+ LoadAttachmentResult ChunkResult = LoadAttachment(Hash);
if (ChunkResult.ErrorCode)
{
return LoadAttachmentsResult{ChunkResult};
}
ZEN_DEBUG("Loaded attachment in {}", NiceTimeSpanMs(static_cast<uint64_t>(ChunkResult.ElapsedSeconds * 1000)));
- if (m_BuildCacheStorage && ChunkResult.Bytes && m_PopulateCache)
- {
- m_BuildCacheStorage->PutBuildBlob(m_BuildId,
- Hash,
- ChunkResult.Bytes.GetContentType(),
- CompositeBuffer(SharedBuffer(ChunkResult.Bytes)));
- }
Result.Chunks.emplace_back(
std::pair<IoHash, CompressedBuffer>{Hash, CompressedBuffer::FromCompressedNoValidate(std::move(ChunkResult.Bytes))});
}
return Result;
}
- virtual void Flush() override
- {
- if (m_BuildCacheStorage)
- {
- m_BuildCacheStorage->Flush(100, [](intptr_t) { return false; });
- }
- }
-
private:
static int MakeErrorCode(const HttpClientError& Ex)
{
- return Ex.GetInternalErrorCode() != 0 ? Ex.GetInternalErrorCode()
- : Ex.GetHttpResponseCode() != HttpResponseCode::ImATeapot ? (int)Ex.GetHttpResponseCode()
+ return Ex.GetInternalErrorCode() != HttpClientErrorCode::kOK ? static_cast<int>(Ex.GetInternalErrorCode())
+ : Ex.GetHttpResponseCode() != HttpResponseCode::ImATeapot ? static_cast<int>(Ex.GetHttpResponseCode())
: 0;
}
@@ -691,10 +597,6 @@ private:
HttpClient m_BuildStorageHttp;
std::unique_ptr<BuildStorageBase> m_BuildStorage;
- BuildStorageCache::Statistics m_StorageCacheStats;
- std::unique_ptr<HttpClient> m_BuildCacheStorageHttp;
- std::unique_ptr<BuildStorageCache> m_BuildCacheStorage;
-
const std::string m_Namespace;
const std::string m_Bucket;
const Oid m_BuildId;
@@ -703,125 +605,34 @@ private:
const bool m_EnableBlocks = true;
const bool m_UseTempBlocks = true;
const bool m_AllowRedirect = false;
- const bool m_PopulateCache = true;
};
std::shared_ptr<RemoteProjectStore>
-CreateJupiterBuildsRemoteStore(LoggerRef InLog,
- const BuildsRemoteStoreOptions& Options,
- const std::filesystem::path& TempFilePath,
- bool Quiet,
- bool Unattended,
- bool Hidden,
- WorkerThreadPool& CacheBackgroundWorkerPool,
- double& OutHostLatencySec,
- double& OutCacheLatencySec)
+CreateJupiterBuildsRemoteStore(LoggerRef InLog,
+ const BuildStorageResolveResult& ResolveResult,
+ std::function<HttpClientAccessToken()>&& TokenProvider,
+ const BuildsRemoteStoreOptions& Options,
+ const std::filesystem::path& TempFilePath)
{
- std::string Host = Options.Host;
- if (!Host.empty() && Host.find("://"sv) == std::string::npos)
- {
- // Assume https URL
- Host = fmt::format("https://{}"sv, Host);
- }
- std::string OverrideUrl = Options.OverrideHost;
- if (!OverrideUrl.empty() && OverrideUrl.find("://"sv) == std::string::npos)
- {
- // Assume https URL
- OverrideUrl = fmt::format("https://{}"sv, OverrideUrl);
- }
- std::string ZenHost = Options.ZenHost;
- if (!ZenHost.empty() && ZenHost.find("://"sv) == std::string::npos)
- {
- // Assume https URL
- ZenHost = fmt::format("https://{}"sv, ZenHost);
- }
-
- // 1) openid-provider if given (assumes oidctoken.exe -Zen true has been run with matching Options.OpenIdProvider
- // 2) Access token as parameter in request
- // 3) Environment variable (different win vs linux/mac)
- // 4) Default openid-provider (assumes oidctoken.exe -Zen true has been run with matching Options.OpenIdProvider
-
- std::function<HttpClientAccessToken()> TokenProvider;
- if (!Options.OpenIdProvider.empty())
- {
- TokenProvider = httpclientauth::CreateFromOpenIdProvider(Options.AuthManager, Options.OpenIdProvider);
- }
- else if (!Options.AccessToken.empty())
- {
- TokenProvider = httpclientauth::CreateFromStaticToken(Options.AccessToken);
- }
- else if (!Options.OidcExePath.empty())
- {
- if (auto TokenProviderMaybe = httpclientauth::CreateFromOidcTokenExecutable(Options.OidcExePath,
- Host.empty() ? OverrideUrl : Host,
- Quiet,
- Unattended,
- Hidden);
- TokenProviderMaybe)
- {
- TokenProvider = TokenProviderMaybe.value();
- }
- }
-
- if (!TokenProvider)
- {
- TokenProvider = httpclientauth::CreateFromDefaultOpenIdProvider(Options.AuthManager);
- }
-
- BuildStorageResolveResult ResolveRes;
- {
- HttpClientSettings ClientSettings{.LogCategory = "httpbuildsclient",
- .AccessTokenProvider = TokenProvider,
- .AssumeHttp2 = Options.AssumeHttp2,
- .AllowResume = true,
- .RetryCount = 2};
-
- std::unique_ptr<OperationLogOutput> Output(CreateStandardLogOutput(InLog));
-
- ResolveRes =
- ResolveBuildStorage(*Output, ClientSettings, Host, OverrideUrl, ZenHost, ZenCacheResolveMode::Discovery, /*Verbose*/ false);
- }
-
HttpClientSettings ClientSettings{.LogCategory = "httpbuildsclient",
.ConnectTimeout = std::chrono::milliseconds(3000),
.Timeout = std::chrono::milliseconds(1800000),
.AccessTokenProvider = std::move(TokenProvider),
- .AssumeHttp2 = ResolveRes.HostAssumeHttp2,
+ .AssumeHttp2 = ResolveResult.Cloud.AssumeHttp2,
.AllowResume = true,
.RetryCount = 4,
.MaximumInMemoryDownloadSize = Options.MaximumInMemoryDownloadSize};
- std::unique_ptr<HttpClientSettings> CacheClientSettings;
-
- if (!ResolveRes.CacheUrl.empty())
- {
- CacheClientSettings =
- std::make_unique<HttpClientSettings>(HttpClientSettings{.LogCategory = "httpcacheclient",
- .ConnectTimeout = std::chrono::milliseconds{3000},
- .Timeout = std::chrono::milliseconds{30000},
- .AssumeHttp2 = ResolveRes.CacheAssumeHttp2,
- .AllowResume = true,
- .RetryCount = 0,
- .MaximumInMemoryDownloadSize = Options.MaximumInMemoryDownloadSize});
- }
-
std::shared_ptr<RemoteProjectStore> RemoteStore = std::make_shared<BuildsRemoteStore>(InLog,
ClientSettings,
- CacheClientSettings.get(),
- ResolveRes.HostUrl,
- ResolveRes.CacheUrl,
+ ResolveResult.Cloud.Address,
TempFilePath,
- CacheBackgroundWorkerPool,
Options.Namespace,
Options.Bucket,
Options.BuildId,
Options.MetaData,
Options.ForceDisableBlocks,
- Options.ForceDisableTempBlocks,
- Options.PopulateCache);
-
- OutHostLatencySec = ResolveRes.HostLatencySec;
- OutCacheLatencySec = ResolveRes.CacheLatencySec;
+ Options.ForceDisableTempBlocks);
return RemoteStore;
}
diff --git a/src/zenremotestore/projectstore/fileremoteprojectstore.cpp b/src/zenremotestore/projectstore/fileremoteprojectstore.cpp
index ec7fb7bbc..bb21de12c 100644
--- a/src/zenremotestore/projectstore/fileremoteprojectstore.cpp
+++ b/src/zenremotestore/projectstore/fileremoteprojectstore.cpp
@@ -7,8 +7,12 @@
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
+#include <zencore/scopeguard.h>
#include <zencore/timer.h>
#include <zenhttp/httpcommon.h>
+#include <zenremotestore/builds/buildstoragecache.h>
+
+#include <numeric>
namespace zen {
@@ -74,9 +78,11 @@ public:
virtual SaveResult SaveContainer(const IoBuffer& Payload) override
{
- Stopwatch Timer;
SaveResult Result;
+ Stopwatch Timer;
+ auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; });
+
{
CbObject ContainerObject = LoadCompactBinaryObject(Payload);
@@ -87,6 +93,10 @@ public:
{
Result.Needs.insert(AttachmentHash);
}
+ else if (std::filesystem::path AttachmentMetaPath = GetAttachmentMetaPath(AttachmentHash); IsFile(AttachmentMetaPath))
+ {
+ BasicFile TouchIt(AttachmentMetaPath, BasicFile::Mode::kWrite);
+ }
});
}
@@ -112,14 +122,18 @@ public:
Result.Reason = fmt::format("Failed saving oplog container to '{}'. Reason: {}", ContainerPath, Ex.what());
}
AddStats(Payload.GetSize(), 0, Timer.GetElapsedTimeUs() * 1000);
- Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
return Result;
}
- virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override
+ virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload,
+ const IoHash& RawHash,
+ ChunkBlockDescription&& BlockDescription) override
{
- Stopwatch Timer;
- SaveAttachmentResult Result;
+ SaveAttachmentResult Result;
+
+ Stopwatch Timer;
+ auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; });
+
std::filesystem::path ChunkPath = GetAttachmentPath(RawHash);
if (!IsFile(ChunkPath))
{
@@ -142,14 +156,33 @@ public:
Result.Reason = fmt::format("Failed saving oplog attachment to '{}'. Reason: {}", ChunkPath, Ex.what());
}
}
+ if (!Result.ErrorCode && BlockDescription.BlockHash != IoHash::Zero)
+ {
+ try
+ {
+ std::filesystem::path MetaPath = GetAttachmentMetaPath(RawHash);
+ CbObject MetaData = BuildChunkBlockDescription(BlockDescription, {});
+ SharedBuffer MetaBuffer = MetaData.GetBuffer();
+ BasicFile MetaFile;
+ MetaFile.Open(MetaPath, BasicFile::Mode::kTruncate);
+ MetaFile.Write(MetaBuffer.GetView(), 0);
+ }
+ catch (const std::exception& Ex)
+ {
+ Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError);
+                Result.Reason = fmt::format("Failed saving block description for attachment '{}'. Reason: {}", RawHash, Ex.what());
+ }
+ }
AddStats(Payload.GetSize(), 0, Timer.GetElapsedTimeUs() * 1000);
- Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
return Result;
}
virtual SaveAttachmentsResult SaveAttachments(const std::vector<SharedBuffer>& Chunks) override
{
+ SaveAttachmentsResult Result;
+
Stopwatch Timer;
+ auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; });
for (const SharedBuffer& Chunk : Chunks)
{
@@ -157,12 +190,10 @@ public:
SaveAttachmentResult ChunkResult = SaveAttachment(Compressed.GetCompressed(), Compressed.DecodeRawHash(), {});
if (ChunkResult.ErrorCode)
{
- ChunkResult.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
- return SaveAttachmentsResult{ChunkResult};
+ Result = SaveAttachmentsResult{ChunkResult};
+ break;
}
}
- SaveAttachmentsResult Result;
- Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
return Result;
}
@@ -172,21 +203,60 @@ public:
virtual GetKnownBlocksResult GetKnownBlocks() override
{
+ Stopwatch Timer;
if (m_OptionalBaseName.empty())
{
- return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent)}};
+ size_t MaxBlockCount = 10000;
+
+ GetKnownBlocksResult Result;
+
+ DirectoryContent Content;
+ GetDirectoryContent(
+ m_OutputPath,
+ DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeModificationTick,
+ Content);
+ std::vector<size_t> RecentOrder(Content.Files.size());
+ std::iota(RecentOrder.begin(), RecentOrder.end(), 0u);
+ std::sort(RecentOrder.begin(), RecentOrder.end(), [&Content](size_t Lhs, size_t Rhs) {
+ return Content.FileModificationTicks[Lhs] > Content.FileModificationTicks[Rhs];
+ });
+
+ for (size_t FileIndex : RecentOrder)
+ {
+ std::filesystem::path MetaPath = Content.Files[FileIndex];
+ if (MetaPath.extension() == MetaExtension)
+ {
+ IoBuffer MetaFile = ReadFile(MetaPath).Flatten();
+ CbValidateError Err;
+ CbObject ValidatedObject = ValidateAndReadCompactBinaryObject(std::move(MetaFile), Err);
+ if (Err == CbValidateError::None)
+ {
+ ChunkBlockDescription Description = ParseChunkBlockDescription(ValidatedObject);
+ if (Description.BlockHash != IoHash::Zero)
+ {
+ Result.Blocks.emplace_back(std::move(Description));
+ if (Result.Blocks.size() == MaxBlockCount)
+ {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
+ return Result;
}
LoadContainerResult LoadResult = LoadContainer(m_OptionalBaseName);
if (LoadResult.ErrorCode)
{
return GetKnownBlocksResult{LoadResult};
}
- Stopwatch Timer;
std::vector<IoHash> BlockHashes = GetBlockHashesFromOplog(LoadResult.ContainerObject);
if (BlockHashes.empty())
{
return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent),
- .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}};
+ .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeMs() / 1000.0}};
}
std::vector<IoHash> ExistingBlockHashes;
for (const IoHash& RawHash : BlockHashes)
@@ -200,15 +270,15 @@ public:
if (ExistingBlockHashes.empty())
{
return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent),
- .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}};
+ .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeMs() / 1000.0}};
}
std::vector<ThinChunkBlockDescription> ThinKnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes);
- const size_t KnowBlockCount = ThinKnownBlocks.size();
+ const size_t KnownBlockCount = ThinKnownBlocks.size();
- GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}};
- Result.Blocks.resize(KnowBlockCount);
- for (size_t BlockIndex = 0; BlockIndex < KnowBlockCount; BlockIndex++)
+ GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeMs() / 1000.0}};
+ Result.Blocks.resize(KnownBlockCount);
+ for (size_t BlockIndex = 0; BlockIndex < KnownBlockCount; BlockIndex++)
{
Result.Blocks[BlockIndex].BlockHash = ThinKnownBlocks[BlockIndex].BlockHash;
Result.Blocks[BlockIndex].ChunkRawHashes = std::move(ThinKnownBlocks[BlockIndex].ChunkRawHashes);
@@ -217,43 +287,131 @@ public:
return Result;
}
- virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes) override
+ virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes,
+ BuildStorageCache* OptionalCache,
+ const Oid& CacheBuildId) override
{
- ZEN_UNUSED(BlockHashes);
- return GetBlockDescriptionsResult{Result{.ErrorCode = int(HttpResponseCode::NotFound)}};
- }
+ GetBlockDescriptionsResult Result;
- virtual AttachmentExistsInCacheResult AttachmentExistsInCache(std::span<const IoHash> RawHashes) override
- {
- return AttachmentExistsInCacheResult{Result{.ErrorCode = 0}, std::vector<bool>(RawHashes.size(), false)};
+ Stopwatch Timer;
+ auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; });
+
+ Result.Blocks.reserve(BlockHashes.size());
+
+ uint64_t ByteCount = 0;
+
+ std::vector<ChunkBlockDescription> UnorderedList;
+ {
+ if (OptionalCache)
+ {
+ std::vector<CbObject> CacheBlockMetadatas = OptionalCache->GetBlobMetadatas(CacheBuildId, BlockHashes);
+ for (const CbObject& BlockObject : CacheBlockMetadatas)
+ {
+ ByteCount += BlockObject.GetSize();
+ }
+ UnorderedList = ParseBlockMetadatas(CacheBlockMetadatas);
+ }
+
+ tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockDescriptionLookup;
+ BlockDescriptionLookup.reserve(BlockHashes.size());
+ for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++)
+ {
+ const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex];
+ BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex);
+ }
+
+ if (UnorderedList.size() < BlockHashes.size())
+ {
+ for (const IoHash& RawHash : BlockHashes)
+ {
+ if (!BlockDescriptionLookup.contains(RawHash))
+ {
+ std::filesystem::path MetaPath = GetAttachmentMetaPath(RawHash);
+ IoBuffer MetaFile = ReadFile(MetaPath).Flatten();
+ ByteCount += MetaFile.GetSize();
+ CbValidateError Err;
+ CbObject ValidatedObject = ValidateAndReadCompactBinaryObject(std::move(MetaFile), Err);
+ if (Err == CbValidateError::None)
+ {
+ ChunkBlockDescription Description = ParseChunkBlockDescription(ValidatedObject);
+ if (Description.BlockHash != IoHash::Zero)
+ {
+ BlockDescriptionLookup.insert_or_assign(Description.BlockHash, UnorderedList.size());
+ UnorderedList.emplace_back(std::move(Description));
+ }
+ }
+ }
+ }
+ }
+
+ Result.Blocks.reserve(UnorderedList.size());
+ for (const IoHash& RawHash : BlockHashes)
+ {
+ if (auto It = BlockDescriptionLookup.find(RawHash); It != BlockDescriptionLookup.end())
+ {
+ Result.Blocks.emplace_back(std::move(UnorderedList[It->second]));
+ }
+ }
+ }
+ AddStats(0, ByteCount, Timer.GetElapsedTimeUs() * 1000);
+ return Result;
}
- virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash, const AttachmentRange& Range) override
+ virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override
{
- Stopwatch Timer;
- LoadAttachmentResult Result;
+ LoadAttachmentResult Result;
+
+ Stopwatch Timer;
+ auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; });
+
std::filesystem::path ChunkPath = GetAttachmentPath(RawHash);
if (!IsFile(ChunkPath))
{
Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound);
Result.Reason = fmt::format("Failed loading oplog attachment from '{}'. Reason: 'The file does not exist'", ChunkPath.string());
- Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
return Result;
}
{
BasicFile ChunkFile;
ChunkFile.Open(ChunkPath, BasicFile::Mode::kRead);
- if (Range)
- {
- Result.Bytes = ChunkFile.ReadRange(Range.Offset, Range.Bytes);
- }
- else
+ Result.Bytes = ChunkFile.ReadAll();
+ }
+ AddStats(0, Result.Bytes.GetSize(), Timer.GetElapsedTimeUs() * 1000);
+ return Result;
+ }
+
+ virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges) override
+ {
+ ZEN_ASSERT(!Ranges.empty());
+ LoadAttachmentRangesResult Result;
+
+ Stopwatch Timer;
+ auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; });
+
+ std::filesystem::path ChunkPath = GetAttachmentPath(RawHash);
+ if (!IsFile(ChunkPath))
+ {
+ Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound);
+ Result.Reason = fmt::format("Failed loading oplog attachment from '{}'. Reason: 'The file does not exist'", ChunkPath.string());
+ return Result;
+ }
+ {
+ uint64_t Start = Ranges.front().first;
+ uint64_t Length = Ranges.back().first + Ranges.back().second - Ranges.front().first;
+ Result.Bytes = IoBufferBuilder::MakeFromFile(ChunkPath, Start, Length);
+ Result.Ranges.reserve(Ranges.size());
+ for (const std::pair<uint64_t, uint64_t>& Range : Ranges)
{
- Result.Bytes = ChunkFile.ReadAll();
+ Result.Ranges.push_back(std::make_pair(Range.first - Start, Range.second));
}
}
- AddStats(0, Result.Bytes.GetSize(), Timer.GetElapsedTimeUs() * 1000);
- Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
+ AddStats(0,
+ std::accumulate(Result.Ranges.begin(),
+ Result.Ranges.end(),
+ uint64_t(0),
+ [](uint64_t Current, const std::pair<uint64_t, uint64_t>& Value) { return Current + Value.second; }),
+ Timer.GetElapsedTimeUs() * 1000);
return Result;
}
@@ -263,7 +421,7 @@ public:
LoadAttachmentsResult Result;
for (const IoHash& Hash : RawHashes)
{
- LoadAttachmentResult ChunkResult = LoadAttachment(Hash, {});
+ LoadAttachmentResult ChunkResult = LoadAttachment(Hash);
if (ChunkResult.ErrorCode)
{
ChunkResult.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
@@ -276,20 +434,20 @@ public:
return Result;
}
- virtual void Flush() override {}
-
private:
LoadContainerResult LoadContainer(const std::string& Name)
{
- Stopwatch Timer;
- LoadContainerResult Result;
+ LoadContainerResult Result;
+
+ Stopwatch Timer;
+ auto _ = MakeGuard([&Result, &Timer]() { Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; });
+
std::filesystem::path SourcePath = m_OutputPath;
SourcePath.append(Name);
if (!IsFile(SourcePath))
{
Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound);
Result.Reason = fmt::format("Failed loading oplog container from '{}'. Reason: 'The file does not exist'", SourcePath.string());
- Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
return Result;
}
IoBuffer ContainerPayload;
@@ -303,18 +461,16 @@ private:
if (Result.ContainerObject = ValidateAndReadCompactBinaryObject(std::move(ContainerPayload), ValidateResult);
ValidateResult != CbValidateError::None || !Result.ContainerObject)
{
- Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError);
- Result.Reason = fmt::format("The file {} is not formatted as a compact binary object ('{}')",
- SourcePath.string(),
- ToString(ValidateResult));
- Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
+ Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError);
+ Result.Reason = fmt::format("The file {} is not formatted as a compact binary object ('{}')",
+ SourcePath.string(),
+ ToString(ValidateResult));
return Result;
}
- Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
return Result;
}
- std::filesystem::path GetAttachmentPath(const IoHash& RawHash) const
+ std::filesystem::path GetAttachmentBasePath(const IoHash& RawHash) const
{
ExtendablePathBuilder<128> ShardedPath;
ShardedPath.Append(m_OutputPath.c_str());
@@ -333,6 +489,19 @@ private:
return ShardedPath.ToPath();
}
+ static constexpr std::string_view BlobExtension = ".blob";
+ static constexpr std::string_view MetaExtension = ".meta";
+
+ std::filesystem::path GetAttachmentPath(const IoHash& RawHash)
+ {
+ return GetAttachmentBasePath(RawHash).replace_extension(BlobExtension);
+ }
+
+ std::filesystem::path GetAttachmentMetaPath(const IoHash& RawHash)
+ {
+ return GetAttachmentBasePath(RawHash).replace_extension(MetaExtension);
+ }
+
void AddStats(uint64_t UploadedBytes, uint64_t DownloadedBytes, uint64_t ElapsedNS)
{
m_SentBytes.fetch_add(UploadedBytes);
diff --git a/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp b/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp
index f8179831c..5b456cb4c 100644
--- a/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp
+++ b/src/zenremotestore/projectstore/jupiterremoteprojectstore.cpp
@@ -212,24 +212,43 @@ public:
return Result;
}
- virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes) override
+ virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes,
+ BuildStorageCache* OptionalCache,
+ const Oid& CacheBuildId) override
{
- ZEN_UNUSED(BlockHashes);
+ ZEN_UNUSED(BlockHashes, OptionalCache, CacheBuildId);
return GetBlockDescriptionsResult{Result{.ErrorCode = int(HttpResponseCode::NotFound)}};
}
- virtual AttachmentExistsInCacheResult AttachmentExistsInCache(std::span<const IoHash> RawHashes) override
+ virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override
{
- return AttachmentExistsInCacheResult{Result{.ErrorCode = 0}, std::vector<bool>(RawHashes.size(), false)};
+ LoadAttachmentResult Result;
+ JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect);
+ JupiterResult GetResult = Session.GetCompressedBlob(m_Namespace, RawHash, m_TempFilePath);
+ AddStats(GetResult);
+
+ Result = {ConvertResult(GetResult), std::move(GetResult.Response)};
+ if (GetResult.ErrorCode)
+ {
+ Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}. Reason: '{}'",
+ m_JupiterClient->ServiceUrl(),
+ m_Namespace,
+ RawHash,
+ Result.Reason);
+ }
+ return Result;
}
- virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash, const AttachmentRange& Range) override
+ virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges) override
{
- JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect);
- JupiterResult GetResult = Session.GetCompressedBlob(m_Namespace, RawHash, m_TempFilePath);
+ ZEN_ASSERT(!Ranges.empty());
+ LoadAttachmentRangesResult Result;
+ JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect);
+ JupiterResult GetResult = Session.GetCompressedBlob(m_Namespace, RawHash, m_TempFilePath);
AddStats(GetResult);
- LoadAttachmentResult Result{ConvertResult(GetResult), std::move(GetResult.Response)};
+ Result = LoadAttachmentRangesResult{ConvertResult(GetResult), std::move(GetResult.Response)};
if (GetResult.ErrorCode)
{
Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}. Reason: '{}'",
@@ -238,9 +257,9 @@ public:
RawHash,
Result.Reason);
}
- if (!Result.ErrorCode && Range)
+ else
{
- Result.Bytes = IoBuffer(Result.Bytes, Range.Offset, Range.Bytes);
+ Result.Ranges = std::vector<std::pair<uint64_t, uint64_t>>(Ranges.begin(), Ranges.end());
}
return Result;
}
@@ -250,7 +269,7 @@ public:
LoadAttachmentsResult Result;
for (const IoHash& Hash : RawHashes)
{
- LoadAttachmentResult ChunkResult = LoadAttachment(Hash, {});
+ LoadAttachmentResult ChunkResult = LoadAttachment(Hash);
if (ChunkResult.ErrorCode)
{
return LoadAttachmentsResult{ChunkResult};
@@ -262,8 +281,6 @@ public:
return Result;
}
- virtual void Flush() override {}
-
private:
LoadContainerResult LoadContainer(const IoHash& Key)
{
diff --git a/src/zenremotestore/projectstore/projectstoreoperations.cpp b/src/zenremotestore/projectstore/projectstoreoperations.cpp
index becac3d4c..ba4b74825 100644
--- a/src/zenremotestore/projectstore/projectstoreoperations.cpp
+++ b/src/zenremotestore/projectstore/projectstoreoperations.cpp
@@ -3,13 +3,14 @@
#include <zenremotestore/projectstore/projectstoreoperations.h>
#include <zencore/compactbinaryutil.h>
+#include <zencore/fmtutils.h>
#include <zencore/parallelwork.h>
#include <zencore/scopeguard.h>
#include <zencore/timer.h>
#include <zenremotestore/builds/buildstorageutil.h>
#include <zenremotestore/chunking/chunkedfile.h>
-#include <zenremotestore/operationlogoutput.h>
#include <zenremotestore/projectstore/remoteprojectstore.h>
+#include <zenutil/progress.h>
namespace zen {
@@ -17,11 +18,11 @@ using namespace std::literals;
//////////////////////////// ProjectStoreOperationOplogState
-ProjectStoreOperationOplogState::ProjectStoreOperationOplogState(OperationLogOutput& OperationLogOutput,
- StorageInstance& Storage,
- const Oid& BuildId,
- const Options& Options)
-: m_LogOutput(OperationLogOutput)
+ProjectStoreOperationOplogState::ProjectStoreOperationOplogState(LoggerRef Log,
+ StorageInstance& Storage,
+ const Oid& BuildId,
+ const Options& Options)
+: m_Log(Log)
, m_Storage(Storage)
, m_BuildId(BuildId)
, m_Options(Options)
@@ -48,10 +49,7 @@ ProjectStoreOperationOplogState::LoadBuildObject()
{
if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Read build {} from locally cached file in {}",
- m_BuildId,
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ ZEN_INFO("Read build {} from locally cached file in {}", m_BuildId, NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
}
return m_BuildObject;
}
@@ -61,11 +59,10 @@ ProjectStoreOperationOplogState::LoadBuildObject()
m_BuildObject = m_Storage.BuildStorage->GetBuild(m_BuildId);
if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Fetched build {} from {} in {}",
- m_BuildId,
- m_Storage.BuildStorageHttp->GetBaseUri(),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ ZEN_INFO("Fetched build {} from {} in {}",
+ m_BuildId,
+ m_Storage.BuildStorageHttp->GetBaseUri(),
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
}
CreateDirectories(CachedBuildObjectPath.parent_path());
TemporaryFile::SafeWriteFile(CachedBuildObjectPath, m_BuildObject.GetBuffer().GetView());
@@ -122,11 +119,10 @@ ProjectStoreOperationOplogState::LoadBuildPartsObject()
{
if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Read build part {}/{} from locally cached file in {}",
- m_BuildId,
- BuildPartId,
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ ZEN_INFO("Read build part {}/{} from locally cached file in {}",
+ m_BuildId,
+ BuildPartId,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
}
return m_BuildPartsObject;
}
@@ -136,12 +132,11 @@ ProjectStoreOperationOplogState::LoadBuildPartsObject()
m_BuildPartsObject = m_Storage.BuildStorage->GetBuildPart(m_BuildId, BuildPartId);
if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Fetched build part {}/{} from {} in {}",
- m_BuildId,
- BuildPartId,
- m_Storage.BuildStorageHttp->GetBaseUri(),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ ZEN_INFO("Fetched build part {}/{} from {} in {}",
+ m_BuildId,
+ BuildPartId,
+ m_Storage.BuildStorageHttp->GetBaseUri(),
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
}
CreateDirectories(CachedBuildPartObjectPath.parent_path());
TemporaryFile::SafeWriteFile(CachedBuildPartObjectPath, m_BuildPartsObject.GetBuffer().GetView());
@@ -168,11 +163,7 @@ ProjectStoreOperationOplogState::LoadOpsSectionObject()
}
else if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Read {}/{}/ops from locally cached file in {}",
- BuildPartId,
- m_BuildId,
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ ZEN_INFO("Read {}/{}/ops from locally cached file in {}", BuildPartId, m_BuildId, NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
return m_OpsSectionObject;
}
}
@@ -193,11 +184,10 @@ ProjectStoreOperationOplogState::LoadOpsSectionObject()
}
if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Decompressed and validated oplog payload {} -> {} in {}",
- NiceBytes(OpsSection.GetSize()),
- NiceBytes(m_OpsSectionObject.GetSize()),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ ZEN_INFO("Decompressed and validated oplog payload {} -> {} in {}",
+ NiceBytes(OpsSection.GetSize()),
+ NiceBytes(m_OpsSectionObject.GetSize()),
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
}
if (m_OpsSectionObject)
{
@@ -226,12 +216,11 @@ ProjectStoreOperationOplogState::LoadArrayFromBuildPart(std::string_view ArrayNa
{
if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Read {}/{}/{} from locally cached file in {}",
- BuildPartId,
- m_BuildId,
- ArrayName,
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ ZEN_INFO("Read {}/{}/{} from locally cached file in {}",
+ BuildPartId,
+ m_BuildId,
+ ArrayName,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
}
CbArray Result = CbArray(SharedBuffer(std::move(Payload)));
return Result;
@@ -290,7 +279,8 @@ ProjectStoreOperationOplogState::LoadChunksArray()
//////////////////////////// ProjectStoreOperationDownloadAttachments
-ProjectStoreOperationDownloadAttachments::ProjectStoreOperationDownloadAttachments(OperationLogOutput& OperationLogOutput,
+ProjectStoreOperationDownloadAttachments::ProjectStoreOperationDownloadAttachments(LoggerRef Log,
+ ProgressBase& Progress,
StorageInstance& Storage,
std::atomic<bool>& AbortFlag,
std::atomic<bool>& PauseFlag,
@@ -299,7 +289,8 @@ ProjectStoreOperationDownloadAttachments::ProjectStoreOperationDownloadAttachmen
ProjectStoreOperationOplogState& State,
std::span<const IoHash> AttachmentHashes,
const Options& Options)
-: m_LogOutput(OperationLogOutput)
+: m_Log(Log)
+, m_Progress(Progress)
, m_Storage(Storage)
, m_AbortFlag(AbortFlag)
, m_PauseFlag(PauseFlag)
@@ -325,9 +316,9 @@ ProjectStoreOperationDownloadAttachments::Execute()
};
auto EndProgress =
- MakeGuard([&]() { m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); });
+ MakeGuard([&]() { m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::StepCount, (uint32_t)TaskSteps::StepCount); });
- m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::ReadAttachmentData, (uint32_t)TaskSteps::StepCount);
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::ReadAttachmentData, (uint32_t)TaskSteps::StepCount);
Stopwatch Timer;
tsl::robin_map<IoHash, uint64_t, IoHash::Hasher> ChunkSizes;
@@ -415,30 +406,29 @@ ProjectStoreOperationDownloadAttachments::Execute()
FilesToDechunk.size() > 0
? fmt::format("\n{} file{} needs to be dechunked", FilesToDechunk.size(), FilesToDechunk.size() == 1 ? "" : "s")
: "";
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Need to download {} block{} and {} chunk{}{}",
- BlocksToDownload.size(),
- BlocksToDownload.size() == 1 ? "" : "s",
- LooseChunksToDownload.size(),
- LooseChunksToDownload.size() == 1 ? "" : "s",
- DechunkInfo);
+ ZEN_INFO("Need to download {} block{} and {} chunk{}{}",
+ BlocksToDownload.size(),
+ BlocksToDownload.size() == 1 ? "" : "s",
+ LooseChunksToDownload.size(),
+ LooseChunksToDownload.size() == 1 ? "" : "s",
+ DechunkInfo);
}
auto GetBuildBlob = [this](const IoHash& RawHash, const std::filesystem::path& OutputPath) {
IoBuffer Payload;
- if (m_Storage.BuildCacheStorage)
+ if (m_Storage.CacheStorage)
{
- Payload = m_Storage.BuildCacheStorage->GetBuildBlob(m_State.GetBuildId(), RawHash);
+ Payload = m_Storage.CacheStorage->GetBuildBlob(m_State.GetBuildId(), RawHash);
}
if (!Payload)
{
Payload = m_Storage.BuildStorage->GetBuildBlob(m_State.GetBuildId(), RawHash);
- if (m_Storage.BuildCacheStorage && m_Options.PopulateCache)
+ if (m_Storage.CacheStorage && m_Options.PopulateCache)
{
- m_Storage.BuildCacheStorage->PutBuildBlob(m_State.GetBuildId(),
- RawHash,
- Payload.GetContentType(),
- CompositeBuffer(SharedBuffer(Payload)));
+ m_Storage.CacheStorage->PutBuildBlob(m_State.GetBuildId(),
+ RawHash,
+ Payload.GetContentType(),
+ CompositeBuffer(SharedBuffer(Payload)));
}
}
uint64_t PayloadSize = Payload.GetSize();
@@ -470,18 +460,15 @@ ProjectStoreOperationDownloadAttachments::Execute()
std::filesystem::path TempAttachmentPath = MakeSafeAbsolutePath(m_Options.AttachmentOutputPath) / ".tmp";
CreateDirectories(TempAttachmentPath);
auto _0 = MakeGuard([this, &TempAttachmentPath]() {
- if (true)
+ if (!m_Options.IsQuiet)
{
- if (!m_Options.IsQuiet)
- {
- ZEN_OPERATION_LOG_INFO(m_LogOutput, "Cleaning up temporary directory");
- }
- CleanDirectory(TempAttachmentPath, true);
- RemoveDir(TempAttachmentPath);
+ ZEN_INFO("Cleaning up temporary directory");
}
+ CleanDirectory(TempAttachmentPath, true);
+ RemoveDir(TempAttachmentPath);
});
- m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Download, (uint32_t)TaskSteps::StepCount);
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::Download, (uint32_t)TaskSteps::StepCount);
std::filesystem::path BlocksPath = TempAttachmentPath / "blocks";
CreateDirectories(BlocksPath);
@@ -492,11 +479,9 @@ ProjectStoreOperationDownloadAttachments::Execute()
std::filesystem::path LooseChunksPath = TempAttachmentPath / "loosechunks";
CreateDirectories(LooseChunksPath);
- std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Downloading"));
- OperationLogOutput::ProgressBar& DownloadProgressBar(*ProgressBarPtr);
+ std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Downloading");
- std::atomic<bool> PauseFlag;
- ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+ ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
std::atomic<size_t> LooseChunksCompleted;
std::atomic<size_t> BlocksCompleted;
@@ -511,7 +496,7 @@ ProjectStoreOperationDownloadAttachments::Execute()
if (m_Options.ForceDownload || !IsFile(LooseChunkOutputPath))
{
GetBuildBlob(RawHash, LooseChunkOutputPath);
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Downloaded chunk {}", RawHash);
+ ZEN_DEBUG("Downloaded chunk {}", RawHash);
}
Work.ScheduleWork(m_IOWorkerPool, [&, LooseChunkIndex, LooseChunkOutputPath](std::atomic<bool>&) {
@@ -547,7 +532,7 @@ ProjectStoreOperationDownloadAttachments::Execute()
{
ChunkOutput.Close();
RemoveFile(ChunkOutputPath);
- throw std::runtime_error(fmt::format("Failed to decompress chunk {} to ", RawHash, ChunkOutputPath));
+ throw std::runtime_error(fmt::format("Failed to decompress chunk {} to '{}'", RawHash, ChunkOutputPath));
}
}
else
@@ -555,7 +540,7 @@ ProjectStoreOperationDownloadAttachments::Execute()
TemporaryFile::SafeWriteFile(ChunkOutputPath, CompressedChunk.GetCompressed());
}
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Wrote loose chunk {} to '{}'", RawHash, ChunkOutputPath);
+ ZEN_DEBUG("Wrote loose chunk {} to '{}'", RawHash, ChunkOutputPath);
LooseChunksCompleted++;
});
});
@@ -572,7 +557,7 @@ ProjectStoreOperationDownloadAttachments::Execute()
if (m_Options.ForceDownload || !IsFile(BlockOutputPath))
{
GetBuildBlob(RawHash, BlockOutputPath);
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Downloaded block {}", RawHash);
+ ZEN_DEBUG("Downloaded block {}", RawHash);
}
Work.ScheduleWork(m_IOWorkerPool, [&, BlockIndex, BlockOutputPath](std::atomic<bool>&) {
@@ -607,7 +592,7 @@ ProjectStoreOperationDownloadAttachments::Execute()
ChunkOutput.Close();
RemoveFile(ChunkOutputPath);
throw std::runtime_error(
- fmt::format("Failed to decompress chunk {} to ", ChunkHash, ChunkOutputPath));
+ fmt::format("Failed to decompress chunk {} to '{}'", ChunkHash, ChunkOutputPath));
}
}
else
@@ -615,7 +600,7 @@ ProjectStoreOperationDownloadAttachments::Execute()
TemporaryFile::SafeWriteFile(ChunkOutputPath, CompressedChunk.GetCompressed());
}
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Wrote block chunk {} to '{}'", ChunkHash, ChunkOutputPath);
+ ZEN_DEBUG("Wrote block chunk {} to '{}'", ChunkHash, ChunkOutputPath);
}
if (ChunkedFileRawHashes.contains(ChunkHash))
{
@@ -635,7 +620,7 @@ ProjectStoreOperationDownloadAttachments::Execute()
});
}
- Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
ZEN_UNUSED(IsAborted, IsPaused, PendingWork);
std::string Details = fmt::format("{}/{} blocks, {}/{} chunks downloaded",
@@ -643,39 +628,37 @@ ProjectStoreOperationDownloadAttachments::Execute()
BlocksToDownload.size(),
LooseChunksCompleted.load(),
LooseChunksToDownload.size());
- DownloadProgressBar.UpdateState({.Task = "Downloading",
- .Details = Details,
- .TotalCount = BlocksToDownload.size() + LooseChunksToDownload.size(),
- .RemainingCount = BlocksToDownload.size() + LooseChunksToDownload.size() -
- (BlocksCompleted.load() + LooseChunksCompleted.load()),
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
+ ProgressBar->UpdateState({.Task = "Downloading",
+ .Details = Details,
+ .TotalCount = BlocksToDownload.size() + LooseChunksToDownload.size(),
+ .RemainingCount = BlocksToDownload.size() + LooseChunksToDownload.size() -
+ (BlocksCompleted.load() + LooseChunksCompleted.load()),
+ .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+ false);
});
- DownloadProgressBar.Finish();
+ ProgressBar->Finish();
if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "{} block{} downloaded, {} loose chunk{} downloaded in {}",
- BlocksToDownload.size(),
- BlocksToDownload.size() == 1 ? "" : "s",
- LooseChunksToDownload.size(),
- LooseChunksToDownload.size() == 1 ? "" : "s",
- NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs()));
+ ZEN_INFO("{} block{} downloaded, {} loose chunk{} downloaded in {}",
+ BlocksToDownload.size(),
+ BlocksToDownload.size() == 1 ? "" : "s",
+ LooseChunksToDownload.size(),
+ LooseChunksToDownload.size() == 1 ? "" : "s",
+ NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs()));
}
}
if (!ChunkedFileInfos.empty())
{
- m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::AnalyzeDechunk, (uint32_t)TaskSteps::StepCount);
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::AnalyzeDechunk, (uint32_t)TaskSteps::StepCount);
std::filesystem::path ChunkedFilesPath = TempAttachmentPath / "chunkedfiles";
CreateDirectories(ChunkedFilesPath);
try
{
- std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Dechunking"));
- OperationLogOutput::ProgressBar& DechunkingProgressBar(*ProgressBarPtr);
+ std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Dechunking");
std::atomic<uint64_t> ChunksWritten;
@@ -729,7 +712,7 @@ ProjectStoreOperationDownloadAttachments::Execute()
PrepareFileForScatteredWrite(OpenChunkedFiles.back()->Handle(), ChunkedFileInfo.RawSize);
}
- m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Dechunk, (uint32_t)TaskSteps::StepCount);
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::Dechunk, (uint32_t)TaskSteps::StepCount);
std::vector<std::atomic<uint8_t>> ChunkWrittenFlags(ChunkOpenFileTargets.size());
@@ -755,7 +738,7 @@ ProjectStoreOperationDownloadAttachments::Execute()
}))
{
std::error_code DummyEc;
- throw std::runtime_error(fmt::format("Failed to decompress chunk {} at offset {} to {}",
+ throw std::runtime_error(fmt::format("Failed to decompress chunk {} at offset {} to '{}'",
CompressedChunkBuffer.DecodeRawHash(),
ChunkTarget.Offset,
PathFromHandle(OutputFile.Handle(), DummyEc)));
@@ -768,8 +751,7 @@ ProjectStoreOperationDownloadAttachments::Execute()
{
Stopwatch DechunkTimer;
- std::atomic<bool> PauseFlag;
- ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+ ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
std::vector<IoHash> LooseChunks(LooseChunksToDownload.begin(), LooseChunksToDownload.end());
@@ -819,26 +801,24 @@ ProjectStoreOperationDownloadAttachments::Execute()
}
});
}
- Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
ZEN_UNUSED(IsAborted, IsPaused, PendingWork);
std::string Details = fmt::format("{}/{} chunks written", ChunksWritten.load(), ChunkOpenFileTargets.size());
- DechunkingProgressBar.UpdateState(
- {.Task = "Dechunking ",
- .Details = Details,
- .TotalCount = ChunkOpenFileTargets.size(),
- .RemainingCount = ChunkOpenFileTargets.size() - ChunksWritten.load(),
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
+ ProgressBar->UpdateState({.Task = "Dechunking ",
+ .Details = Details,
+ .TotalCount = ChunkOpenFileTargets.size(),
+ .RemainingCount = ChunkOpenFileTargets.size() - ChunksWritten.load(),
+ .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+ false);
});
- DechunkingProgressBar.Finish();
+ ProgressBar->Finish();
if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "{} file{} dechunked in {}",
- ChunkedFileInfos.size(),
- ChunkedFileInfos.size() == 1 ? "" : "s",
- NiceTimeSpanMs(DechunkTimer.GetElapsedTimeMs()));
+ ZEN_INFO("{} file{} dechunked in {}",
+ ChunkedFileInfos.size(),
+ ChunkedFileInfos.size() == 1 ? "" : "s",
+ NiceTimeSpanMs(DechunkTimer.GetElapsedTimeMs()));
}
}
}
@@ -853,12 +833,10 @@ ProjectStoreOperationDownloadAttachments::Execute()
throw;
}
{
- Stopwatch VerifyTimer;
- std::unique_ptr<OperationLogOutput::ProgressBar> ProgressBarPtr(m_LogOutput.CreateProgressBar("Verifying"));
- OperationLogOutput::ProgressBar& VerifyProgressBar(*ProgressBarPtr);
+ Stopwatch VerifyTimer;
+ std::unique_ptr<ProgressBase::ProgressBar> ProgressBar = m_Progress.CreateProgressBar("Verifying");
- std::atomic<bool> PauseFlag;
- ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+ ParallelWork Work(m_AbortFlag, m_PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
std::atomic<size_t> DechunkedFilesMoved;
@@ -875,43 +853,41 @@ ProjectStoreOperationDownloadAttachments::Execute()
}
std::filesystem::path ChunkOutputPath = m_Options.AttachmentOutputPath / fmt::format("{}", ChunkedFileInfo.RawHash);
RenameFile(ChunkedFilePath, ChunkOutputPath);
- ZEN_OPERATION_LOG_DEBUG(m_LogOutput, "Moved dechunked file {} to '{}'", ChunkedFileInfo.RawHash, ChunkOutputPath);
+ ZEN_DEBUG("Moved dechunked file {} to '{}'", ChunkedFileInfo.RawHash, ChunkOutputPath);
DechunkedFilesMoved++;
});
}
- Work.Wait(m_LogOutput.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ Work.Wait(m_Progress.GetProgressUpdateDelayMS(), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
ZEN_UNUSED(IsAborted, IsPaused, PendingWork);
std::string Details = fmt::format("{}/{} files verified", DechunkedFilesMoved.load(), ChunkedFileInfos.size());
- VerifyProgressBar.UpdateState({.Task = "Verifying ",
- .Details = Details,
- .TotalCount = ChunkedFileInfos.size(),
- .RemainingCount = ChunkedFileInfos.size() - DechunkedFilesMoved.load(),
- .Status = OperationLogOutput::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
- false);
+ ProgressBar->UpdateState({.Task = "Verifying ",
+ .Details = Details,
+ .TotalCount = ChunkedFileInfos.size(),
+ .RemainingCount = ChunkedFileInfos.size() - DechunkedFilesMoved.load(),
+ .Status = ProgressBase::ProgressBar::State::CalculateStatus(IsAborted, IsPaused)},
+ false);
});
- VerifyProgressBar.Finish();
+ ProgressBar->Finish();
if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Verified {} chunked file{} in {}",
- ChunkedFileInfos.size(),
- ChunkedFileInfos.size() == 1 ? "" : "s",
- NiceTimeSpanMs(VerifyTimer.GetElapsedTimeMs()));
+ ZEN_INFO("Verified {} chunked file{} in {}",
+ ChunkedFileInfos.size(),
+ ChunkedFileInfos.size() == 1 ? "" : "s",
+ NiceTimeSpanMs(VerifyTimer.GetElapsedTimeMs()));
}
}
}
if (!m_Options.IsQuiet)
{
- ZEN_OPERATION_LOG_INFO(m_LogOutput,
- "Downloaded {} attachment{} to '{}' in {}",
- m_AttachmentHashes.size(),
- m_AttachmentHashes.size() == 1 ? "" : "s",
- m_Options.AttachmentOutputPath,
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ ZEN_INFO("Downloaded {} attachment{} to '{}' in {}",
+ m_AttachmentHashes.size(),
+ m_AttachmentHashes.size() == 1 ? "" : "s",
+ m_Options.AttachmentOutputPath,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
}
- m_LogOutput.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount);
+ m_Progress.SetLogOperationProgress((uint32_t)TaskSteps::Cleanup, (uint32_t)TaskSteps::StepCount);
}
} // namespace zen
diff --git a/src/zenremotestore/projectstore/remoteprojectstore.cpp b/src/zenremotestore/projectstore/remoteprojectstore.cpp
index 2a9da6f58..f43f0813a 100644
--- a/src/zenremotestore/projectstore/remoteprojectstore.cpp
+++ b/src/zenremotestore/projectstore/remoteprojectstore.cpp
@@ -8,16 +8,21 @@
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
+#include <zencore/logging/broadcastsink.h>
+#include <zencore/logging/logger.h>
+#include <zencore/parallelwork.h>
#include <zencore/scopeguard.h>
#include <zencore/stream.h>
#include <zencore/timer.h>
#include <zencore/trace.h>
#include <zencore/workthreadpool.h>
#include <zenhttp/httpcommon.h>
+#include <zenremotestore/builds/buildstoragecache.h>
#include <zenremotestore/chunking/chunkedcontent.h>
#include <zenremotestore/chunking/chunkedfile.h>
-#include <zenremotestore/operationlogoutput.h>
#include <zenstore/cidstore.h>
+#include <zenutil/logging.h>
+#include <zenutil/progress.h>
#include <numeric>
#include <unordered_map>
@@ -65,44 +70,19 @@ namespace zen {
}
*/
namespace remotestore_impl {
- ////////////////////////////// AsyncRemoteResult
-
- struct AsyncRemoteResult
- {
- void SetError(int32_t ErrorCode, const std::string& ErrorReason, const std::string ErrorText)
- {
- int32_t Expected = 0;
- if (m_ErrorCode.compare_exchange_weak(Expected, ErrorCode ? ErrorCode : -1))
- {
- m_ErrorReason = ErrorReason;
- m_ErrorText = ErrorText;
- }
- }
- bool IsError() const { return m_ErrorCode.load() != 0; }
- int GetError() const { return m_ErrorCode.load(); };
- const std::string& GetErrorReason() const { return m_ErrorReason; };
- const std::string& GetErrorText() const { return m_ErrorText; };
- RemoteProjectStore::Result ConvertResult(double ElapsedSeconds = 0.0) const
- {
- return RemoteProjectStore::Result{m_ErrorCode, ElapsedSeconds, m_ErrorReason, m_ErrorText};
- }
-
- private:
- std::atomic<int32_t> m_ErrorCode = 0;
- std::string m_ErrorReason;
- std::string m_ErrorText;
- };
+ using namespace std::literals;
void ReportProgress(JobContext* OptionalContext,
std::string_view CurrentOp,
std::string_view Details,
ptrdiff_t Total,
- ptrdiff_t Remaining)
+ ptrdiff_t Remaining,
+ uint64_t ElapsedTimeMS)
{
if (OptionalContext)
{
ZEN_ASSERT(Total > 0);
- OptionalContext->ReportProgress(CurrentOp, Details, Total, Remaining);
+ OptionalContext->ReportProgress(CurrentOp, Details, Total, Remaining, ElapsedTimeMS);
}
}
@@ -124,14 +104,17 @@ namespace remotestore_impl {
return OptionalContext->IsCancelled();
}
- std::string GetStats(const RemoteProjectStore::Stats& Stats, uint64_t ElapsedWallTimeMS)
+ std::string GetStats(const RemoteProjectStore::Stats& Stats,
+ const BuildStorageCache::Statistics* OptionalCacheStats,
+ uint64_t ElapsedWallTimeMS)
{
- return fmt::format(
- "Sent: {} ({}bits/s) Recv: {} ({}bits/s)",
- NiceBytes(Stats.m_SentBytes),
- NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((Stats.m_SentBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u),
- NiceBytes(Stats.m_ReceivedBytes),
- NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((Stats.m_ReceivedBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u));
+ uint64_t SentBytes = Stats.m_SentBytes + (OptionalCacheStats ? OptionalCacheStats->TotalBytesWritten.load() : 0);
+ uint64_t ReceivedBytes = Stats.m_ReceivedBytes + (OptionalCacheStats ? OptionalCacheStats->TotalBytesRead.load() : 0);
+ return fmt::format("Sent: {} ({}bits/s) Recv: {} ({}bits/s)",
+ NiceBytes(SentBytes),
+ NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((SentBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u),
+ NiceBytes(ReceivedBytes),
+ NiceNum(ElapsedWallTimeMS > 0u ? static_cast<uint64_t>((ReceivedBytes * 8 * 1000) / ElapsedWallTimeMS) : 0u));
}
void LogRemoteStoreStatsDetails(const RemoteProjectStore::Stats& Stats)
@@ -165,6 +148,925 @@ namespace remotestore_impl {
return BlockIndex;
}
+ // BlockComposer packs attachment chunks (each identified by an IoHash and a byte size) into
+ // fixed-size blocks subject to two constraints:
+ // - The total encoded content of a block must not exceed UsableBlockSize bytes.
+ // - A block may contain at most MaxChunksPerBlock chunk entries.
+ //
+ // Chunks belonging to the same op key (Oid) are kept together in one block whenever possible,
+ // so that a single block fetch can satisfy an entire op without needing to read multiple blocks.
+ //
+ // When a block is complete the OnNewBlock callback is invoked with ownership of the chunk-hash
+ // vector for that block. The callback is also invoked for any partially-filled pending block
+ // that remains after all attachments have been processed.
+ class BlockComposer
+ {
+ public:
+ struct Configuration
+ {
+ uint64_t MaxBlockSize = 0; // Total encoded block size limit in bytes (includes header overhead).
+ uint64_t MaxChunksPerBlock = 0; // Maximum number of chunk entries allowed in a single block.
+ uint64_t MaxChunkEmbedSize = 0; // Maximum size of one embeddable chunk; used to calculate worst-case header size.
+ std::function<bool()>
+ IsCancelledFunc; // Optional: if set and returns true, Compose returns early without emitting remaining blocks.
+ };
+
+ explicit BlockComposer(const Configuration& Config) : m_Config(Config), m_UsableBlockSize(CalculateUsableBlockSize(m_Config)) {}
+
+ // Compose distributes AttachmentHashes into blocks via a two-phase algorithm.
+ //
+ // Phase 1 - Gather (inner while loop):
+ // Starting from the current index, collect all consecutive attachments that share the same
+ // op key (Oid) into CurrentOpRawHashes / CurrentOpChunkSizes. Collection stops (with
+ // CurrentOpFillFullBlock = false) when a different op key is encountered. Collection also
+ // stops early (with CurrentOpFillFullBlock = true) if adding the next same-key attachment
+ // would exceed m_UsableBlockSize by bytes OR would reach MaxChunksPerBlock by count -
+ // meaning the gathered chunks exactly saturate one block and must be emitted immediately.
+ //
+ // Phase 2 - Place (while loop over CurrentOpChunkSizes):
+ // Decides where the gathered chunks go. Exactly one of four mutually exclusive paths runs
+ // per iteration; after each path the loop re-evaluates with whatever chunks remain:
+ //
+ // Path A: CurrentOpFillFullBlock == true
+ // The gathered set exactly fills one block. Emit it immediately as a standalone block
+ // and clear CurrentOpChunkSizes. The pending block is left untouched.
+ //
+ // Path B: All gathered chunks fit in the pending block (both size and count constraints met)
+ // Merge the gathered chunks into PendingChunkHashes/PendingBlockSize and clear the
+ // current-op buffers. If the pending block is now exactly full, flush it immediately.
+ //
+ // Path C: Gathered chunks don't fit AND pending block is >75% full by bytes
+ // The pending block is already well-utilised; flush it now and loop so that the gathered
+ // chunks are re-evaluated against the freshly emptied pending block.
+ //
+ // Path D: Gathered chunks don't fit AND pending block is <=75% full by bytes
+ // The binding constraint is chunk count, not bytes. Greedily fill the pending block with
+ // as many gathered chunks as fit (stopping at the first chunk that would violate either
+ // size or count), flush the pending block, remove the added chunks from the current-op
+ // buffers, and loop so the remaining gathered chunks are re-evaluated.
+ //
+ // Final flush: after all attachments have been processed, any non-empty pending block is
+ // emitted.
+ void Compose(std::span<const IoHash> AttachmentHashes,
+ std::span<const uint64_t> AttachmentSizes,
+ std::span<const Oid> AttachmentKeys,
+ const std::function<void(std::vector<IoHash>&& ChunkRawHashes)>& OnNewBlock)
+ {
+ std::vector<IoHash> PendingChunkHashes;
+ uint64_t PendingBlockSize = 0;
+
+ size_t SortedUploadAttachmentsIndex = 0;
+
+ Stopwatch AssembleBlocksProgressTimer;
+ while (SortedUploadAttachmentsIndex < AttachmentHashes.size())
+ {
+ if (m_Config.IsCancelledFunc && m_Config.IsCancelledFunc())
+ {
+ return;
+ }
+
+ const IoHash& FirstAttachmentHash = AttachmentHashes[SortedUploadAttachmentsIndex];
+ const Oid FirstAttachmentOpKey = AttachmentKeys[SortedUploadAttachmentsIndex];
+ uint64_t CurrentOpAttachmentsSize = AttachmentSizes[SortedUploadAttachmentsIndex];
+ ZEN_ASSERT(CurrentOpAttachmentsSize <= m_Config.MaxChunkEmbedSize);
+
+ std::vector<IoHash> CurrentOpRawHashes;
+ CurrentOpRawHashes.push_back(FirstAttachmentHash);
+
+ std::vector<uint64_t> CurrentOpChunkSizes;
+ CurrentOpChunkSizes.push_back(CurrentOpAttachmentsSize);
+
+ bool CurrentOpFillFullBlock = false;
+
+ while (SortedUploadAttachmentsIndex + CurrentOpRawHashes.size() < AttachmentHashes.size())
+ {
+ size_t NextSortedUploadAttachmentsIndex = SortedUploadAttachmentsIndex + CurrentOpChunkSizes.size();
+ const Oid NextAttachmentOpKey = AttachmentKeys[NextSortedUploadAttachmentsIndex];
+ if (NextAttachmentOpKey != FirstAttachmentOpKey)
+ {
+ break;
+ }
+ const IoHash& NextAttachmentHash = AttachmentHashes[NextSortedUploadAttachmentsIndex];
+ uint64_t NextOpAttachmentSize = AttachmentSizes[NextSortedUploadAttachmentsIndex];
+ ZEN_ASSERT(NextOpAttachmentSize <= m_Config.MaxChunkEmbedSize);
+
+ if (CurrentOpAttachmentsSize + NextOpAttachmentSize > m_UsableBlockSize)
+ {
+ CurrentOpFillFullBlock = true;
+ break;
+ }
+ CurrentOpRawHashes.push_back(NextAttachmentHash);
+ CurrentOpChunkSizes.push_back(NextOpAttachmentSize);
+ CurrentOpAttachmentsSize += NextOpAttachmentSize;
+
+ if (CurrentOpRawHashes.size() == m_Config.MaxChunksPerBlock)
+ {
+ CurrentOpFillFullBlock = true;
+ break;
+ }
+ }
+ SortedUploadAttachmentsIndex += CurrentOpChunkSizes.size();
+
+ while (!CurrentOpChunkSizes.empty())
+ {
+ size_t CurrentOpAttachmentCount = CurrentOpChunkSizes.size();
+
+ ZEN_ASSERT(CurrentOpRawHashes.size() == CurrentOpChunkSizes.size());
+ ZEN_ASSERT(CurrentOpAttachmentsSize <= m_UsableBlockSize);
+ ZEN_ASSERT(CurrentOpAttachmentCount <= m_Config.MaxChunksPerBlock);
+
+ // Path A: gathered chunks exactly fill one block -- emit as a standalone block immediately.
+ if (CurrentOpFillFullBlock)
+ {
+ OnNewBlock(std::move(CurrentOpRawHashes));
+ CurrentOpChunkSizes.clear();
+ CurrentOpAttachmentsSize = 0;
+ CurrentOpFillFullBlock = false;
+ }
+ else if ((PendingBlockSize + CurrentOpAttachmentsSize) <= m_UsableBlockSize &&
+ (PendingChunkHashes.size() + CurrentOpAttachmentCount) <= m_Config.MaxChunksPerBlock)
+ {
+ // Path B: all gathered chunks fit in the pending block -- merge them in.
+ PendingChunkHashes.insert(PendingChunkHashes.end(), CurrentOpRawHashes.begin(), CurrentOpRawHashes.end());
+ PendingBlockSize += CurrentOpAttachmentsSize;
+ ZEN_ASSERT(PendingBlockSize <= m_UsableBlockSize);
+ ZEN_ASSERT(PendingChunkHashes.size() <= m_Config.MaxChunksPerBlock);
+
+ CurrentOpRawHashes.clear();
+ CurrentOpChunkSizes.clear();
+ CurrentOpAttachmentsSize = 0;
+
+ if (PendingBlockSize == m_UsableBlockSize || PendingChunkHashes.size() == m_Config.MaxChunksPerBlock)
+ {
+ OnNewBlock(std::move(PendingChunkHashes));
+ PendingChunkHashes.clear();
+ PendingBlockSize = 0;
+ }
+ }
+ else if (PendingBlockSize > (m_UsableBlockSize * 3) / 4)
+ {
+ // Path C: gathered chunks don't fit AND pending block is >75% full by bytes -- flush pending
+ // block now; loop to re-evaluate gathered chunks against the freshly emptied pending block.
+ OnNewBlock(std::move(PendingChunkHashes));
+ PendingChunkHashes.clear();
+ PendingBlockSize = 0;
+ }
+ else
+ {
+ // Path D: gathered chunks don't fit AND pending block is <=75% full by bytes -- the
+ // binding constraint is chunk count. Greedily fill the pending block with as many
+ // chunks as fit, flush it, remove them from the current-op buffers, and loop with the
+ // remaining gathered chunks in the next iteration.
+
+ size_t AddedChunkCount = 0;
+ uint64_t AddedChunkSize = 0;
+
+ for (size_t CurrentChunkIndex = 0; CurrentChunkIndex < CurrentOpRawHashes.size(); CurrentChunkIndex++)
+ {
+ uint64_t ChunkSize = CurrentOpChunkSizes[CurrentChunkIndex];
+ if (PendingBlockSize + ChunkSize > m_UsableBlockSize)
+ {
+ break;
+ }
+ if (PendingChunkHashes.size() == m_Config.MaxChunksPerBlock)
+ {
+ break;
+ }
+ PendingBlockSize += ChunkSize;
+ PendingChunkHashes.push_back(CurrentOpRawHashes[CurrentChunkIndex]);
+ AddedChunkSize += ChunkSize;
+ AddedChunkCount++;
+
+ ZEN_ASSERT(PendingBlockSize <= m_UsableBlockSize);
+ ZEN_ASSERT(PendingChunkHashes.size() <= m_Config.MaxChunksPerBlock);
+ }
+ ZEN_ASSERT(AddedChunkSize <= CurrentOpAttachmentsSize);
+
+ ZEN_ASSERT(PendingBlockSize <= m_UsableBlockSize);
+ ZEN_ASSERT(PendingChunkHashes.size() <= m_Config.MaxChunksPerBlock);
+ ZEN_ASSERT(AddedChunkCount < CurrentOpRawHashes.size());
+
+ OnNewBlock(std::move(PendingChunkHashes));
+ PendingChunkHashes.clear();
+ PendingBlockSize = 0;
+
+ CurrentOpRawHashes.erase(CurrentOpRawHashes.begin(), CurrentOpRawHashes.begin() + AddedChunkCount);
+ CurrentOpChunkSizes.erase(CurrentOpChunkSizes.begin(), CurrentOpChunkSizes.begin() + AddedChunkCount);
+ CurrentOpAttachmentsSize -= AddedChunkSize;
+ }
+ }
+ }
+ if (!PendingChunkHashes.empty())
+ {
+ ZEN_ASSERT(PendingBlockSize < m_UsableBlockSize);
+ ZEN_ASSERT(PendingChunkHashes.size() < m_Config.MaxChunksPerBlock);
+ OnNewBlock(std::move(PendingChunkHashes));
+ PendingChunkHashes.clear();
+ }
+ }
+
+ private:
+ // CalculateUsableBlockSize computes the maximum bytes available for chunk content in one
+ // block. The block header encodes:
+ // - A CompressedBuffer header of fixed size.
+ // - One VarUInt field encoding MaxChunksPerBlock.
+ // - MaxChunksPerBlock VarUInt entries each encoding one chunk size (bounded by
+ // MaxChunkEmbedSize, which determines the worst-case VarUInt width).
+ // MaxHeaderSize is the worst-case total header size, so
+ // UsableBlockSize = MaxBlockSize - MaxHeaderSize is a conservative bound that guarantees
+ // chunk content always fits within the encoded block.
+ static uint64_t CalculateUsableBlockSize(const Configuration& Config)
+ {
+ ZEN_ASSERT(Config.MaxChunksPerBlock > 0);
+ ZEN_ASSERT(Config.MaxChunkEmbedSize > 0);
+ uint64_t MaxHeaderSize = CompressedBuffer::GetHeaderSizeForNoneEncoder() + MeasureVarUInt(Config.MaxChunksPerBlock) +
+ MeasureVarUInt(Config.MaxChunkEmbedSize) * Config.MaxChunksPerBlock;
+ ZEN_ASSERT(Config.MaxBlockSize > MaxHeaderSize);
+ return Config.MaxBlockSize - MaxHeaderSize;
+ }
+
+ const Configuration m_Config;
+ const uint64_t m_UsableBlockSize = 0;
+ };
+
+ IoBuffer CompressToTempFile(const IoHash& RawHash,
+ const IoBuffer& RawData,
+ const std::filesystem::path& AttachmentPath,
+ OodleCompressor Compressor,
+ OodleCompressionLevel CompressionLevel)
+ {
+ if (IsFile(AttachmentPath))
+ {
+ ZEN_WARN("Temp attachment file already exists at '{}', truncating", AttachmentPath);
+ }
+ BasicFile CompressedFile;
+ std::error_code Ec;
+ CompressedFile.Open(AttachmentPath, BasicFile::Mode::kTruncateDelete, Ec);
+ if (Ec)
+ {
+ throw std::system_error(Ec, fmt::format("Failed to create temp file for blob {} at '{}'", RawHash, AttachmentPath));
+ }
+
+ if (RawData.GetSize() < 512u * 1024u)
+ {
+ CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(RawData)), Compressor, CompressionLevel);
+ if (!CompressedBlob)
+ {
+ throw std::runtime_error(fmt::format("Failed to compress blob {}", RawHash));
+ }
+ CompressedFile.Write(CompressedBlob.GetCompressed(), 0);
+ }
+ else
+ {
+ bool CouldCompress = CompressedBuffer::CompressToStream(
+ CompositeBuffer(SharedBuffer(RawData)),
+ [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) {
+ ZEN_UNUSED(SourceOffset, SourceSize);
+ CompressedFile.Write(RangeBuffer, Offset);
+ },
+ Compressor,
+ CompressionLevel);
+ if (!CouldCompress)
+ {
+ // Compressed is larger than source data...
+ CompressedBuffer CompressedBlob =
+ CompressedBuffer::Compress(SharedBuffer(std::move(RawData)), OodleCompressor::Mermaid, OodleCompressionLevel::None);
+ if (!CompressedBlob)
+ {
+ throw std::runtime_error(fmt::format("Failed to compress blob {}", RawHash));
+ }
+ CompressedFile.SetFileSize(0);
+ CompressedFile.Write(CompressedBlob.GetCompressed(), 0);
+ }
+ }
+ IoBuffer TempAttachmentBuffer = IoBufferBuilder::MakeFromFile(AttachmentPath);
+ CompressedFile.Close();
+ TempAttachmentBuffer.SetDeleteOnClose(true);
+ ZEN_ASSERT_SLOW(CompressedBuffer::FromCompressedNoValidate(IoBuffer(TempAttachmentBuffer)).CompressedBuffer::Decompress());
+ return TempAttachmentBuffer;
+ }
+
+ struct FoundAttachment
+ {
+ std::filesystem::path RawPath; // If not stored in cid
+ uint64_t Size = 0;
+ Oid Key = Oid::Zero;
+ };
+
// Builds the exported "ops" section for an oplog upload and collects every attachment
// referenced by the ops into UploadAttachments (keyed by attachment hash).
//
// When EmbedLooseFiles is set, ops whose file entries have a zero data hash (i.e. the
// payload still lives as a loose file under Project.RootDir/serverpath) are rewritten:
// the loose file is hashed and the entry gains a binary-attachment "data" reference.
// Missing loose files either get skipped (IgnoreMissingAttachments) or abort the whole
// rewrite with std::runtime_error.
//
// Returns a CbObject containing the "ops" array, or an empty object if the job was
// cancelled mid-iteration. Progress is reported every 1000 ops via OptionalContext
// (which may be null).
//
// NOTE(review): Timer and RewriteOplogTimer below are declared but never read in this
// function -- candidates for removal.
CbObject RewriteOplog(
    LoggerRef InLog,
    ProjectStore::Project& Project,
    ProjectStore::Oplog& Oplog,
    bool IgnoreMissingAttachments,
    bool EmbedLooseFiles,
    const std::filesystem::path& AttachmentTempPath,
    std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher>& UploadAttachments, // TODO: Rename to OutUploadAttachments
    JobContext* OptionalContext)
{
    ZEN_SCOPED_LOG(InLog);
    size_t OpCount = 0;
    CreateDirectories(AttachmentTempPath);

    // Rewrites a single op: hashes any loose-file entries, registers them in
    // UploadAttachments, and hands the (possibly rewritten) op to CB. If cancellation is
    // detected mid-op, it returns without invoking CB -- the partially-written CbWriter is
    // simply discarded; the caller re-checks IsCancelled after iteration.
    auto RewriteOp = [&](const Oid& Key, CbObjectView Op, const std::function<void(CbObjectView)>& CB) {
        bool OpRewritten = false;
        CbArrayView Files = Op["files"sv].AsArrayView();
        if (Files.Num() == 0)
        {
            // No file entries -> nothing to rewrite; pass the op through untouched.
            CB(Op);
            return;
        }

        CbWriter Cbo;
        Cbo.BeginArray("files"sv);

        for (CbFieldView& Field : Files)
        {
            if (remotestore_impl::IsCancelled(OptionalContext))
            {
                return;
            }

            bool CopyField = true;

            if (CbObjectView View = Field.AsObjectView())
            {
                IoHash DataHash = View["data"sv].AsHash();

                // Zero hash means the payload has not been ingested yet and still lives as
                // a loose file referenced by "serverpath".
                if (DataHash == IoHash::Zero)
                {
                    std::string_view ServerPath = View["serverpath"sv].AsString();
                    std::filesystem::path FilePath = (Project.RootDir / ServerPath).make_preferred();
                    MakeSafeAbsolutePathInPlace(FilePath);
                    if (!IsFile(FilePath))
                    {
                        remotestore_impl::ReportMessage(
                            OptionalContext,
                            fmt::format("Missing attachment '{}' for op '{}'", FilePath, View["id"sv].AsObjectId()));
                        if (IgnoreMissingAttachments)
                        {
                            // Drop the broken file entry from the rewritten array.
                            continue;
                        }
                        else
                        {
                            ExtendableStringBuilder<1024> Sb;
                            Sb.Append("Failed to find attachment '");
                            Sb.Append(FilePath.string());
                            Sb.Append("' for op: \n");
                            View.ToJson(Sb);
                            throw std::runtime_error(Sb.ToString());
                        }
                    }

                    {
                        Stopwatch HashTimer;
                        SharedBuffer DataBuffer(IoBufferBuilder::MakeFromFile(FilePath));
                        DataHash = IoHash::HashBuffer(CompositeBuffer(DataBuffer));
                        ZEN_INFO("Hashed loose file '{}' {}: {} in {}",
                                 FilePath,
                                 NiceBytes(DataBuffer.GetSize()),
                                 DataHash,
                                 NiceTimeSpanMs(HashTimer.GetElapsedTimeMs()));
                    }

                    // Rewrite file array entry with new data reference
                    CbObjectWriter Writer;
                    RewriteCbObject(Writer, View, [&](CbObjectWriter&, CbFieldView Field) -> bool {
                        if (Field.GetName() == "data"sv)
                        {
                            // omit this field as we will write it explicitly ourselves
                            return true;
                        }
                        return false;
                    });
                    Writer.AddBinaryAttachment("data"sv, DataHash);
                    UploadAttachments.insert_or_assign(DataHash, FoundAttachment{.RawPath = FilePath, .Key = Key});

                    CbObject RewrittenOp = Writer.Save();
                    Cbo.AddObject(std::move(RewrittenOp));
                    CopyField = false;
                }
            }

            if (CopyField)
            {
                // Entry was fine as-is; copy it verbatim into the rewritten array.
                Cbo.AddField(Field);
            }
            else
            {
                OpRewritten = true;
            }
        }

        if (!OpRewritten)
        {
            // Every entry was copied unchanged; emit the original op and discard Cbo.
            CB(Op);
            return;
        }

        Cbo.EndArray();
        CbArray FilesArray = Cbo.Save().AsArray();

        // Replace the op's "files" array with the rewritten one, keeping all other fields.
        CbObject RewrittenOp = RewriteCbObject(Op, [&](CbObjectWriter& NewWriter, CbFieldView Field) -> bool {
            if (Field.GetName() == "files"sv)
            {
                NewWriter.AddArray("files"sv, FilesArray);

                return true;
            }

            return false;
        });
        CB(RewrittenOp);
    };

    remotestore_impl::ReportMessage(OptionalContext, "Building exported oplog and collecting attachments");

    Stopwatch Timer;

    size_t TotalOpCount = Oplog.GetOplogEntryCount();
    Stopwatch RewriteOplogTimer;
    CbObjectWriter SectionOpsWriter;
    SectionOpsWriter.BeginArray("ops"sv);
    {
        Stopwatch BuildingOplogProgressTimer;
        Oplog.IterateOplogWithKey([&](int, const Oid& Key, CbObjectView Op) {
            // Cancellation can only skip remaining ops, not stop the iteration itself;
            // the post-iteration IsCancelled check below handles the abort.
            if (remotestore_impl::IsCancelled(OptionalContext))
            {
                return;
            }
            // Every attachment the op already references is queued for upload, keyed by op.
            Op.IterateAttachments([&](CbFieldView FieldView) {
                UploadAttachments.insert_or_assign(FieldView.AsAttachment(), FoundAttachment{.Key = Key});
            });
            if (EmbedLooseFiles)
            {
                RewriteOp(Key, Op, [&SectionOpsWriter](CbObjectView Op) { SectionOpsWriter << Op; });
            }
            else
            {
                SectionOpsWriter << Op;
            }
            OpCount++;

            if (OpCount % 1000 == 0)
            {
                remotestore_impl::ReportProgress(OptionalContext,
                                                 "Building oplog"sv,
                                                 fmt::format("{} ops processed", OpCount),
                                                 TotalOpCount,
                                                 TotalOpCount - OpCount,
                                                 BuildingOplogProgressTimer.GetElapsedTimeMs());
            }
        });
        if (remotestore_impl::IsCancelled(OptionalContext))
        {
            // Abort: return an empty object; the partially-built SectionOpsWriter is discarded.
            return {};
        }
        if (TotalOpCount > 0)
        {
            // Final progress tick: report completion (0 remaining).
            remotestore_impl::ReportProgress(OptionalContext,
                                             "Building oplog"sv,
                                             fmt::format("{} ops processed", OpCount),
                                             TotalOpCount,
                                             0,
                                             BuildingOplogProgressTimer.GetElapsedTimeMs());
        }
    }
    SectionOpsWriter.EndArray(); // "ops"

    return SectionOpsWriter.Save();
}
+
+ // Describes one attachment that is large enough to be split into chunks.
+ // Instances are produced by FindChunkSizes() and consumed by ChunkAttachments().
+ struct FoundChunkedFile
+ {
+ // Raw (uncompressed) content hash of the attachment payload.
+ IoHash RawHash = IoHash::Zero;
+ // Buffer backing the payload; expected to reference a file on disk
+ // (ChunkAttachments() asserts GetFileReference() succeeds on it).
+ IoBuffer Source;
+ // Byte offset of the raw payload within the backing file.
+ uint64_t Offset = 0;
+ // Size in bytes of the raw payload.
+ uint64_t Size = 0;
+ };
+
+ // Resolves the size of every attachment queued for upload and decides which
+ // ones must be re-chunked. For each entry in UploadAttachments, in parallel:
+ //   - loose files (non-empty RawPath) are read from disk; files larger than
+ //     ChunkFileSizeLimit are appended to AttachmentsToChunk,
+ //   - otherwise the attachment is looked up in ChunkStore; whole-file,
+ //     uncompressed (OodleCompressionLevel::None) entries whose raw size
+ //     exceeds ChunkFileSizeLimit and that still reference a single on-disk
+ //     segment are appended to AttachmentsToChunk,
+ //   - anything that cannot be read/found is recorded in MissingHashes.
+ // Shared outputs (AttachmentsToChunk, MissingHashes) are guarded by
+ // FindChunkSizesLock. Progress is reported to OptionalContext, and
+ // cancellation via OptionalContext aborts outstanding work.
+ // NOTE(review): the worker lambda captures AttachmentTempPath (by value, one
+ // path copy per task) and MaxChunkEmbedSize but uses neither — confirm
+ // whether these captures can be dropped.
+ void FindChunkSizes(CidStore& ChunkStore,
+ WorkerThreadPool& WorkerPool,
+ size_t MaxChunkEmbedSize,
+ size_t ChunkFileSizeLimit,
+ bool AllowChunking,
+ const std::filesystem::path& AttachmentTempPath,
+ std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher>& UploadAttachments,
+ std::unordered_set<IoHash, IoHash::Hasher>& MissingHashes,
+ std::vector<FoundChunkedFile>& AttachmentsToChunk,
+ JobContext* OptionalContext)
+ {
+ if (UploadAttachments.empty())
+ {
+ return;
+ }
+ Stopwatch FindChunkSizesTimer;
+
+ // Protects AttachmentsToChunk and MissingHashes across worker tasks.
+ RwLock FindChunkSizesLock;
+
+ std::atomic<bool> AbortFlag(false);
+ std::atomic<bool> PauseFlag(false);
+ ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+
+ // One parallel task per attachment; map entries are stable, so tasks may
+ // hold pointers into UploadAttachments while the loop continues.
+ for (auto& It : UploadAttachments)
+ {
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ AbortFlag.store(true);
+ break;
+ }
+ Work.ScheduleWork(
+ WorkerPool,
+ [&ChunkStore,
+ UploadAttachment = &It.second,
+ RawHash = It.first,
+ &FindChunkSizesLock,
+ &MissingHashes,
+ AttachmentTempPath,
+ MaxChunkEmbedSize,
+ ChunkFileSizeLimit,
+ AllowChunking,
+ &AttachmentsToChunk,
+ OptionalContext](std::atomic<bool>& AbortFlag) {
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ AbortFlag.store(true);
+ }
+ if (AbortFlag)
+ {
+ return;
+ }
+ if (!UploadAttachment->RawPath.empty())
+ {
+ // Loose file on disk: size it directly from the file contents.
+ const std::filesystem::path& FilePath = UploadAttachment->RawPath;
+ IoBuffer RawData = IoBufferBuilder::MakeFromFile(FilePath);
+ if (RawData)
+ {
+ UploadAttachment->Size = RawData.GetSize();
+ if (AllowChunking && UploadAttachment->Size > ChunkFileSizeLimit)
+ {
+ FindChunkSizesLock.WithExclusiveLock([&]() {
+ AttachmentsToChunk.push_back(
+ FoundChunkedFile{.RawHash = RawHash, .Source = RawData, .Offset = 0, .Size = RawData.GetSize()});
+ });
+ }
+ }
+ else
+ {
+ FindChunkSizesLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); });
+ }
+ }
+ else
+ {
+ // Attachment lives in the chunk store; probe it by content id.
+ IoBuffer Data = ChunkStore.FindChunkByCid(RawHash);
+ if (Data)
+ {
+ UploadAttachment->Size = Data.GetSize();
+ if (AllowChunking && Data.IsWholeFile())
+ {
+ IoHash VerifyRawHash;
+ uint64_t VerifyRawSize;
+ CompressedBuffer Compressed =
+ CompressedBuffer::FromCompressed(SharedBuffer(Data), VerifyRawHash, VerifyRawSize);
+ if (Compressed)
+ {
+ if (VerifyRawSize > ChunkFileSizeLimit)
+ {
+ OodleCompressor Compressor;
+ OodleCompressionLevel CompressionLevel;
+ uint64_t BlockSize;
+ if (Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize))
+ {
+ // Only uncompressed payloads can be chunked in place: for
+ // those, "decompression" yields a view onto the original
+ // file rather than a new allocation.
+ if (CompressionLevel == OodleCompressionLevel::None)
+ {
+ CompositeBuffer Decompressed = Compressed.DecompressToComposite();
+ if (Decompressed)
+ {
+ std::span<const SharedBuffer> Segments = Decompressed.GetSegments();
+ if (Segments.size() == 1)
+ {
+ IoBuffer DecompressedData = Segments[0].AsIoBuffer();
+ IoBufferFileReference DecompressedFileRef;
+ if (DecompressedData.GetFileReference(DecompressedFileRef))
+ {
+ // Are we still pointing to disk?
+ FindChunkSizesLock.WithExclusiveLock([&]() {
+ AttachmentsToChunk.push_back(
+ FoundChunkedFile{.RawHash = RawHash,
+ .Source = Data,
+ .Offset = DecompressedFileRef.FileChunkOffset,
+ .Size = DecompressedFileRef.FileChunkSize});
+ });
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ FindChunkSizesLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); });
+ }
+ }
+ });
+ }
+
+ // Poll every second: propagate cancellation and surface progress.
+ Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ ZEN_UNUSED(IsAborted, IsPaused);
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ AbortFlag.store(true);
+ }
+ remotestore_impl::ReportProgress(OptionalContext,
+ "Finding attachments"sv,
+ fmt::format("{}{} remaining...", AbortFlag.load() ? "Aborting, " : "", PendingWork),
+ UploadAttachments.size(),
+ PendingWork,
+ FindChunkSizesTimer.GetElapsedTimeMs());
+ });
+
+ // Final 100% progress report, skipped when the work was aborted.
+ if (!AbortFlag.load())
+ {
+ remotestore_impl::ReportProgress(OptionalContext,
+ "Finding attachments"sv,
+ "",
+ UploadAttachments.size(),
+ 0,
+ FindChunkSizesTimer.GetElapsedTimeMs());
+ }
+ }
+
+ // Result of chunking one attachment: the original source buffer paired with
+ // the chunking output (chunk hashes, raw hash/size) from ChunkData().
+ struct ChunkedFile
+ {
+ // Buffer the chunks were produced from; kept alive alongside the result.
+ IoBuffer Source;
+ // Chunk descriptors and source info produced by ChunkData().
+ ChunkedInfoWithSource Chunked;
+ };
+
+ // Chunks every attachment in AttachmentsToChunk in parallel on WorkerPool and
+ // returns one ChunkedFile per input, index-aligned with AttachmentsToChunk.
+ // Each input's Source buffer must reference an on-disk file (asserted); the
+ // file handle is borrowed via Attach/Detach for the duration of chunking.
+ // Cancellation via OptionalContext aborts remaining work; aborted/skipped
+ // entries are left default-constructed in the returned vector.
+ std::vector<ChunkedFile> ChunkAttachments(WorkerThreadPool& WorkerPool,
+ const std::vector<remotestore_impl::FoundChunkedFile>& AttachmentsToChunk,
+ JobContext* OptionalContext)
+ {
+ if (AttachmentsToChunk.empty())
+ {
+ return {};
+ }
+ Stopwatch ChunkAttachmentsTimer;
+
+ // Pre-sized so each task writes its own slot without synchronization.
+ std::vector<ChunkedFile> ChunkedFiles(AttachmentsToChunk.size());
+
+ std::atomic<bool> AbortFlag(false);
+ std::atomic<bool> PauseFlag(false);
+ ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+
+ for (size_t ChunkFileIndexToChunk = 0; ChunkFileIndexToChunk < AttachmentsToChunk.size(); ChunkFileIndexToChunk++)
+ {
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ AbortFlag.store(true);
+ break;
+ }
+ Work.ScheduleWork(WorkerPool,
+ [&AttachmentsToChunk, ChunkFileIndexToChunk, &ChunkedFiles, OptionalContext](std::atomic<bool>& AbortFlag) {
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ AbortFlag.store(true);
+ }
+ if (AbortFlag)
+ {
+ return;
+ }
+ const remotestore_impl::FoundChunkedFile& AttachmentToChunk = AttachmentsToChunk[ChunkFileIndexToChunk];
+ const IoHash& RawHash = AttachmentToChunk.RawHash;
+
+ // FindChunkSizes() only queues file-backed buffers, so this must hold.
+ const IoBuffer& Buffer = AttachmentToChunk.Source;
+ IoBufferFileReference FileRef;
+ bool IsFile = Buffer.GetFileReference(FileRef);
+ ZEN_ASSERT(IsFile);
+
+ Stopwatch ChunkOneTimer;
+
+ uint64_t Offset = AttachmentToChunk.Offset;
+ uint64_t Size = AttachmentToChunk.Size;
+
+ // Borrow the underlying OS handle; the guard ensures we detach (not
+ // close) it even if ChunkData throws.
+ BasicFile SourceFile;
+ SourceFile.Attach(FileRef.FileHandle);
+ auto __ = MakeGuard([&SourceFile]() { SourceFile.Detach(); });
+
+ ChunkedFile& Chunked = ChunkedFiles[ChunkFileIndexToChunk];
+ Chunked.Source = Buffer;
+ Chunked.Chunked = ChunkData(SourceFile, Offset, Size, UShaderByteCodeParams);
+ // Chunking must reproduce the attachment's advertised raw hash.
+ ZEN_ASSERT(Chunked.Chunked.Info.RawHash == RawHash);
+
+ ZEN_INFO("Chunked large attachment '{}' {} into {} chunks in {}",
+ RawHash,
+ NiceBytes(Chunked.Chunked.Info.RawSize),
+ Chunked.Chunked.Info.ChunkHashes.size(),
+ NiceTimeSpanMs(ChunkOneTimer.GetElapsedTimeMs()));
+ });
+ }
+
+ // Poll every second: propagate cancellation and surface progress.
+ Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ ZEN_UNUSED(IsAborted, IsPaused);
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ AbortFlag.store(true);
+ }
+ remotestore_impl::ReportProgress(OptionalContext,
+ "Chunking attachments"sv,
+ fmt::format("{}{} remaining...", AbortFlag.load() ? "Aborting, " : "", PendingWork),
+ AttachmentsToChunk.size(),
+ PendingWork,
+ ChunkAttachmentsTimer.GetElapsedTimeMs());
+ });
+
+ // Final 100% progress report, skipped when the work was aborted.
+ if (!AbortFlag.load())
+ {
+ remotestore_impl::ReportProgress(OptionalContext,
+ "Chunking attachments"sv,
+ "",
+ AttachmentsToChunk.size(),
+ 0,
+ ChunkAttachmentsTimer.GetElapsedTimeMs());
+ }
+ return ChunkedFiles;
+ }
+
+ // Sorts every upload attachment into one of two output maps, in parallel:
+ //   - LargeChunkAttachments: hash -> deferred fetch function that produces
+ //     the (compressed) payload on demand, for attachments too large to embed
+ //     in a block (compressed size > MaxChunkEmbedSize),
+ //   - LooseUploadAttachments: hash -> (raw size, compressed temp-file buffer)
+ //     for small loose files that fit within MaxChunkEmbedSize.
+ // Loose files (non-empty RawPath) are compressed to delete-on-close temp
+ // files under AttachmentTempPath; chunk-store-backed attachments larger than
+ // MaxChunkEmbedSize get a fetch function that reads from ChunkStore.
+ // NOTE(review): chunk-store attachments at or below MaxChunkEmbedSize are
+ // added to neither map — presumably they are embedded directly by the
+ // caller; confirm against the call site.
+ // Both output maps are guarded by ResolveLock; worker exceptions propagate
+ // through ParallelWork. Cancellation via OptionalContext aborts the work.
+ void ResolveAttachments(CidStore& ChunkStore,
+ WorkerThreadPool& WorkerPool,
+ uint64_t MaxChunkEmbedSize,
+ const std::filesystem::path& AttachmentTempPath,
+ std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher>& UploadAttachments,
+ std::unordered_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher>& LargeChunkAttachments,
+ std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher>& LooseUploadAttachments,
+ JobContext* OptionalContext)
+ {
+ ZEN_ASSERT(!UploadAttachments.empty());
+ Stopwatch UploadAttachmentsTimer;
+
+ // Protects LargeChunkAttachments and LooseUploadAttachments.
+ RwLock ResolveLock;
+
+ std::atomic<bool> AbortFlag(false);
+ std::atomic<bool> PauseFlag(false);
+ ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+
+ // One parallel task per attachment; map entries are stable, so tasks may
+ // hold pointers into UploadAttachments while the loop continues.
+ for (auto& It : UploadAttachments)
+ {
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ AbortFlag.store(true);
+ break;
+ }
+ Work.ScheduleWork(
+ WorkerPool,
+ [&ChunkStore,
+ MaxChunkEmbedSize,
+ &AttachmentTempPath,
+ &ResolveLock,
+ &LargeChunkAttachments,
+ &LooseUploadAttachments,
+ UploadAttachment = &It.second,
+ RawHash = It.first,
+ OptionalContext](std::atomic<bool>& AbortFlag) {
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ AbortFlag.store(true);
+ }
+ if (AbortFlag)
+ {
+ return;
+ }
+ if (!UploadAttachment->RawPath.empty())
+ {
+ if (UploadAttachment->Size > (MaxChunkEmbedSize * 2))
+ {
+ // Assume the compressed file is going to be larger than MaxChunkEmbedSize, even if it isn't
+ // it will be a loose attachment instead of going into a block
+
+ // Deferred: compression to the temp file happens only when the
+ // fetch function is invoked, not during this pass.
+ TGetAttachmentBufferFunc FetchFunc =
+ [RawPath = UploadAttachment->RawPath, AttachmentTempPath, RawSize = UploadAttachment->Size](
+ const IoHash& RawHash) -> CompositeBuffer {
+ IoBuffer RawData = IoBufferBuilder::MakeFromFile(RawPath);
+ if (!RawData)
+ {
+ throw std::runtime_error(
+ fmt::format("Failed to read source file for blob {} from '{}'", RawHash, RawPath));
+ }
+
+ std::filesystem::path AttachmentPath = AttachmentTempPath;
+ AttachmentPath.append(RawHash.ToHexString());
+
+ IoBuffer TempAttachmentBuffer = remotestore_impl::CompressToTempFile(RawHash,
+ RawData,
+ AttachmentPath,
+ OodleCompressor::Mermaid,
+ OodleCompressionLevel::VeryFast);
+ if (!TempAttachmentBuffer)
+ {
+ throw std::runtime_error(fmt::format("Failed to compressed source file for blob {} from '{}' to '{}'",
+ RawHash,
+ RawPath,
+ AttachmentPath));
+ }
+ // Temp file is cleaned up when the buffer is released.
+ TempAttachmentBuffer.SetDeleteOnClose(true);
+
+ ZEN_INFO("Saved temp attachment to '{}', {} ({})",
+ AttachmentPath,
+ NiceBytes(RawSize),
+ NiceBytes(TempAttachmentBuffer.GetSize()));
+ return CompositeBuffer(SharedBuffer(std::move(TempAttachmentBuffer)));
+ };
+
+ RwLock::ExclusiveLockScope _(ResolveLock);
+ LargeChunkAttachments.insert_or_assign(RawHash, std::move(FetchFunc));
+ }
+ else
+ {
+ // Compress inline - check compressed size to see if it should go into a block or not
+ IoBuffer RawData = IoBufferBuilder::MakeFromFile(UploadAttachment->RawPath);
+ if (!RawData)
+ {
+ throw std::runtime_error(
+ fmt::format("Failed to read source file for blob {} from '{}'", RawHash, UploadAttachment->RawPath));
+ }
+
+ std::filesystem::path TempFilePath = AttachmentTempPath;
+ TempFilePath.append(RawHash.ToHexString());
+
+ // NOTE(review): unlike the deferred path above, a null result from
+ // CompressToTempFile is not checked here before use — confirm it
+ // cannot fail silently on this path.
+ IoBuffer TempAttachmentBuffer = remotestore_impl::CompressToTempFile(RawHash,
+ RawData,
+ TempFilePath,
+ OodleCompressor::Mermaid,
+ OodleCompressionLevel::VeryFast);
+ TempAttachmentBuffer.SetDeleteOnClose(true);
+
+ uint64_t CompressedSize = TempAttachmentBuffer.GetSize();
+
+ ZEN_INFO("Saved temp attachment to '{}', {} ({})",
+ TempFilePath,
+ NiceBytes(UploadAttachment->Size),
+ NiceBytes(CompressedSize));
+
+ if (CompressedSize > MaxChunkEmbedSize)
+ {
+ // Still too big to embed: hand out the already-compressed temp
+ // buffer via a one-shot fetch function.
+ TGetAttachmentBufferFunc FetchFunc = [Data = std::move(TempAttachmentBuffer)](const IoHash&) mutable {
+ return CompositeBuffer(SharedBuffer(std::move(Data)));
+ };
+
+ RwLock::ExclusiveLockScope _(ResolveLock);
+ LargeChunkAttachments.insert_or_assign(RawHash, std::move(FetchFunc));
+ }
+ else
+ {
+ // Small enough to embed; record Size as the compressed size from
+ // here on, paired with the original raw size in the map value.
+ UploadAttachment->Size = CompressedSize;
+
+ std::pair<uint64_t, IoBuffer> LooseAttachment(RawData.GetSize(), std::move(TempAttachmentBuffer));
+
+ RwLock::ExclusiveLockScope _(ResolveLock);
+ LooseUploadAttachments.insert_or_assign(RawHash, std::move(LooseAttachment));
+ }
+ }
+ }
+ else
+ {
+ // Chunk-store-backed attachment: only large ones need a fetch
+ // function; smaller ones are handled elsewhere (see NOTE above).
+ if (UploadAttachment->Size > MaxChunkEmbedSize)
+ {
+ TGetAttachmentBufferFunc FetchFunc = [&ChunkStore](const IoHash& RawHash) {
+ return CompositeBuffer(SharedBuffer(ChunkStore.FindChunkByCid(RawHash)));
+ };
+ RwLock::ExclusiveLockScope _(ResolveLock);
+ LargeChunkAttachments.insert_or_assign(RawHash, std::move(FetchFunc));
+ }
+ }
+ });
+ }
+
+ // Poll every second: propagate cancellation and surface progress.
+ Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ ZEN_UNUSED(IsAborted, IsPaused);
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ AbortFlag.store(true);
+ }
+ remotestore_impl::ReportProgress(OptionalContext,
+ "Resolving attachments"sv,
+ fmt::format("{}{} remaining...", AbortFlag.load() ? "Aborting, " : "", PendingWork),
+ UploadAttachments.size(),
+ PendingWork,
+ UploadAttachmentsTimer.GetElapsedTimeMs());
+ });
+
+ // Final 100% progress report, skipped when the work was aborted.
+ if (!AbortFlag.load())
+ {
+ remotestore_impl::ReportProgress(OptionalContext,
+ "Resolving attachments"sv,
+ "",
+ UploadAttachments.size(),
+ 0,
+ UploadAttachmentsTimer.GetElapsedTimeMs());
+ }
+ }
+
RemoteProjectStore::Result WriteOplogSection(ProjectStore::Oplog& Oplog, const CbObjectView& SectionObject, JobContext* OptionalContext)
{
using namespace std::literals;
@@ -198,7 +1100,8 @@ namespace remotestore_impl {
"Writing oplog"sv,
fmt::format("{} remaining...", OpCount - OpsCompleteCount),
OpCount,
- OpCount - OpsCompleteCount);
+ OpCount - OpsCompleteCount,
+ Timer.GetElapsedTimeMs());
};
BinaryWriter Writer;
@@ -222,7 +1125,7 @@ namespace remotestore_impl {
if (OpCount > 0)
{
- ReportProgress(OptionalContext, "Writing oplog"sv, ""sv, OpCount, 0);
+ ReportProgress(OptionalContext, "Writing oplog"sv, ""sv, OpCount, 0, Timer.GetElapsedTimeMs());
}
return RemoteProjectStore::Result{.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0};
@@ -240,70 +1143,78 @@ namespace remotestore_impl {
std::atomic<uint64_t> AttachmentsStored = 0;
std::atomic<uint64_t> AttachmentBytesStored = 0;
std::atomic_size_t MissingAttachmentCount = 0;
+
+ std::atomic<uint64_t> ChunksCompleteCount = 0;
};
- class JobContextLogOutput : public OperationLogOutput
+ class JobContextSink : public logging::Sink
{
public:
- JobContextLogOutput(JobContext* OptionalContext) : m_OptionalContext(OptionalContext) {}
- virtual void EmitLogMessage(int LogLevel, std::string_view Format, fmt::format_args Args) override
+ explicit JobContextSink(JobContext* Context) : m_Context(Context) {}
+
+ void Log(const logging::LogMessage& Msg) override
{
- ZEN_UNUSED(LogLevel);
- if (m_OptionalContext)
+ if (m_Context)
{
- fmt::basic_memory_buffer<char, 250> MessageBuffer;
- fmt::vformat_to(fmt::appender(MessageBuffer), Format, Args);
- remotestore_impl::ReportMessage(m_OptionalContext, std::string_view(MessageBuffer.data(), MessageBuffer.size()));
+ m_Context->ReportMessage(Msg.GetPayload());
}
}
- virtual void SetLogOperationName(std::string_view Name) override { ZEN_UNUSED(Name); }
- virtual void SetLogOperationProgress(uint32_t StepIndex, uint32_t StepCount) override { ZEN_UNUSED(StepIndex, StepCount); }
- virtual uint32_t GetProgressUpdateDelayMS() override { return 0; }
- virtual ProgressBar* CreateProgressBar(std::string_view InSubTask) override
+ void Flush() override {}
+ void SetFormatter(std::unique_ptr<logging::Formatter>) override {}
+
+ private:
+ JobContext* m_Context;
+ };
+
+ class JobContextLogger
+ {
+ public:
+ explicit JobContextLogger(JobContext* OptionalContext)
{
- ZEN_UNUSED(InSubTask);
- return nullptr;
+ if (!OptionalContext)
+ {
+ return;
+ }
+ logging::SinkPtr ContextSink(new JobContextSink(OptionalContext));
+ Ref<logging::BroadcastSink> DefaultSink = GetDefaultBroadcastSink();
+ std::vector<logging::SinkPtr> Sinks;
+ if (DefaultSink)
+ {
+ Sinks.push_back(DefaultSink);
+ }
+ Sinks.push_back(std::move(ContextSink));
+ Ref<logging::BroadcastSink> Broadcast(new logging::BroadcastSink(std::move(Sinks)));
+ m_Log = Ref<logging::Logger>(new logging::Logger("jobcontext", Broadcast));
}
+ LoggerRef Log() const { return m_Log ? LoggerRef(*m_Log) : zen::Log(); }
+
private:
- JobContext* m_OptionalContext;
+ Ref<logging::Logger> m_Log;
};
- void DownloadAndSaveBlockChunks(CidStore& ChunkStore,
- RemoteProjectStore& RemoteStore,
- bool IgnoreMissingAttachments,
- JobContext* OptionalContext,
- WorkerThreadPool& NetworkWorkerPool,
- WorkerThreadPool& WorkerPool,
- Latch& AttachmentsDownloadLatch,
- Latch& AttachmentsWriteLatch,
- AsyncRemoteResult& RemoteResult,
+ void DownloadAndSaveBlockChunks(LoadOplogContext& Context,
+ ParallelWork& AttachmentWork,
DownloadInfo& Info,
Stopwatch& LoadAttachmentsTimer,
std::atomic_uint64_t& DownloadStartMS,
ThinChunkBlockDescription&& ThinBlockDescription,
std::vector<uint32_t>&& NeededChunkIndexes)
{
- AttachmentsDownloadLatch.AddCount(1);
- NetworkWorkerPool.ScheduleWork(
- [&RemoteStore,
- &ChunkStore,
- &WorkerPool,
- &AttachmentsDownloadLatch,
- &AttachmentsWriteLatch,
- &RemoteResult,
+ AttachmentWork.ScheduleWork(
+ Context.NetworkWorkerPool,
+ [&Context,
+ &AttachmentWork,
ThinBlockDescription = std::move(ThinBlockDescription),
NeededChunkIndexes = std::move(NeededChunkIndexes),
&Info,
&LoadAttachmentsTimer,
- &DownloadStartMS,
- IgnoreMissingAttachments,
- OptionalContext]() {
+ &DownloadStartMS](std::atomic<bool>& AbortFlag) {
ZEN_TRACE_CPU("DownloadBlockChunks");
+ ZEN_SCOPED_LOG(Context.Log);
- auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); });
- if (RemoteResult.IsError())
+ if (AbortFlag)
{
return;
}
@@ -318,18 +1229,18 @@ namespace remotestore_impl {
uint64_t Unset = (std::uint64_t)-1;
DownloadStartMS.compare_exchange_strong(Unset, LoadAttachmentsTimer.GetElapsedTimeMs());
- RemoteProjectStore::LoadAttachmentsResult Result = RemoteStore.LoadAttachments(Chunks);
+ RemoteProjectStore::LoadAttachmentsResult Result = Context.RemoteStore.LoadAttachments(Chunks);
if (Result.ErrorCode)
{
- ReportMessage(OptionalContext,
+ ReportMessage(Context.OptionalJobContext,
fmt::format("Failed to load attachments with {} chunks ({}): {}",
Chunks.size(),
- RemoteResult.GetError(),
- RemoteResult.GetErrorReason()));
+ Result.ErrorCode,
+ Result.Reason));
Info.MissingAttachmentCount.fetch_add(1);
- if (IgnoreMissingAttachments)
+ if (!Context.IgnoreMissingAttachments)
{
- RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text);
+ throw RemoteStoreError(Result.Reason, Result.ErrorCode, Result.Text);
}
return;
}
@@ -339,76 +1250,64 @@ namespace remotestore_impl {
uint64_t ChunkSize = It.second.GetCompressedSize();
Info.AttachmentBytesDownloaded.fetch_add(ChunkSize);
}
- ZEN_INFO("Loaded {} bulk attachments in {}",
- Chunks.size(),
- NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)));
- if (RemoteResult.IsError())
+ remotestore_impl::ReportMessage(Context.OptionalJobContext,
+ fmt::format("Loaded {} bulk attachments in {}",
+ Chunks.size(),
+ NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000))));
+ if (AbortFlag)
{
return;
}
- AttachmentsWriteLatch.AddCount(1);
- WorkerPool.ScheduleWork(
- [&AttachmentsWriteLatch, &RemoteResult, &Info, &ChunkStore, Chunks = std::move(Result.Chunks)]() {
- auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); });
- if (RemoteResult.IsError())
+ AttachmentWork.ScheduleWork(
+ Context.WorkerPool,
+ [&Info, &Context, Chunks = std::move(Result.Chunks)](std::atomic<bool>& AbortFlag) {
+ if (AbortFlag)
{
return;
}
if (!Chunks.empty())
{
- try
- {
- std::vector<IoBuffer> WriteAttachmentBuffers;
- std::vector<IoHash> WriteRawHashes;
- WriteAttachmentBuffers.reserve(Chunks.size());
- WriteRawHashes.reserve(Chunks.size());
+ std::vector<IoBuffer> WriteAttachmentBuffers;
+ std::vector<IoHash> WriteRawHashes;
+ WriteAttachmentBuffers.reserve(Chunks.size());
+ WriteRawHashes.reserve(Chunks.size());
- for (const auto& It : Chunks)
- {
- WriteAttachmentBuffers.push_back(It.second.GetCompressed().Flatten().AsIoBuffer());
- WriteRawHashes.push_back(It.first);
- }
- std::vector<CidStore::InsertResult> InsertResults =
- ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes, CidStore::InsertMode::kCopyOnly);
+ for (const auto& It : Chunks)
+ {
+ WriteAttachmentBuffers.push_back(It.second.GetCompressed().Flatten().AsIoBuffer());
+ WriteRawHashes.push_back(It.first);
+ }
+ std::vector<CidStore::InsertResult> InsertResults =
+ Context.ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes, CidStore::InsertMode::kCopyOnly);
- for (size_t Index = 0; Index < InsertResults.size(); Index++)
+ for (size_t Index = 0; Index < InsertResults.size(); Index++)
+ {
+ if (InsertResults[Index].New)
{
- if (InsertResults[Index].New)
- {
- Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize());
- Info.AttachmentsStored.fetch_add(1);
- }
+ Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize());
+ Info.AttachmentsStored.fetch_add(1);
}
}
- catch (const std::exception& Ex)
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Failed to bulk save {} attachments", Chunks.size()),
- Ex.what());
- }
}
},
WorkerThreadPool::EMode::EnableBacklog);
}
+ catch (const RemoteStoreError&)
+ {
+ throw;
+ }
catch (const std::exception& Ex)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Failed to bulk load {} attachments", NeededChunkIndexes.size()),
- Ex.what());
+ throw RemoteStoreError(fmt::format("Failed to bulk load {} attachments", NeededChunkIndexes.size()),
+ gsl::narrow<int>(HttpResponseCode::InternalServerError),
+ Ex.what());
}
},
WorkerThreadPool::EMode::EnableBacklog);
};
- void DownloadAndSaveBlock(CidStore& ChunkStore,
- RemoteProjectStore& RemoteStore,
- bool IgnoreMissingAttachments,
- JobContext* OptionalContext,
- WorkerThreadPool& NetworkWorkerPool,
- WorkerThreadPool& WorkerPool,
- Latch& AttachmentsDownloadLatch,
- Latch& AttachmentsWriteLatch,
- AsyncRemoteResult& RemoteResult,
+ void DownloadAndSaveBlock(LoadOplogContext& Context,
+ ParallelWork& AttachmentWork,
DownloadInfo& Info,
Stopwatch& LoadAttachmentsTimer,
std::atomic_uint64_t& DownloadStartMS,
@@ -417,28 +1316,21 @@ namespace remotestore_impl {
std::span<std::atomic<bool>> ChunkDownloadedFlags,
uint32_t RetriesLeft)
{
- AttachmentsDownloadLatch.AddCount(1);
- NetworkWorkerPool.ScheduleWork(
- [&AttachmentsDownloadLatch,
- &AttachmentsWriteLatch,
- &ChunkStore,
- &RemoteStore,
- &NetworkWorkerPool,
- &WorkerPool,
- &RemoteResult,
+ AttachmentWork.ScheduleWork(
+ Context.NetworkWorkerPool,
+ [&AttachmentWork,
+ &Context,
&Info,
&LoadAttachmentsTimer,
&DownloadStartMS,
- IgnoreMissingAttachments,
- OptionalContext,
RetriesLeft,
BlockHash = IoHash(BlockHash),
&AllNeededPartialChunkHashesLookup,
- ChunkDownloadedFlags]() {
+ ChunkDownloadedFlags](std::atomic<bool>& AbortFlag) {
ZEN_TRACE_CPU("DownloadBlock");
+ ZEN_SCOPED_LOG(Context.Log);
- auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); });
- if (RemoteResult.IsError())
+ if (AbortFlag)
{
return;
}
@@ -446,54 +1338,65 @@ namespace remotestore_impl {
{
uint64_t Unset = (std::uint64_t)-1;
DownloadStartMS.compare_exchange_strong(Unset, LoadAttachmentsTimer.GetElapsedTimeMs());
- RemoteProjectStore::LoadAttachmentResult BlockResult = RemoteStore.LoadAttachment(BlockHash, {});
- if (BlockResult.ErrorCode)
+
+ IoBuffer BlobBuffer;
+ if (Context.OptionalCache)
{
- ReportMessage(OptionalContext,
- fmt::format("Failed to download block attachment {} ({}): {}",
- BlockHash,
- RemoteResult.GetError(),
- RemoteResult.GetErrorReason()));
- Info.MissingAttachmentCount.fetch_add(1);
- if (!IgnoreMissingAttachments)
- {
- RemoteResult.SetError(BlockResult.ErrorCode, BlockResult.Reason, BlockResult.Text);
- }
- return;
+ BlobBuffer = Context.OptionalCache->GetBuildBlob(Context.CacheBuildId, BlockHash);
}
- if (RemoteResult.IsError())
+
+ if (!BlobBuffer)
{
- return;
+ RemoteProjectStore::LoadAttachmentResult BlockResult = Context.RemoteStore.LoadAttachment(BlockHash);
+ if (BlockResult.ErrorCode)
+ {
+ ReportMessage(Context.OptionalJobContext,
+ fmt::format("Failed to download block attachment {} ({}): {}",
+ BlockHash,
+ BlockResult.Reason,
+ BlockResult.Text));
+ Info.MissingAttachmentCount.fetch_add(1);
+ if (!Context.IgnoreMissingAttachments)
+ {
+ throw RemoteStoreError(BlockResult.Reason, BlockResult.ErrorCode, BlockResult.Text);
+ }
+ return;
+ }
+ if (AbortFlag)
+ {
+ return;
+ }
+ BlobBuffer = std::move(BlockResult.Bytes);
+ ZEN_DEBUG("Loaded block attachment '{}' in {} ({})",
+ BlockHash,
+ NiceTimeSpanMs(static_cast<uint64_t>(BlockResult.ElapsedSeconds * 1000)),
+ NiceBytes(BlobBuffer.Size()));
+ if (Context.OptionalCache && Context.PopulateCache)
+ {
+ Context.OptionalCache->PutBuildBlob(Context.CacheBuildId,
+ BlockHash,
+ BlobBuffer.GetContentType(),
+ CompositeBuffer(SharedBuffer(BlobBuffer)));
+ }
}
- uint64_t BlockSize = BlockResult.Bytes.GetSize();
+ uint64_t BlockSize = BlobBuffer.GetSize();
Info.AttachmentBlocksDownloaded.fetch_add(1);
- ZEN_DEBUG("Loaded block attachment '{}' in {} ({})",
- BlockHash,
- NiceTimeSpanMs(static_cast<uint64_t>(BlockResult.ElapsedSeconds * 1000)),
- NiceBytes(BlockSize));
Info.AttachmentBlockBytesDownloaded.fetch_add(BlockSize);
- AttachmentsWriteLatch.AddCount(1);
- WorkerPool.ScheduleWork(
- [&AttachmentsDownloadLatch,
- &AttachmentsWriteLatch,
- &ChunkStore,
- &RemoteStore,
- &NetworkWorkerPool,
- &WorkerPool,
- &RemoteResult,
+ AttachmentWork.ScheduleWork(
+ Context.WorkerPool,
+ [&AttachmentWork,
+ &Context,
&Info,
&LoadAttachmentsTimer,
&DownloadStartMS,
- IgnoreMissingAttachments,
- OptionalContext,
RetriesLeft,
BlockHash = IoHash(BlockHash),
&AllNeededPartialChunkHashesLookup,
ChunkDownloadedFlags,
- Bytes = std::move(BlockResult.Bytes)]() {
- auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); });
- if (RemoteResult.IsError())
+ Bytes = std::move(BlobBuffer)](std::atomic<bool>& AbortFlag) {
+ ZEN_SCOPED_LOG(Context.Log);
+ if (AbortFlag)
{
return;
}
@@ -506,59 +1409,103 @@ namespace remotestore_impl {
IoHash RawHash;
uint64_t RawSize;
CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Bytes), RawHash, RawSize);
+
+ std::string ErrorString;
+
if (!Compressed)
{
- if (RetriesLeft > 0)
+ ErrorString =
+ fmt::format("Block attachment {} is malformed, can't parse as compressed binary", BlockHash);
+ }
+ else if (RawHash != BlockHash)
+ {
+ ErrorString = fmt::format("Block attachment {} has mismatching raw hash ({})", BlockHash, RawHash);
+ }
+ else if (CompositeBuffer BlockPayload = Compressed.DecompressToComposite(); !BlockPayload)
+ {
+ ErrorString = fmt::format("Block attachment {} is malformed, can't decompress payload", BlockHash);
+ }
+ else
+ {
+ uint64_t PotentialSize = 0;
+ uint64_t UsedSize = 0;
+ uint64_t BlockSize = BlockPayload.GetSize();
+
+ uint64_t BlockHeaderSize = 0;
+
+ bool StoreChunksOK = IterateChunkBlock(
+ BlockPayload.Flatten(),
+ [&AllNeededPartialChunkHashesLookup,
+ &ChunkDownloadedFlags,
+ &WriteAttachmentBuffers,
+ &WriteRawHashes,
+ &Info,
+ &PotentialSize](CompressedBuffer&& Chunk, const IoHash& AttachmentRawHash) {
+ auto ChunkIndexIt = AllNeededPartialChunkHashesLookup.find(AttachmentRawHash);
+ if (ChunkIndexIt != AllNeededPartialChunkHashesLookup.end())
+ {
+ bool Expected = false;
+ if (ChunkDownloadedFlags[ChunkIndexIt->second].compare_exchange_strong(Expected, true))
+ {
+ WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer());
+ IoHash RawHash;
+ uint64_t RawSize;
+ ZEN_ASSERT(CompressedBuffer::ValidateCompressedHeader(
+ WriteAttachmentBuffers.back(),
+ RawHash,
+ RawSize,
+ /*OutOptionalTotalCompressedSize*/ nullptr));
+ ZEN_ASSERT(RawHash == AttachmentRawHash);
+ WriteRawHashes.emplace_back(AttachmentRawHash);
+ PotentialSize += WriteAttachmentBuffers.back().GetSize();
+ }
+ }
+ },
+ BlockHeaderSize);
+
+ if (!StoreChunksOK)
{
- ReportMessage(
- OptionalContext,
- fmt::format(
- "Block attachment {} is malformed, can't parse as compressed binary, retrying download",
- BlockHash));
- return DownloadAndSaveBlock(ChunkStore,
- RemoteStore,
- IgnoreMissingAttachments,
- OptionalContext,
- NetworkWorkerPool,
- WorkerPool,
- AttachmentsDownloadLatch,
- AttachmentsWriteLatch,
- RemoteResult,
- Info,
- LoadAttachmentsTimer,
- DownloadStartMS,
- BlockHash,
- AllNeededPartialChunkHashesLookup,
- ChunkDownloadedFlags,
- RetriesLeft - 1);
+ ErrorString = fmt::format("Invalid format for block {}", BlockHash);
+ }
+ else
+ {
+ if (!WriteAttachmentBuffers.empty())
+ {
+ std::vector<CidStore::InsertResult> Results =
+ Context.ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes);
+ for (size_t Index = 0; Index < Results.size(); Index++)
+ {
+ const CidStore::InsertResult& Result = Results[Index];
+ if (Result.New)
+ {
+ Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize());
+ Info.AttachmentsStored.fetch_add(1);
+ UsedSize += WriteAttachmentBuffers[Index].GetSize();
+ }
+ }
+ Info.ChunksCompleteCount += WriteAttachmentBuffers.size();
+ if (UsedSize < BlockSize)
+ {
+ ZEN_DEBUG("Used {} (skipping {}) out of {} for block {} ({} %) (use of matching {}%)",
+ NiceBytes(UsedSize),
+ NiceBytes(BlockSize - UsedSize),
+ NiceBytes(BlockSize),
+ BlockHash,
+ (100 * UsedSize) / BlockSize,
+ PotentialSize > 0 ? (UsedSize * 100) / PotentialSize : 0);
+ }
+ }
}
- ReportMessage(
- OptionalContext,
- fmt::format("Block attachment {} is malformed, can't parse as compressed binary", BlockHash));
- RemoteResult.SetError(
- gsl::narrow<int32_t>(HttpResponseCode::InternalServerError),
- fmt::format("Block attachment {} is malformed, can't parse as compressed binary", BlockHash),
- {});
- return;
}
- CompositeBuffer BlockPayload = Compressed.DecompressToComposite();
- if (!BlockPayload)
+
+ if (!ErrorString.empty())
{
if (RetriesLeft > 0)
{
- ReportMessage(
- OptionalContext,
- fmt::format("Block attachment {} is malformed, can't decompress payload, retrying download",
- BlockHash));
- return DownloadAndSaveBlock(ChunkStore,
- RemoteStore,
- IgnoreMissingAttachments,
- OptionalContext,
- NetworkWorkerPool,
- WorkerPool,
- AttachmentsDownloadLatch,
- AttachmentsWriteLatch,
- RemoteResult,
+ ReportMessage(Context.OptionalJobContext, fmt::format("{}, retrying download", ErrorString));
+
+ return DownloadAndSaveBlock(Context,
+ AttachmentWork,
Info,
LoadAttachmentsTimer,
DownloadStartMS,
@@ -567,128 +1514,214 @@ namespace remotestore_impl {
ChunkDownloadedFlags,
RetriesLeft - 1);
}
- ReportMessage(OptionalContext,
- fmt::format("Block attachment {} is malformed, can't decompress payload", BlockHash));
- RemoteResult.SetError(
- gsl::narrow<int32_t>(HttpResponseCode::InternalServerError),
- fmt::format("Block attachment {} is malformed, can't decompress payload", BlockHash),
- {});
- return;
- }
- if (RawHash != BlockHash)
- {
- ReportMessage(OptionalContext,
- fmt::format("Block attachment {} has mismatching raw hash ({})", BlockHash, RawHash));
- RemoteResult.SetError(
- gsl::narrow<int32_t>(HttpResponseCode::InternalServerError),
- fmt::format("Block attachment {} has mismatching raw hash ({})", BlockHash, RawHash),
- {});
- return;
- }
-
- uint64_t PotentialSize = 0;
- uint64_t UsedSize = 0;
- uint64_t BlockSize = BlockPayload.GetSize();
-
- uint64_t BlockHeaderSize = 0;
-
- bool StoreChunksOK = IterateChunkBlock(
- BlockPayload.Flatten(),
- [&AllNeededPartialChunkHashesLookup,
- &ChunkDownloadedFlags,
- &WriteAttachmentBuffers,
- &WriteRawHashes,
- &Info,
- &PotentialSize](CompressedBuffer&& Chunk, const IoHash& AttachmentRawHash) {
- auto ChunkIndexIt = AllNeededPartialChunkHashesLookup.find(AttachmentRawHash);
- if (ChunkIndexIt != AllNeededPartialChunkHashesLookup.end())
- {
- bool Expected = false;
- if (ChunkDownloadedFlags[ChunkIndexIt->second].compare_exchange_strong(Expected, true))
- {
- WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer());
- IoHash RawHash;
- uint64_t RawSize;
- ZEN_ASSERT(
- CompressedBuffer::ValidateCompressedHeader(WriteAttachmentBuffers.back(),
- RawHash,
- RawSize,
- /*OutOptionalTotalCompressedSize*/ nullptr));
- ZEN_ASSERT(RawHash == AttachmentRawHash);
- WriteRawHashes.emplace_back(AttachmentRawHash);
- PotentialSize += WriteAttachmentBuffers.back().GetSize();
- }
- }
- },
- BlockHeaderSize);
-
- if (!StoreChunksOK)
- {
- ReportMessage(OptionalContext,
- fmt::format("Block attachment {} has invalid format ({}): {}",
- BlockHash,
- RemoteResult.GetError(),
- RemoteResult.GetErrorReason()));
- RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::InternalServerError),
- fmt::format("Invalid format for block {}", BlockHash),
- {});
- return;
- }
-
- if (!WriteAttachmentBuffers.empty())
- {
- auto Results = ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes);
- for (size_t Index = 0; Index < Results.size(); Index++)
+ else
{
- const auto& Result = Results[Index];
- if (Result.New)
- {
- Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize());
- Info.AttachmentsStored.fetch_add(1);
- UsedSize += WriteAttachmentBuffers[Index].GetSize();
- }
+ ReportMessage(Context.OptionalJobContext, ErrorString);
+ throw RemoteStoreError(ErrorString,
+ gsl::narrow<int32_t>(HttpResponseCode::InternalServerError),
+ {});
}
- ZEN_DEBUG("Used {} (matching {}) out of {} for block {} ({} %) (use of matching {}%)",
- NiceBytes(UsedSize),
- NiceBytes(PotentialSize),
- NiceBytes(BlockSize),
- BlockHash,
- (100 * UsedSize) / BlockSize,
- PotentialSize > 0 ? (UsedSize * 100) / PotentialSize : 0);
}
}
+ catch (const RemoteStoreError&)
+ {
+ throw;
+ }
catch (const std::exception& Ex)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Failed save block attachment {}", BlockHash),
- Ex.what());
+ throw RemoteStoreError(fmt::format("Failed to save block attachment {}", BlockHash),
+ gsl::narrow<int>(HttpResponseCode::InternalServerError),
+ Ex.what());
}
},
WorkerThreadPool::EMode::EnableBacklog);
}
+ catch (const RemoteStoreError&)
+ {
+ throw;
+ }
catch (const std::exception& Ex)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Failed to block attachment {}", BlockHash),
- Ex.what());
+ throw RemoteStoreError(fmt::format("Failed to download block attachment {}", BlockHash),
+ gsl::narrow<int>(HttpResponseCode::InternalServerError),
+ Ex.what());
}
},
WorkerThreadPool::EMode::EnableBacklog);
};
- void DownloadAndSavePartialBlock(CidStore& ChunkStore,
- RemoteProjectStore& RemoteStore,
- bool IgnoreMissingAttachments,
- JobContext* OptionalContext,
- WorkerThreadPool& NetworkWorkerPool,
- WorkerThreadPool& WorkerPool,
- Latch& AttachmentsDownloadLatch,
- Latch& AttachmentsWriteLatch,
- AsyncRemoteResult& RemoteResult,
+ void DownloadPartialBlock(LoadOplogContext& Context,
+ std::atomic<bool>& AbortFlag,
+ DownloadInfo& Info,
+ double& DownloadTimeSeconds,
+ const ChunkBlockDescription& BlockDescription,
+ bool BlockExistsInCache,
+ std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRangeDescriptors,
+ size_t BlockRangeIndexStart,
+ size_t BlockRangeCount,
+ std::function<void(IoBuffer&& Buffer,
+ size_t BlockRangeStartIndex,
+ std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths)>&& OnDownloaded)
+ {
+ ZEN_ASSERT(Context.StoreMaxRangeCountPerRequest != 0);
+ ZEN_ASSERT(BlockExistsInCache == false || Context.CacheMaxRangeCountPerRequest != 0);
+
+ std::vector<std::pair<uint64_t, uint64_t>> Ranges;
+ Ranges.reserve(BlockRangeDescriptors.size());
+ for (size_t BlockRangeIndex = BlockRangeIndexStart; BlockRangeIndex < BlockRangeIndexStart + BlockRangeCount; BlockRangeIndex++)
+ {
+ const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = BlockRangeDescriptors[BlockRangeIndex];
+ Ranges.push_back(std::make_pair(BlockRange.RangeStart, BlockRange.RangeLength));
+ }
+
+ size_t SubBlockRangeCount = BlockRangeCount;
+ size_t SubRangeCountComplete = 0;
+ std::span<const std::pair<uint64_t, uint64_t>> RangesSpan(Ranges);
+
+ while (SubRangeCountComplete < SubBlockRangeCount)
+ {
+ if (AbortFlag.load())
+ {
+ break;
+ }
+
+ size_t SubRangeStartIndex = BlockRangeIndexStart + SubRangeCountComplete;
+ if (BlockExistsInCache)
+ {
+ ZEN_ASSERT(Context.OptionalCache);
+ size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, Context.CacheMaxRangeCountPerRequest);
+
+ if (SubRangeCount == 1)
+ {
+ // Legacy single-range path, prefer that for max compatibility
+
+ const std::pair<uint64_t, uint64_t> SubRange = RangesSpan[SubRangeCountComplete];
+ Stopwatch CacheTimer;
+ IoBuffer PayloadBuffer = Context.OptionalCache->GetBuildBlob(Context.CacheBuildId,
+ BlockDescription.BlockHash,
+ SubRange.first,
+ SubRange.second);
+ DownloadTimeSeconds += CacheTimer.GetElapsedTimeMs() / 1000.0;
+ if (AbortFlag.load())
+ {
+ break;
+ }
+ if (PayloadBuffer)
+ {
+ OnDownloaded(std::move(PayloadBuffer),
+ SubRangeStartIndex,
+ std::vector<std::pair<uint64_t, uint64_t>>{std::make_pair(0u, SubRange.second)});
+ SubRangeCountComplete += SubRangeCount;
+ continue;
+ }
+ }
+ else
+ {
+ auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount);
+
+ Stopwatch CacheTimer;
+ BuildStorageCache::BuildBlobRanges RangeBuffers =
+ Context.OptionalCache->GetBuildBlobRanges(Context.CacheBuildId, BlockDescription.BlockHash, SubRanges);
+ DownloadTimeSeconds += CacheTimer.GetElapsedTimeMs() / 1000.0;
+ if (AbortFlag.load())
+ {
+ break;
+ }
+ if (RangeBuffers.PayloadBuffer)
+ {
+ if (RangeBuffers.Ranges.empty())
+ {
+ SubRangeCount = Ranges.size() - SubRangeCountComplete;
+ OnDownloaded(std::move(RangeBuffers.PayloadBuffer),
+ SubRangeStartIndex,
+ RangesSpan.subspan(SubRangeCountComplete, SubRangeCount));
+ SubRangeCountComplete += SubRangeCount;
+ continue;
+ }
+ else if (RangeBuffers.Ranges.size() == SubRangeCount)
+ {
+ OnDownloaded(std::move(RangeBuffers.PayloadBuffer), SubRangeStartIndex, RangeBuffers.Ranges);
+ SubRangeCountComplete += SubRangeCount;
+ continue;
+ }
+ }
+ }
+ }
+
+ size_t SubRangeCount = Min(BlockRangeCount - SubRangeCountComplete, Context.StoreMaxRangeCountPerRequest);
+
+ auto SubRanges = RangesSpan.subspan(SubRangeCountComplete, SubRangeCount);
+
+ RemoteProjectStore::LoadAttachmentRangesResult BlockResult =
+ Context.RemoteStore.LoadAttachmentRanges(BlockDescription.BlockHash, SubRanges);
+ DownloadTimeSeconds += BlockResult.ElapsedSeconds;
+ if (AbortFlag.load())
+ {
+ break;
+ }
+ if (BlockResult.ErrorCode || !BlockResult.Bytes)
+ {
+ ReportMessage(Context.OptionalJobContext,
+ fmt::format("Failed to download {} ranges from block attachment '{}' ({}): {}",
+ SubRanges.size(),
+ BlockDescription.BlockHash,
+ BlockResult.ErrorCode,
+ BlockResult.Reason));
+ Info.MissingAttachmentCount.fetch_add(1);
+ if (!Context.IgnoreMissingAttachments)
+ {
+ throw RemoteStoreError(BlockResult.Reason, BlockResult.ErrorCode, BlockResult.Text);
+ }
+ }
+ else
+ {
+ if (BlockResult.Ranges.empty())
+ {
+ // Jupiter will ignore the ranges and send the whole payload if it fetches the payload from S3
+ // Use the whole payload for the remaining ranges
+
+ if (Context.OptionalCache && Context.PopulateCache)
+ {
+ Context.OptionalCache->PutBuildBlob(Context.CacheBuildId,
+ BlockDescription.BlockHash,
+ ZenContentType::kCompressedBinary,
+ CompositeBuffer(std::vector<IoBuffer>{BlockResult.Bytes}));
+ if (AbortFlag.load())
+ {
+ break;
+ }
+ }
+ SubRangeCount = Ranges.size() - SubRangeCountComplete;
+ OnDownloaded(std::move(BlockResult.Bytes),
+ SubRangeStartIndex,
+ RangesSpan.subspan(SubRangeCountComplete, SubRangeCount));
+ }
+ else
+ {
+ if (BlockResult.Ranges.size() != SubRanges.size())
+ {
+ throw RemoteStoreError(fmt::format("Range response for block {} contains {} ranges, expected {} ranges",
+ BlockDescription.BlockHash,
+ BlockResult.Ranges.size(),
+ SubRanges.size()),
+ gsl::narrow<int32_t>(HttpResponseCode::InternalServerError),
+ "");
+ }
+ OnDownloaded(std::move(BlockResult.Bytes), SubRangeStartIndex, BlockResult.Ranges);
+ }
+ }
+
+ SubRangeCountComplete += SubRangeCount;
+ }
+ }
+
+ void DownloadAndSavePartialBlock(LoadOplogContext& Context,
+ ParallelWork& AttachmentWork,
DownloadInfo& Info,
Stopwatch& LoadAttachmentsTimer,
std::atomic_uint64_t& DownloadStartMS,
const ChunkBlockDescription& BlockDescription,
+ bool BlockExistsInCache,
std::span<const ChunkBlockAnalyser::BlockRangeDescriptor> BlockRangeDescriptors,
size_t BlockRangeIndexStart,
size_t BlockRangeCount,
@@ -696,30 +1729,23 @@ namespace remotestore_impl {
std::span<std::atomic<bool>> ChunkDownloadedFlags,
uint32_t RetriesLeft)
{
- AttachmentsDownloadLatch.AddCount(1);
- NetworkWorkerPool.ScheduleWork(
- [&AttachmentsDownloadLatch,
- &AttachmentsWriteLatch,
- &ChunkStore,
- &RemoteStore,
- &NetworkWorkerPool,
- &WorkerPool,
- &RemoteResult,
+ AttachmentWork.ScheduleWork(
+ Context.NetworkWorkerPool,
+ [&AttachmentWork,
+ &Context,
&Info,
&LoadAttachmentsTimer,
&DownloadStartMS,
- IgnoreMissingAttachments,
- OptionalContext,
- RetriesLeft,
BlockDescription,
+ BlockExistsInCache,
BlockRangeDescriptors,
BlockRangeIndexStart,
BlockRangeCount,
&AllNeededPartialChunkHashesLookup,
- ChunkDownloadedFlags]() {
+ ChunkDownloadedFlags,
+ RetriesLeft](std::atomic<bool>& AbortFlag) {
ZEN_TRACE_CPU("DownloadBlockRanges");
-
- auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); });
+ ZEN_SCOPED_LOG(Context.Log);
try
{
uint64_t Unset = (std::uint64_t)-1;
@@ -728,273 +1754,259 @@ namespace remotestore_impl {
double DownloadElapsedSeconds = 0;
uint64_t DownloadedBytes = 0;
- for (size_t BlockRangeIndex = BlockRangeIndexStart; BlockRangeIndex < BlockRangeIndexStart + BlockRangeCount;
- BlockRangeIndex++)
- {
- if (RemoteResult.IsError())
- {
- return;
- }
+ DownloadPartialBlock(
+ Context,
+ AbortFlag,
+ Info,
+ DownloadElapsedSeconds,
+ BlockDescription,
+ BlockExistsInCache,
+ BlockRangeDescriptors,
+ BlockRangeIndexStart,
+ BlockRangeCount,
+ [&](IoBuffer&& Buffer,
+ size_t BlockRangeStartIndex,
+ std::span<const std::pair<uint64_t, uint64_t>> OffsetAndLengths) {
+ uint64_t BlockPartSize = Buffer.GetSize();
+ DownloadedBytes += BlockPartSize;
+
+ Info.AttachmentBlockRangeBytesDownloaded.fetch_add(BlockPartSize);
+ Info.AttachmentBlocksRangesDownloaded++;
+
+ AttachmentWork.ScheduleWork(
+ Context.WorkerPool,
+ [&AttachmentWork,
+ &Context,
+ &Info,
+ &LoadAttachmentsTimer,
+ &DownloadStartMS,
+ BlockDescription,
+ BlockExistsInCache,
+ BlockRangeDescriptors,
+ BlockRangeStartIndex,
+ &AllNeededPartialChunkHashesLookup,
+ ChunkDownloadedFlags,
+ RetriesLeft,
+ BlockPayload = std::move(Buffer),
+ OffsetAndLengths =
+ std::vector<std::pair<uint64_t, uint64_t>>(OffsetAndLengths.begin(), OffsetAndLengths.end())](
+ std::atomic<bool>& AbortFlag) {
+ ZEN_SCOPED_LOG(Context.Log);
+ try
+ {
+ ZEN_ASSERT(BlockPayload.Size() > 0);
- const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange = BlockRangeDescriptors[BlockRangeIndex];
+ size_t RangeCount = OffsetAndLengths.size();
+ for (size_t RangeOffset = 0; RangeOffset < RangeCount; RangeOffset++)
+ {
+ if (AbortFlag)
+ {
+ return;
+ }
- RemoteProjectStore::LoadAttachmentResult BlockResult =
- RemoteStore.LoadAttachment(BlockDescription.BlockHash,
- {.Offset = BlockRange.RangeStart, .Bytes = BlockRange.RangeLength});
- if (BlockResult.ErrorCode)
- {
- ReportMessage(OptionalContext,
- fmt::format("Failed to download block attachment '{}' range {},{} ({}): {}",
- BlockDescription.BlockHash,
- BlockRange.RangeStart,
- BlockRange.RangeLength,
- BlockResult.ErrorCode,
- BlockResult.Reason));
- Info.MissingAttachmentCount.fetch_add(1);
- if (!IgnoreMissingAttachments)
- {
- RemoteResult.SetError(BlockResult.ErrorCode, BlockResult.Reason, BlockResult.Text);
- }
- return;
- }
- if (RemoteResult.IsError())
- {
- return;
- }
- uint64_t BlockPartSize = BlockResult.Bytes.GetSize();
- if (BlockPartSize != BlockRange.RangeLength)
- {
- std::string ErrorString =
- fmt::format("Failed to download block attachment '{}' range {},{}, got {} bytes ({}): {}",
- BlockDescription.BlockHash,
- BlockRange.RangeStart,
- BlockRange.RangeLength,
- BlockPartSize,
- RemoteResult.GetError(),
- RemoteResult.GetErrorReason());
-
- ReportMessage(OptionalContext, ErrorString);
- Info.MissingAttachmentCount.fetch_add(1);
- if (!IgnoreMissingAttachments)
- {
- RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::NotFound),
- "Mismatching block part range received",
- ErrorString);
- }
- return;
- }
- Info.AttachmentBlocksRangesDownloaded.fetch_add(1);
-
- DownloadElapsedSeconds += BlockResult.ElapsedSeconds;
- DownloadedBytes += BlockPartSize;
-
- Info.AttachmentBlockRangeBytesDownloaded.fetch_add(BlockPartSize);
-
- AttachmentsWriteLatch.AddCount(1);
- WorkerPool.ScheduleWork(
- [&AttachmentsDownloadLatch,
- &AttachmentsWriteLatch,
- &ChunkStore,
- &RemoteStore,
- &NetworkWorkerPool,
- &WorkerPool,
- &RemoteResult,
- &Info,
- &LoadAttachmentsTimer,
- &DownloadStartMS,
- IgnoreMissingAttachments,
- OptionalContext,
- RetriesLeft,
- BlockDescription,
- BlockRange,
- &AllNeededPartialChunkHashesLookup,
- ChunkDownloadedFlags,
- BlockPayload = std::move(BlockResult.Bytes)]() {
- auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); });
- if (RemoteResult.IsError())
- {
- return;
- }
- try
- {
- ZEN_ASSERT(BlockPayload.Size() > 0);
- std::vector<IoBuffer> WriteAttachmentBuffers;
- std::vector<IoHash> WriteRawHashes;
+ const ChunkBlockAnalyser::BlockRangeDescriptor& BlockRange =
+ BlockRangeDescriptors[BlockRangeStartIndex + RangeOffset];
+ const std::pair<uint64_t, uint64_t>& OffsetAndLength = OffsetAndLengths[RangeOffset];
+ IoBuffer BlockRangeBuffer(BlockPayload, OffsetAndLength.first, OffsetAndLength.second);
- uint64_t PotentialSize = 0;
- uint64_t UsedSize = 0;
- uint64_t BlockPartSize = BlockPayload.GetSize();
+ std::vector<IoBuffer> WriteAttachmentBuffers;
+ std::vector<IoHash> WriteRawHashes;
- uint32_t OffsetInBlock = 0;
- for (uint32_t ChunkBlockIndex = BlockRange.ChunkBlockIndexStart;
- ChunkBlockIndex < BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount;
- ChunkBlockIndex++)
- {
- const uint32_t ChunkCompressedSize = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex];
- const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex];
+ uint64_t PotentialSize = 0;
+ uint64_t UsedSize = 0;
+ uint64_t BlockPartSize = BlockRangeBuffer.GetSize();
- if (auto ChunkIndexIt = AllNeededPartialChunkHashesLookup.find(ChunkHash);
- ChunkIndexIt != AllNeededPartialChunkHashesLookup.end())
- {
- bool Expected = false;
- if (ChunkDownloadedFlags[ChunkIndexIt->second].compare_exchange_strong(Expected, true))
+ uint32_t OffsetInBlock = 0;
+ for (uint32_t ChunkBlockIndex = BlockRange.ChunkBlockIndexStart;
+ ChunkBlockIndex < BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount;
+ ChunkBlockIndex++)
{
- IoHash VerifyChunkHash;
- uint64_t VerifyChunkSize;
- CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(
- SharedBuffer(IoBuffer(BlockPayload, OffsetInBlock, ChunkCompressedSize)),
- VerifyChunkHash,
- VerifyChunkSize);
- if (!CompressedChunk)
+ if (AbortFlag)
{
- std::string ErrorString = fmt::format(
- "Chunk at {},{} in block attachment '{}' is not a valid compressed buffer",
- OffsetInBlock,
- ChunkCompressedSize,
- BlockDescription.BlockHash);
- ReportMessage(OptionalContext, ErrorString);
- Info.MissingAttachmentCount.fetch_add(1);
- if (!IgnoreMissingAttachments)
+ break;
+ }
+
+ const uint32_t ChunkCompressedSize =
+ BlockDescription.ChunkCompressedLengths[ChunkBlockIndex];
+ const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex];
+
+ if (auto ChunkIndexIt = AllNeededPartialChunkHashesLookup.find(ChunkHash);
+ ChunkIndexIt != AllNeededPartialChunkHashesLookup.end())
+ {
+ if (!ChunkDownloadedFlags[ChunkIndexIt->second])
{
- RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::NotFound),
- "Malformed chunk block",
- ErrorString);
+ IoHash VerifyChunkHash;
+ uint64_t VerifyChunkSize;
+ CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(
+ SharedBuffer(IoBuffer(BlockRangeBuffer, OffsetInBlock, ChunkCompressedSize)),
+ VerifyChunkHash,
+ VerifyChunkSize);
+
+ std::string ErrorString;
+
+ if (!CompressedChunk)
+ {
+ ErrorString = fmt::format(
+ "Chunk at {},{} in block attachment '{}' is not a valid compressed buffer",
+ OffsetInBlock,
+ ChunkCompressedSize,
+ BlockDescription.BlockHash);
+ }
+ else if (VerifyChunkHash != ChunkHash)
+ {
+ ErrorString = fmt::format(
+ "Chunk at {},{} in block attachment '{}' has mismatching hash, expected "
+ "{}, got {}",
+ OffsetInBlock,
+ ChunkCompressedSize,
+ BlockDescription.BlockHash,
+ ChunkHash,
+ VerifyChunkHash);
+ }
+ else if (VerifyChunkSize != BlockDescription.ChunkRawLengths[ChunkBlockIndex])
+ {
+ ErrorString = fmt::format(
+ "Chunk at {},{} in block attachment '{}' has mismatching raw size, "
+ "expected {}, "
+ "got {}",
+ OffsetInBlock,
+ ChunkCompressedSize,
+ BlockDescription.BlockHash,
+ BlockDescription.ChunkRawLengths[ChunkBlockIndex],
+ VerifyChunkSize);
+ }
+
+ if (!ErrorString.empty())
+ {
+ if (RetriesLeft > 0)
+ {
+ ReportMessage(Context.OptionalJobContext,
+ fmt::format("{}, retrying download", ErrorString));
+ return DownloadAndSavePartialBlock(Context,
+ AttachmentWork,
+ Info,
+ LoadAttachmentsTimer,
+ DownloadStartMS,
+ BlockDescription,
+ BlockExistsInCache,
+ BlockRangeDescriptors,
+ BlockRangeStartIndex,
+ RangeCount,
+ AllNeededPartialChunkHashesLookup,
+ ChunkDownloadedFlags,
+ RetriesLeft - 1);
+ }
+
+ ReportMessage(Context.OptionalJobContext, ErrorString);
+ Info.MissingAttachmentCount.fetch_add(1);
+ if (!Context.IgnoreMissingAttachments)
+ {
+ throw RemoteStoreError("Malformed chunk block",
+ gsl::narrow<int32_t>(HttpResponseCode::NotFound),
+ ErrorString);
+ }
+ }
+ else
+ {
+ bool Expected = false;
+ if (ChunkDownloadedFlags[ChunkIndexIt->second].compare_exchange_strong(Expected,
+ true))
+ {
+ WriteAttachmentBuffers.emplace_back(
+ CompressedChunk.GetCompressed().Flatten().AsIoBuffer());
+ WriteRawHashes.emplace_back(ChunkHash);
+ PotentialSize += WriteAttachmentBuffers.back().GetSize();
+ }
+ }
}
- continue;
}
- if (VerifyChunkHash != ChunkHash)
+ OffsetInBlock += ChunkCompressedSize;
+ }
+
+ if (!WriteAttachmentBuffers.empty())
+ {
+ std::vector<CidStore::InsertResult> Results =
+ Context.ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes);
+ for (size_t Index = 0; Index < Results.size(); Index++)
{
- std::string ErrorString = fmt::format(
- "Chunk at {},{} in block attachment '{}' has mismatching hash, expected {}, got {}",
- OffsetInBlock,
- ChunkCompressedSize,
- BlockDescription.BlockHash,
- ChunkHash,
- VerifyChunkHash);
- ReportMessage(OptionalContext, ErrorString);
- Info.MissingAttachmentCount.fetch_add(1);
- if (!IgnoreMissingAttachments)
+ const CidStore::InsertResult& Result = Results[Index];
+ if (Result.New)
{
- RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::NotFound),
- "Malformed chunk block",
- ErrorString);
+ Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize());
+ Info.AttachmentsStored.fetch_add(1);
+ UsedSize += WriteAttachmentBuffers[Index].GetSize();
}
- continue;
}
- if (VerifyChunkSize != BlockDescription.ChunkRawLengths[ChunkBlockIndex])
+ Info.ChunksCompleteCount += WriteAttachmentBuffers.size();
+ if (UsedSize < BlockPartSize)
{
- std::string ErrorString = fmt::format(
- "Chunk at {},{} in block attachment '{}' has mismatching raw size, expected {}, "
- "got {}",
- OffsetInBlock,
- ChunkCompressedSize,
+ ZEN_DEBUG(
+ "Used {} (skipping {}) out of {} for block {} range {}, {} ({} %) (use of matching "
+ "{}%)",
+ NiceBytes(UsedSize),
+ NiceBytes(BlockPartSize - UsedSize),
+ NiceBytes(BlockPartSize),
BlockDescription.BlockHash,
- BlockDescription.ChunkRawLengths[ChunkBlockIndex],
- VerifyChunkSize);
- ReportMessage(OptionalContext, ErrorString);
- Info.MissingAttachmentCount.fetch_add(1);
- if (!IgnoreMissingAttachments)
- {
- RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::NotFound),
- "Malformed chunk block",
- ErrorString);
- }
- continue;
+ BlockRange.RangeStart,
+ BlockRange.RangeLength,
+ (100 * UsedSize) / BlockPartSize,
+ PotentialSize > 0 ? (UsedSize * 100) / PotentialSize : 0);
}
-
- WriteAttachmentBuffers.emplace_back(CompressedChunk.GetCompressed().Flatten().AsIoBuffer());
- WriteRawHashes.emplace_back(ChunkHash);
- PotentialSize += WriteAttachmentBuffers.back().GetSize();
}
}
- OffsetInBlock += ChunkCompressedSize;
}
-
- if (!WriteAttachmentBuffers.empty())
+ catch (const RemoteStoreError&)
{
- auto Results = ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes);
- for (size_t Index = 0; Index < Results.size(); Index++)
- {
- const auto& Result = Results[Index];
- if (Result.New)
- {
- Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize());
- Info.AttachmentsStored.fetch_add(1);
- UsedSize += WriteAttachmentBuffers[Index].GetSize();
- }
- }
- ZEN_DEBUG("Used {} (matching {}) out of {} for block {} range {}, {} ({} %) (use of matching {}%)",
- NiceBytes(UsedSize),
- NiceBytes(PotentialSize),
- NiceBytes(BlockPartSize),
- BlockDescription.BlockHash,
- BlockRange.RangeStart,
- BlockRange.RangeLength,
- (100 * UsedSize) / BlockPartSize,
- PotentialSize > 0 ? (UsedSize * 100) / PotentialSize : 0);
+ throw;
}
- }
- catch (const std::exception& Ex)
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Failed save block attachment {} range {}, {}",
- BlockDescription.BlockHash,
- BlockRange.RangeStart,
- BlockRange.RangeLength),
- Ex.what());
- }
- },
- WorkerThreadPool::EMode::EnableBacklog);
+ catch (const std::exception& Ex)
+ {
+ throw RemoteStoreError(fmt::format("Failed saving {} ranges from block attachment {}",
+ OffsetAndLengths.size(),
+ BlockDescription.BlockHash),
+ gsl::narrow<int>(HttpResponseCode::InternalServerError),
+ Ex.what());
+ }
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
+ });
+ if (!AbortFlag)
+ {
+ ZEN_DEBUG("Loaded {} ranges from block attachment '{}' in {} ({})",
+ BlockRangeCount,
+ BlockDescription.BlockHash,
+ NiceTimeSpanMs(static_cast<uint64_t>(DownloadElapsedSeconds * 1000)),
+ NiceBytes(DownloadedBytes));
}
-
- ZEN_DEBUG("Loaded {} ranges from block attachment '{}' in {} ({})",
- BlockRangeCount,
- BlockDescription.BlockHash,
- NiceTimeSpanMs(static_cast<uint64_t>(DownloadElapsedSeconds * 1000)),
- NiceBytes(DownloadedBytes));
+ }
+ catch (const RemoteStoreError&)
+ {
+ throw;
}
catch (const std::exception& Ex)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Failed to download block attachment {} ranges", BlockDescription.BlockHash),
- Ex.what());
+ throw RemoteStoreError(fmt::format("Failed to download block attachment {} ranges", BlockDescription.BlockHash),
+ gsl::narrow<int>(HttpResponseCode::InternalServerError),
+ Ex.what());
}
},
WorkerThreadPool::EMode::EnableBacklog);
};
- void DownloadAndSaveAttachment(CidStore& ChunkStore,
- RemoteProjectStore& RemoteStore,
- bool IgnoreMissingAttachments,
- JobContext* OptionalContext,
- WorkerThreadPool& NetworkWorkerPool,
- WorkerThreadPool& WorkerPool,
- Latch& AttachmentsDownloadLatch,
- Latch& AttachmentsWriteLatch,
- AsyncRemoteResult& RemoteResult,
+ void DownloadAndSaveAttachment(LoadOplogContext& Context,
+ ParallelWork& AttachmentWork,
DownloadInfo& Info,
Stopwatch& LoadAttachmentsTimer,
std::atomic_uint64_t& DownloadStartMS,
const IoHash& RawHash)
{
- AttachmentsDownloadLatch.AddCount(1);
- NetworkWorkerPool.ScheduleWork(
- [&RemoteStore,
- &ChunkStore,
- &WorkerPool,
- &RemoteResult,
- &AttachmentsDownloadLatch,
- &AttachmentsWriteLatch,
- RawHash,
- &LoadAttachmentsTimer,
- &DownloadStartMS,
- &Info,
- IgnoreMissingAttachments,
- OptionalContext]() {
+ AttachmentWork.ScheduleWork(
+ Context.NetworkWorkerPool,
+ [&Context, &AttachmentWork, RawHash, &LoadAttachmentsTimer, &DownloadStartMS, &Info](std::atomic<bool>& AbortFlag) {
ZEN_TRACE_CPU("DownloadAttachment");
+ ZEN_SCOPED_LOG(Context.Log);
- auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); });
- if (RemoteResult.IsError())
+ if (AbortFlag)
{
return;
}
@@ -1002,132 +2014,127 @@ namespace remotestore_impl {
{
uint64_t Unset = (std::uint64_t)-1;
DownloadStartMS.compare_exchange_strong(Unset, LoadAttachmentsTimer.GetElapsedTimeMs());
- RemoteProjectStore::LoadAttachmentResult AttachmentResult = RemoteStore.LoadAttachment(RawHash, {});
- if (AttachmentResult.ErrorCode)
+ IoBuffer BlobBuffer;
+ if (Context.OptionalCache)
{
- ReportMessage(OptionalContext,
- fmt::format("Failed to download large attachment {}: '{}', error code : {}",
- RawHash,
- AttachmentResult.Reason,
- AttachmentResult.ErrorCode));
- Info.MissingAttachmentCount.fetch_add(1);
- if (!IgnoreMissingAttachments)
+ BlobBuffer = Context.OptionalCache->GetBuildBlob(Context.CacheBuildId, RawHash);
+ }
+ if (!BlobBuffer)
+ {
+ RemoteProjectStore::LoadAttachmentResult AttachmentResult = Context.RemoteStore.LoadAttachment(RawHash);
+ if (AttachmentResult.ErrorCode)
{
- RemoteResult.SetError(AttachmentResult.ErrorCode, AttachmentResult.Reason, AttachmentResult.Text);
+ ReportMessage(Context.OptionalJobContext,
+ fmt::format("Failed to download large attachment {}: '{}', error code: {}",
+ RawHash,
+ AttachmentResult.Reason,
+ AttachmentResult.ErrorCode));
+ Info.MissingAttachmentCount.fetch_add(1);
+ if (!Context.IgnoreMissingAttachments)
+ {
+ throw RemoteStoreError(AttachmentResult.Reason, AttachmentResult.ErrorCode, AttachmentResult.Text);
+ }
+ return;
+ }
+ ZEN_ASSERT(AttachmentResult.Bytes);
+ BlobBuffer = std::move(AttachmentResult.Bytes);
+ ZEN_DEBUG("Loaded large attachment '{}' in {} ({})",
+ RawHash,
+ NiceTimeSpanMs(static_cast<uint64_t>(AttachmentResult.ElapsedSeconds * 1000)),
+ NiceBytes(BlobBuffer.GetSize()));
+ if (Context.OptionalCache && Context.PopulateCache)
+ {
+ Context.OptionalCache->PutBuildBlob(Context.CacheBuildId,
+ RawHash,
+ BlobBuffer.GetContentType(),
+ CompositeBuffer(SharedBuffer(BlobBuffer)));
}
- return;
}
- uint64_t AttachmentSize = AttachmentResult.Bytes.GetSize();
- ZEN_DEBUG("Loaded large attachment '{}' in {} ({})",
- RawHash,
- NiceTimeSpanMs(static_cast<uint64_t>(AttachmentResult.ElapsedSeconds * 1000)),
- NiceBytes(AttachmentSize));
- Info.AttachmentsDownloaded.fetch_add(1);
- if (RemoteResult.IsError())
+ if (AbortFlag)
{
return;
}
+ uint64_t AttachmentSize = BlobBuffer.GetSize();
+ Info.AttachmentsDownloaded.fetch_add(1);
Info.AttachmentBytesDownloaded.fetch_add(AttachmentSize);
+ ZEN_ASSERT(BlobBuffer);
- AttachmentsWriteLatch.AddCount(1);
- WorkerPool.ScheduleWork(
- [&AttachmentsWriteLatch,
- &RemoteResult,
- &Info,
- &ChunkStore,
- RawHash,
- AttachmentSize,
- Bytes = std::move(AttachmentResult.Bytes),
- OptionalContext]() {
+ AttachmentWork.ScheduleWork(
+ Context.WorkerPool,
+ [&Context, &Info, RawHash, AttachmentSize, Bytes = std::move(BlobBuffer)](std::atomic<bool>& AbortFlag) {
ZEN_TRACE_CPU("WriteAttachment");
- auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); });
- if (RemoteResult.IsError())
+ ZEN_ASSERT(Bytes);
+
+ if (AbortFlag)
{
return;
}
- try
- {
- CidStore::InsertResult InsertResult = ChunkStore.AddChunk(Bytes, RawHash);
- if (InsertResult.New)
- {
- Info.AttachmentBytesStored.fetch_add(AttachmentSize);
- Info.AttachmentsStored.fetch_add(1);
- }
- }
- catch (const std::exception& Ex)
+ CidStore::InsertResult InsertResult = Context.ChunkStore.AddChunk(Bytes, RawHash);
+ if (InsertResult.New)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Saving attachment {} failed", RawHash),
- Ex.what());
+ Info.AttachmentBytesStored.fetch_add(AttachmentSize);
+ Info.AttachmentsStored.fetch_add(1);
}
+ Info.ChunksCompleteCount++;
},
WorkerThreadPool::EMode::EnableBacklog);
}
- catch (const std::exception& Ex)
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Loading attachment {} failed", RawHash),
- Ex.what());
- }
- },
- WorkerThreadPool::EMode::EnableBacklog);
- };
-
- void CreateBlock(WorkerThreadPool& WorkerPool,
- Latch& OpSectionsLatch,
- std::vector<std::pair<IoHash, FetchChunkFunc>>&& ChunksInBlock,
- RwLock& SectionsLock,
- std::vector<ChunkBlockDescription>& Blocks,
- size_t BlockIndex,
- const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock,
- AsyncRemoteResult& RemoteResult)
- {
- OpSectionsLatch.AddCount(1);
- WorkerPool.ScheduleWork(
- [&Blocks,
- &SectionsLock,
- &OpSectionsLatch,
- BlockIndex,
- Chunks = std::move(ChunksInBlock),
- &AsyncOnBlock,
- &RemoteResult]() mutable {
- ZEN_TRACE_CPU("CreateBlock");
-
- auto _ = MakeGuard([&OpSectionsLatch] { OpSectionsLatch.CountDown(); });
- if (RemoteResult.IsError())
- {
- return;
- }
- size_t ChunkCount = Chunks.size();
- try
+ catch (const RemoteStoreError&)
{
- ZEN_ASSERT(ChunkCount > 0);
- Stopwatch Timer;
- ChunkBlockDescription Block;
- CompressedBuffer CompressedBlock = GenerateChunkBlock(std::move(Chunks), Block);
- IoHash BlockHash = CompressedBlock.DecodeRawHash();
- ZEN_UNUSED(BlockHash);
- {
- // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index
- RwLock::SharedLockScope __(SectionsLock);
- Blocks[BlockIndex] = Block;
- }
- uint64_t BlockSize = CompressedBlock.GetCompressedSize();
- AsyncOnBlock(std::move(CompressedBlock), std::move(Block));
- ZEN_INFO("Generated block with {} attachments in {} ({})",
- ChunkCount,
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()),
- NiceBytes(BlockSize));
+ throw;
}
catch (const std::exception& Ex)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Failed creating block {} with {} chunks", BlockIndex, ChunkCount),
- Ex.what());
+ throw RemoteStoreError(fmt::format("Loading attachment {} failed", RawHash),
+ gsl::narrow<int>(HttpResponseCode::InternalServerError),
+ Ex.what());
}
},
WorkerThreadPool::EMode::EnableBacklog);
+ };
+
+ void AsyncCreateBlock(LoggerRef InLog,
+ ParallelWork& Work,
+ WorkerThreadPool& WorkerPool,
+ std::vector<std::pair<IoHash, FetchChunkFunc>>&& ChunksInBlock,
+ RwLock& SectionsLock,
+ std::vector<ChunkBlockDescription>& Blocks,
+ size_t BlockIndex,
+ const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock,
+ JobContext* OptionalContext)
+ {
+ Work.ScheduleWork(WorkerPool,
+ [InLog, &Blocks, &SectionsLock, BlockIndex, Chunks = std::move(ChunksInBlock), &AsyncOnBlock, OptionalContext](
+ std::atomic<bool>& AbortFlag) mutable {
+ ZEN_TRACE_CPU("CreateBlock");
+ ZEN_SCOPED_LOG(InLog);
+
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ AbortFlag.store(true);
+ }
+ if (AbortFlag)
+ {
+ return;
+ }
+ size_t ChunkCount = Chunks.size();
+ ZEN_ASSERT(ChunkCount > 0);
+ Stopwatch Timer;
+ ChunkBlockDescription Block;
+ CompressedBuffer CompressedBlock = GenerateChunkBlock(std::move(Chunks), Block);
+ uint64_t BlockSize = CompressedBlock.GetCompressedSize();
+ {
+ // We can share the lock as we are not resizing the vector and only touch our own index
+ RwLock::SharedLockScope __(SectionsLock);
+ Blocks[BlockIndex] = Block;
+ }
+ AsyncOnBlock(std::move(CompressedBlock), std::move(Block));
+ ZEN_INFO("Generated block with {} attachments in {} ({})",
+ ChunkCount,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()),
+ NiceBytes(BlockSize));
+ });
}
struct UploadInfo
@@ -1141,7 +2148,7 @@ namespace remotestore_impl {
struct CreatedBlock
{
- IoBuffer Payload;
+ CompositeBuffer Payload;
ChunkBlockDescription Block;
};
@@ -1155,7 +2162,6 @@ namespace remotestore_impl {
const std::unordered_set<IoHash, IoHash::Hasher>& Needs,
bool ForceAll,
UploadInfo& Info,
- AsyncRemoteResult& RemoteResult,
JobContext* OptionalContext)
{
using namespace std::literals;
@@ -1216,22 +2222,15 @@ namespace remotestore_impl {
if (!UnknownAttachments.empty())
{
- RemoteResult.SetError(
- gsl::narrow<int>(HttpResponseCode::NotFound),
+ throw RemoteStoreError(
fmt::format("Upload requested of {} missing attachments, the base container referenced blocks that are no longer available",
UnknownAttachments.size()),
+ gsl::narrow<int>(HttpResponseCode::NotFound),
"");
- ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return;
}
if (IsCancelled(OptionalContext))
{
- if (!RemoteResult.IsError())
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- }
return;
}
@@ -1244,122 +2243,91 @@ namespace remotestore_impl {
Stopwatch Timer;
- ptrdiff_t AttachmentsToSave(0);
- Latch SaveAttachmentsLatch(1);
+ std::atomic<bool> AbortFlag(false);
+ std::atomic<bool> PauseFlag(false);
+ ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+ ptrdiff_t AttachmentsToSave(0);
for (const IoHash& RawHash : AttachmentsToUpload)
{
- if (RemoteResult.IsError())
+ if (AbortFlag.load())
{
break;
}
- SaveAttachmentsLatch.AddCount(1);
AttachmentsToSave++;
- WorkerPool.ScheduleWork(
- [&ChunkStore,
- &RemoteStore,
- &SaveAttachmentsLatch,
- &RemoteResult,
- RawHash,
- &CreatedBlocks,
- &LooseFileAttachments,
- &Info,
- OptionalContext]() {
+ Work.ScheduleWork(
+ WorkerPool,
+ [&ChunkStore, &RemoteStore, RawHash, &CreatedBlocks, &LooseFileAttachments, &Info, OptionalContext](
+ std::atomic<bool>& AbortFlag) {
ZEN_TRACE_CPU("UploadAttachment");
- auto _ = MakeGuard([&SaveAttachmentsLatch] { SaveAttachmentsLatch.CountDown(); });
- if (RemoteResult.IsError())
+ if (AbortFlag.load())
{
return;
}
- try
+ CompositeBuffer Payload;
+ ChunkBlockDescription Block;
+ if (auto BlockIt = CreatedBlocks.find(RawHash); BlockIt != CreatedBlocks.end())
{
- IoBuffer Payload;
- ChunkBlockDescription Block;
- if (auto BlockIt = CreatedBlocks.find(RawHash); BlockIt != CreatedBlocks.end())
- {
- Payload = BlockIt->second.Payload;
- Block = BlockIt->second.Block;
- }
- else if (auto LooseTmpFileIt = LooseFileAttachments.find(RawHash); LooseTmpFileIt != LooseFileAttachments.end())
- {
- Payload = LooseTmpFileIt->second(RawHash);
- }
- else
- {
- Payload = ChunkStore.FindChunkByCid(RawHash);
- }
- if (!Payload)
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::NotFound),
- fmt::format("Failed to find attachment {}", RawHash),
- {});
- ZEN_WARN("Failed to save attachment '{}' ({}): {}",
- RawHash,
- RemoteResult.GetError(),
- RemoteResult.GetErrorReason());
- return;
- }
- const bool IsBlock = Block.BlockHash == RawHash;
- size_t PayloadSize = Payload.GetSize();
- RemoteProjectStore::SaveAttachmentResult Result =
- RemoteStore.SaveAttachment(CompositeBuffer(SharedBuffer(std::move(Payload))), RawHash, std::move(Block));
- if (Result.ErrorCode)
- {
- RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text);
- ReportMessage(OptionalContext,
- fmt::format("Failed to save attachment '{}', {} ({}): {}",
- RawHash,
- NiceBytes(PayloadSize),
- RemoteResult.GetError(),
- RemoteResult.GetErrorReason()));
- return;
- }
- if (IsBlock)
- {
- Info.AttachmentBlocksUploaded.fetch_add(1);
- Info.AttachmentBlockBytesUploaded.fetch_add(PayloadSize);
- ZEN_INFO("Saved block attachment '{}' in {} ({})",
- RawHash,
- NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)),
- NiceBytes(PayloadSize));
- }
- else
- {
- Info.AttachmentsUploaded.fetch_add(1);
- Info.AttachmentBytesUploaded.fetch_add(PayloadSize);
- ZEN_INFO("Saved large attachment '{}' in {} ({})",
- RawHash,
- NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)),
- NiceBytes(PayloadSize));
- }
+ Payload = BlockIt->second.Payload;
+ Block = BlockIt->second.Block;
}
- catch (const std::exception& Ex)
+ else if (auto LooseTmpFileIt = LooseFileAttachments.find(RawHash); LooseTmpFileIt != LooseFileAttachments.end())
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("To upload attachment {}", RawHash),
- Ex.what());
+ Payload = LooseTmpFileIt->second(RawHash);
}
- },
- WorkerThreadPool::EMode::EnableBacklog);
+ else
+ {
+ Payload = CompositeBuffer(SharedBuffer(ChunkStore.FindChunkByCid(RawHash)));
+ }
+ if (!Payload)
+ {
+ throw RemoteStoreError(fmt::format("Failed to find attachment {}", RawHash),
+ gsl::narrow<int>(HttpResponseCode::NotFound),
+ {});
+ }
+ const bool IsBlock = Block.BlockHash == RawHash;
+ size_t PayloadSize = Payload.GetSize();
+ RemoteProjectStore::SaveAttachmentResult Result =
+ RemoteStore.SaveAttachment(std::move(Payload), RawHash, std::move(Block));
+ if (Result.ErrorCode)
+ {
+ throw RemoteStoreError(fmt::format("Failed to save attachment '{}', {}", RawHash, NiceBytes(PayloadSize)),
+ Result.ErrorCode,
+ Result.Text);
+ }
+ if (IsBlock)
+ {
+ Info.AttachmentBlocksUploaded.fetch_add(1);
+ Info.AttachmentBlockBytesUploaded.fetch_add(PayloadSize);
+ ZEN_INFO("Saved block attachment '{}' in {} ({})",
+ RawHash,
+ NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)),
+ NiceBytes(PayloadSize));
+ }
+ else
+ {
+ Info.AttachmentsUploaded.fetch_add(1);
+ Info.AttachmentBytesUploaded.fetch_add(PayloadSize);
+ ZEN_INFO("Saved large attachment '{}' in {} ({})",
+ RawHash,
+ NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)),
+ NiceBytes(PayloadSize));
+ }
+ });
}
if (IsCancelled(OptionalContext))
{
- if (!RemoteResult.IsError())
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- }
- return;
+ AbortFlag = true;
}
if (!BulkBlockAttachmentsToUpload.empty())
{
for (const std::vector<std::pair<IoHash, FetchChunkFunc>>& Chunks : BlockChunks)
{
- if (RemoteResult.IsError())
+ if (AbortFlag.load())
{
break;
}
@@ -1379,103 +2347,82 @@ namespace remotestore_impl {
continue;
}
- SaveAttachmentsLatch.AddCount(1);
AttachmentsToSave++;
- WorkerPool.ScheduleWork(
+ Work.ScheduleWork(
+ WorkerPool,
[&RemoteStore,
&ChunkStore,
- &SaveAttachmentsLatch,
- &RemoteResult,
NeededChunks = std::move(NeededChunks),
&BulkBlockAttachmentsToUpload,
&Info,
- OptionalContext]() {
+ OptionalContext](std::atomic<bool>& AbortFlag) {
ZEN_TRACE_CPU("UploadChunk");
- auto _ = MakeGuard([&SaveAttachmentsLatch] { SaveAttachmentsLatch.CountDown(); });
- if (RemoteResult.IsError())
+ if (AbortFlag.load())
{
return;
}
- try
+ size_t ChunksSize = 0;
+ std::vector<SharedBuffer> ChunkBuffers;
+ ChunkBuffers.reserve(NeededChunks.size());
+ for (const IoHash& Chunk : NeededChunks)
{
- size_t ChunksSize = 0;
- std::vector<SharedBuffer> ChunkBuffers;
- ChunkBuffers.reserve(NeededChunks.size());
- for (const IoHash& Chunk : NeededChunks)
- {
- auto It = BulkBlockAttachmentsToUpload.find(Chunk);
- ZEN_ASSERT(It != BulkBlockAttachmentsToUpload.end());
- CompressedBuffer ChunkPayload = It->second(It->first).second;
- if (!ChunkPayload)
- {
- RemoteResult.SetError(static_cast<int32_t>(HttpResponseCode::NotFound),
- fmt::format("Missing chunk {}"sv, Chunk),
- fmt::format("Unable to fetch attachment {} required by the oplog"sv, Chunk));
- ChunkBuffers.clear();
- break;
- }
- ChunksSize += ChunkPayload.GetCompressedSize();
- ChunkBuffers.emplace_back(SharedBuffer(std::move(ChunkPayload).GetCompressed().Flatten().AsIoBuffer()));
- }
- RemoteProjectStore::SaveAttachmentsResult Result = RemoteStore.SaveAttachments(ChunkBuffers);
- if (Result.ErrorCode)
+ auto It = BulkBlockAttachmentsToUpload.find(Chunk);
+ ZEN_ASSERT(It != BulkBlockAttachmentsToUpload.end());
+ CompositeBuffer ChunkPayload = It->second(It->first).second;
+ if (!ChunkPayload)
{
- RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text);
- ReportMessage(OptionalContext,
- fmt::format("Failed to save attachments with {} chunks ({}): {}",
- NeededChunks.size(),
- RemoteResult.GetError(),
- RemoteResult.GetErrorReason()));
- return;
+ throw RemoteStoreError(fmt::format("Missing chunk {}"sv, Chunk),
+ static_cast<int32_t>(HttpResponseCode::NotFound),
+ fmt::format("Unable to fetch attachment {} required by the oplog"sv, Chunk));
}
- Info.AttachmentsUploaded.fetch_add(ChunkBuffers.size());
- Info.AttachmentBytesUploaded.fetch_add(ChunksSize);
-
- ZEN_INFO("Saved {} bulk attachments in {} ({})",
- NeededChunks.size(),
- NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)),
- NiceBytes(ChunksSize));
+ ChunksSize += ChunkPayload.GetSize();
+ ChunkBuffers.emplace_back(SharedBuffer(ChunkPayload.Flatten().AsIoBuffer()));
}
- catch (const std::exception& Ex)
+ RemoteProjectStore::SaveAttachmentsResult Result = RemoteStore.SaveAttachments(ChunkBuffers);
+ if (Result.ErrorCode)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Failed to buck upload {} attachments", NeededChunks.size()),
- Ex.what());
+ throw RemoteStoreError(fmt::format("Failed to save attachments with {} chunks", NeededChunks.size()),
+ Result.ErrorCode,
+ Result.Text);
}
- },
- WorkerThreadPool::EMode::EnableBacklog);
+ Info.AttachmentsUploaded.fetch_add(ChunkBuffers.size());
+ Info.AttachmentBytesUploaded.fetch_add(ChunksSize);
+
+ ZEN_INFO("Saved {} bulk attachments in {} ({})",
+ NeededChunks.size(),
+ NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)),
+ NiceBytes(ChunksSize));
+ });
}
}
- SaveAttachmentsLatch.CountDown();
- while (!SaveAttachmentsLatch.Wait(1000))
- {
- ptrdiff_t Remaining = SaveAttachmentsLatch.Remaining();
- if (IsCancelled(OptionalContext))
+ Stopwatch SaveAttachmentsProgressTimer;
+ Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t Remaining) {
+ ZEN_UNUSED(IsAborted, IsPaused);
+ if (IsCancelled(OptionalContext) && !AbortFlag.load())
{
- if (!RemoteResult.IsError())
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- }
+ AbortFlag = true;
}
uint64_t PartialTransferWallTimeMS = Timer.GetElapsedTimeMs();
ReportProgress(OptionalContext,
"Saving attachments"sv,
- fmt::format("{} remaining... {}", Remaining, GetStats(RemoteStore.GetStats(), PartialTransferWallTimeMS)),
+ fmt::format("{} remaining... {}",
+ Remaining,
+ GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, PartialTransferWallTimeMS)),
AttachmentsToSave,
- Remaining);
- }
+ Remaining,
+ SaveAttachmentsProgressTimer.GetElapsedTimeMs());
+ });
uint64_t ElapsedTimeMS = Timer.GetElapsedTimeMs();
if (AttachmentsToSave > 0)
{
ReportProgress(OptionalContext,
"Saving attachments"sv,
- fmt::format("{}", GetStats(RemoteStore.GetStats(), ElapsedTimeMS)),
+ fmt::format("{}", GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, ElapsedTimeMS)),
AttachmentsToSave,
- 0);
+ 0,
+ SaveAttachmentsProgressTimer.GetElapsedTimeMs());
}
ReportMessage(OptionalContext,
fmt::format("Saved {} attachments ({} blocks, {} attachments, {} bulk attachments) in {} {}",
@@ -1484,7 +2431,7 @@ namespace remotestore_impl {
LargeAttachmentCountToUpload,
BulkAttachmentCountToUpload,
NiceTimeSpanMs(ElapsedTimeMS),
- GetStats(RemoteStore.GetStats(), ElapsedTimeMS)));
+ GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, ElapsedTimeMS)));
}
} // namespace remotestore_impl
@@ -1493,8 +2440,7 @@ std::vector<IoHash>
GetBlockHashesFromOplog(CbObjectView ContainerObject)
{
using namespace std::literals;
- std::vector<ChunkBlockDescription> Result;
- CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView();
+ CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView();
std::vector<IoHash> BlockHashes;
BlockHashes.reserve(BlocksArray.Num());
@@ -1541,7 +2487,8 @@ GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> Include
}
CbObject
-BuildContainer(CidStore& ChunkStore,
+BuildContainer(LoggerRef InLog,
+ CidStore& ChunkStore,
ProjectStore::Project& Project,
ProjectStore::Oplog& Oplog,
size_t MaxBlockSize,
@@ -1557,1127 +2504,651 @@ BuildContainer(CidStore& ChunkStore,
const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment,
const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks,
bool EmbedLooseFiles,
- JobContext* OptionalContext,
- remotestore_impl::AsyncRemoteResult& RemoteResult)
+ JobContext* OptionalContext)
{
using namespace std::literals;
- std::unique_ptr<OperationLogOutput> LogOutput(std::make_unique<remotestore_impl::JobContextLogOutput>(OptionalContext));
-
- size_t OpCount = 0;
-
- CbObject OplogContainerObject;
- {
- struct FoundAttachment
- {
- std::filesystem::path RawPath; // If not stored in cid
- uint64_t Size = 0;
- Oid Key = Oid::Zero;
- };
-
- std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher> UploadAttachments;
-
- RwLock BlocksLock;
- std::vector<ChunkBlockDescription> Blocks;
- CompressedBuffer OpsBuffer;
-
- std::filesystem::path AttachmentTempPath = Oplog.TempPath();
- AttachmentTempPath.append(".pending");
- CreateDirectories(AttachmentTempPath);
-
- auto RewriteOp = [&](const Oid& Key, CbObjectView Op, const std::function<void(CbObjectView)>& CB) {
- bool OpRewritten = false;
- CbArrayView Files = Op["files"sv].AsArrayView();
- if (Files.Num() == 0)
- {
- CB(Op);
- return;
- }
-
- CbWriter Cbo;
- Cbo.BeginArray("files"sv);
+ ZEN_SCOPED_LOG(InLog);
+ remotestore_impl::JobContextLogger JobContextOutput(OptionalContext);
- for (CbFieldView& Field : Files)
- {
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- CB(Op);
- return;
- }
+ Stopwatch Timer;
- bool CopyField = true;
+ CbObject OplogContainerObject;
+ CompressedBuffer CompressedOpsSection;
+ std::unordered_map<IoHash, remotestore_impl::FoundAttachment, IoHash::Hasher> UploadAttachments;
+ std::filesystem::path AttachmentTempPath = Oplog.TempPath();
+ AttachmentTempPath.append(".pending");
+
+ size_t TotalOpCount = Oplog.GetOplogEntryCount();
+
+ Stopwatch RewriteOplogTimer;
+ CbObject SectionOps = remotestore_impl::RewriteOplog(InLog,
+ Project,
+ Oplog,
+ IgnoreMissingAttachments,
+ EmbedLooseFiles,
+ AttachmentTempPath,
+ UploadAttachments,
+ OptionalContext);
+ remotestore_impl::ReportMessage(OptionalContext,
+ fmt::format("Rewrote {} ops to new oplog in {}",
+ TotalOpCount,
+ NiceTimeSpanMs(static_cast<uint64_t>(RewriteOplogTimer.GetElapsedTimeMs()))));
- if (CbObjectView View = Field.AsObjectView())
- {
- IoHash DataHash = View["data"sv].AsHash();
+ {
+ Stopwatch CompressOpsTimer;
+ CompressedOpsSection = CompressedBuffer::Compress(SectionOps.GetBuffer(), OodleCompressor::Mermaid, OodleCompressionLevel::Fast);
+ remotestore_impl::ReportMessage(OptionalContext,
+ fmt::format("Compressed oplog section {} ({} -> {}) in {}",
+ CompressedOpsSection.DecodeRawHash(),
+ NiceBytes(CompressedOpsSection.DecodeRawSize()),
+ NiceBytes(CompressedOpsSection.GetCompressedSize()),
+ NiceTimeSpanMs(static_cast<uint64_t>(CompressOpsTimer.GetElapsedTimeMs()))));
+ }
- if (DataHash == IoHash::Zero)
- {
- std::string_view ServerPath = View["serverpath"sv].AsString();
- std::filesystem::path FilePath = Project.RootDir / ServerPath;
- if (!IsFile(FilePath))
- {
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Missing attachment '{}' for op '{}'", FilePath, View["id"sv].AsObjectId()));
- if (IgnoreMissingAttachments)
- {
- continue;
- }
- else
- {
- ExtendableStringBuilder<1024> Sb;
- Sb.Append("Failed to find attachment '");
- Sb.Append(FilePath.string());
- Sb.Append("' for op: \n");
- View.ToJson(Sb);
- throw std::runtime_error(Sb.ToString());
- }
- }
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ return {};
+ }
- {
- Stopwatch HashTimer;
- SharedBuffer DataBuffer(IoBufferBuilder::MakeFromFile(FilePath));
- DataHash = IoHash::HashBuffer(CompositeBuffer(DataBuffer));
- ZEN_INFO("Hashed loose file '{}' {}: {} in {}",
- FilePath,
- NiceBytes(DataBuffer.GetSize()),
- DataHash,
- NiceTimeSpanMs(HashTimer.GetElapsedTimeMs()));
- }
+ std::unordered_set<IoHash, IoHash::Hasher> FoundHashes;
+ FoundHashes.reserve(UploadAttachments.size());
+ for (const auto& It : UploadAttachments)
+ {
+ FoundHashes.insert(It.first);
+ }
- // Rewrite file array entry with new data reference
- CbObjectWriter Writer;
- RewriteCbObject(Writer, View, [&](CbObjectWriter&, CbFieldView Field) -> bool {
- if (Field.GetName() == "data"sv)
- {
- // omit this field as we will write it explicitly ourselves
- return true;
- }
- return false;
- });
- Writer.AddBinaryAttachment("data"sv, DataHash);
- UploadAttachments.insert_or_assign(DataHash, FoundAttachment{.RawPath = FilePath, .Key = Key});
+ std::unordered_set<IoHash, IoHash::Hasher> MissingHashes;
+ std::vector<remotestore_impl::FoundChunkedFile> AttachmentsToChunk;
- CbObject RewrittenOp = Writer.Save();
- Cbo.AddObject(std::move(RewrittenOp));
- CopyField = false;
- }
- }
+ remotestore_impl::FindChunkSizes(ChunkStore,
+ WorkerPool,
+ MaxChunkEmbedSize,
+ ChunkFileSizeLimit,
+ AllowChunking,
+ AttachmentTempPath,
+ UploadAttachments,
+ MissingHashes,
+ AttachmentsToChunk,
+ OptionalContext);
- if (CopyField)
- {
- Cbo.AddField(Field);
- }
- else
- {
- OpRewritten = true;
- }
- }
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ return {};
+ }
- if (!OpRewritten)
- {
- CB(Op);
- return;
- }
+ for (const IoHash& AttachmentHash : MissingHashes)
+ {
+ auto It = UploadAttachments.find(AttachmentHash);
+ ZEN_ASSERT(It != UploadAttachments.end());
+ std::optional<CbObject> Op = Oplog.GetOpByKey(It->second.Key);
+ ZEN_ASSERT(Op.has_value());
- Cbo.EndArray();
- CbArray FilesArray = Cbo.Save().AsArray();
+ if (IgnoreMissingAttachments)
+ {
+ remotestore_impl::ReportMessage(OptionalContext,
+ fmt::format("Missing attachment '{}' for op '{}'", AttachmentHash, It->second.Key));
+ }
+ else
+ {
+ ExtendableStringBuilder<1024> Sb;
+ Sb.Append("Failed to find attachment '");
+ Sb.Append(AttachmentHash.ToHexString());
+ Sb.Append("' for op: \n");
+ Op.value().ToJson(Sb);
+ throw std::runtime_error(Sb.ToString());
+ }
+ UploadAttachments.erase(AttachmentHash);
+ }
- CbObject RewrittenOp = RewriteCbObject(Op, [&](CbObjectWriter& NewWriter, CbFieldView Field) -> bool {
- if (Field.GetName() == "files"sv)
- {
- NewWriter.AddArray("files"sv, FilesArray);
+ std::vector<remotestore_impl::ChunkedFile> ChunkedFiles = ChunkAttachments(WorkerPool, AttachmentsToChunk, OptionalContext);
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ return {};
+ }
- return true;
- }
+ for (const remotestore_impl::ChunkedFile& Chunked : ChunkedFiles)
+ {
+ UploadAttachments.erase(Chunked.Chunked.Info.RawHash);
+ for (const IoHash& ChunkHash : Chunked.Chunked.Info.ChunkHashes)
+ {
+ UploadAttachments.erase(ChunkHash);
+ }
+ }
- return false;
- });
- CB(RewrittenOp);
- };
+ size_t ChunkedChunkCount = std::accumulate(
+ ChunkedFiles.begin(),
+ ChunkedFiles.end(),
+ size_t(0),
+ [](size_t Current, const remotestore_impl::ChunkedFile& Value) { return Current + Value.Chunked.Info.ChunkHashes.size(); });
- remotestore_impl::ReportMessage(OptionalContext, "Building exported oplog and collecting attachments");
+ size_t ReusedAttachmentCount = 0;
+ std::vector<size_t> ReusedBlockIndexes;
+ {
+ std::unordered_set<IoHash, IoHash::Hasher> UniqueChunkHashes;
+ UniqueChunkHashes.reserve(FoundHashes.size() + ChunkedChunkCount);
- Stopwatch Timer;
+ UniqueChunkHashes.insert(FoundHashes.begin(), FoundHashes.end());
- size_t TotalOpCount = Oplog.GetOplogEntryCount();
- CompressedBuffer CompressedOpsSection;
+ for (remotestore_impl::ChunkedFile& Chunked : ChunkedFiles)
+ {
+ UniqueChunkHashes.insert(Chunked.Chunked.Info.ChunkHashes.begin(), Chunked.Chunked.Info.ChunkHashes.end());
+ }
+ std::vector<IoHash> ChunkHashes(UniqueChunkHashes.begin(), UniqueChunkHashes.end());
+
+ std::vector<uint32_t> ChunkIndexes;
+ ChunkIndexes.resize(ChunkHashes.size());
+ std::iota(ChunkIndexes.begin(), ChunkIndexes.end(), 0);
+
+ std::vector<uint32_t> UnusedChunkIndexes;
+ ReuseBlocksStatistics ReuseBlocksStats;
+
+ ReusedBlockIndexes = FindReuseBlocks(JobContextOutput.Log(),
+ /*BlockReuseMinPercentLimit*/ 80,
+ /*IsVerbose*/ false,
+ ReuseBlocksStats,
+ KnownBlocks,
+ ChunkHashes,
+ ChunkIndexes,
+ UnusedChunkIndexes);
+ for (size_t KnownBlockIndex : ReusedBlockIndexes)
{
- Stopwatch RewriteOplogTimer;
- CbObjectWriter SectionOpsWriter;
- SectionOpsWriter.BeginArray("ops"sv);
+ const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex];
+ for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes)
{
- Oplog.IterateOplogWithKey([&](int, const Oid& Key, CbObjectView Op) {
- if (RemoteResult.IsError())
- {
- return;
- }
- Op.IterateAttachments([&](CbFieldView FieldView) {
- UploadAttachments.insert_or_assign(FieldView.AsAttachment(), FoundAttachment{.Key = Key});
- });
- if (EmbedLooseFiles)
- {
- RewriteOp(Key, Op, [&SectionOpsWriter](CbObjectView Op) { SectionOpsWriter << Op; });
- }
- else
- {
- SectionOpsWriter << Op;
- }
- OpCount++;
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return;
- }
- if (OpCount % 1000 == 0)
- {
- remotestore_impl::ReportProgress(OptionalContext,
- "Building oplog"sv,
- fmt::format("{} ops processed", OpCount),
- TotalOpCount,
- TotalOpCount - OpCount);
- }
- });
- if (RemoteResult.IsError())
- {
- return {};
- }
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return {};
- }
- if (TotalOpCount > 0)
+ if (UploadAttachments.erase(KnownHash) == 1)
{
- remotestore_impl::ReportProgress(OptionalContext,
- "Building oplog"sv,
- fmt::format("{} ops processed", OpCount),
- TotalOpCount,
- 0);
+ ReusedAttachmentCount++;
}
}
- SectionOpsWriter.EndArray(); // "ops"
-
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Rewrote {} ops to new oplog in {}",
- OpCount,
- NiceTimeSpanMs(static_cast<uint64_t>(RewriteOplogTimer.GetElapsedTimeMs()))));
-
- {
- Stopwatch CompressOpsTimer;
- CompressedOpsSection =
- CompressedBuffer::Compress(SectionOpsWriter.Save().GetBuffer(), OodleCompressor::Mermaid, OodleCompressionLevel::Fast);
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Compressed oplog section {} ({} -> {}) in {}",
- CompressedOpsSection.DecodeRawHash(),
- NiceBytes(CompressedOpsSection.DecodeRawSize()),
- NiceBytes(CompressedOpsSection.GetCompressedSize()),
- NiceTimeSpanMs(static_cast<uint64_t>(CompressOpsTimer.GetElapsedTimeMs()))));
- }
}
+ }
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return {};
- }
+ std::unordered_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher> LargeChunkAttachments;
+ std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher> LooseUploadAttachments;
- std::unordered_set<IoHash, IoHash::Hasher> FoundHashes;
- FoundHashes.reserve(UploadAttachments.size());
- for (const auto& It : UploadAttachments)
+ if (UploadAttachments.empty())
+ {
+ if (ReusedAttachmentCount != 0)
{
- FoundHashes.insert(It.first);
+ remotestore_impl::ReportMessage(
+ OptionalContext,
+ fmt::format("Found all {} attachments from {} ops in existing blocks", ReusedAttachmentCount, TotalOpCount));
}
+ }
+ else
+ {
+ const size_t TotalAttachmentCount = UploadAttachments.size() + ReusedAttachmentCount;
+ remotestore_impl::ReportMessage(OptionalContext,
+ fmt::format("Resolving {} attachments from {} ops ({} ({:.1f}%) found in existing blocks)",
+ UploadAttachments.size(),
+ TotalOpCount,
+ ReusedAttachmentCount,
+ (100.f * ReusedAttachmentCount) / TotalAttachmentCount));
+
+ ResolveAttachments(ChunkStore,
+ WorkerPool,
+ MaxChunkEmbedSize,
+ AttachmentTempPath,
+ UploadAttachments,
+ LargeChunkAttachments,
+ LooseUploadAttachments,
+ OptionalContext);
- size_t ReusedAttachmentCount = 0;
- std::vector<size_t> ReusedBlockIndexes;
+ if (remotestore_impl::IsCancelled(OptionalContext))
{
- std::vector<IoHash> ChunkHashes;
- ChunkHashes.reserve(FoundHashes.size());
- ChunkHashes.insert(ChunkHashes.begin(), FoundHashes.begin(), FoundHashes.end());
- std::vector<uint32_t> ChunkIndexes;
- ChunkIndexes.resize(FoundHashes.size());
- std::iota(ChunkIndexes.begin(), ChunkIndexes.end(), 0);
-
- std::vector<uint32_t> UnusedChunkIndexes;
- ReuseBlocksStatistics ReuseBlocksStats;
-
- ReusedBlockIndexes = FindReuseBlocks(*LogOutput,
- /*BlockReuseMinPercentLimit*/ 80,
- /*IsVerbose*/ false,
- ReuseBlocksStats,
- KnownBlocks,
- ChunkHashes,
- ChunkIndexes,
- UnusedChunkIndexes);
- for (size_t KnownBlockIndex : ReusedBlockIndexes)
- {
- const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex];
- for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes)
- {
- if (UploadAttachments.erase(KnownHash) == 1)
- {
- ReusedAttachmentCount++;
- }
- }
- }
+ return {};
}
+ }
- struct ChunkedFile
- {
- IoBuffer Source;
-
- ChunkedInfoWithSource Chunked;
- tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkLoookup;
- };
- std::vector<ChunkedFile> ChunkedFiles;
-
- auto ChunkFile = [](const IoHash& RawHash, IoBuffer& RawData, const IoBufferFileReference& FileRef, JobContext*) -> ChunkedFile {
- ChunkedFile Chunked;
- Stopwatch Timer;
-
- uint64_t Offset = FileRef.FileChunkOffset;
- uint64_t Size = FileRef.FileChunkSize;
-
- BasicFile SourceFile;
- SourceFile.Attach(FileRef.FileHandle);
- auto __ = MakeGuard([&SourceFile]() { SourceFile.Detach(); });
+ std::unordered_set<IoHash, IoHash::Hasher> LargeChunkHashes;
- Chunked.Chunked = ChunkData(SourceFile, Offset, Size, UShaderByteCodeParams);
- ZEN_ASSERT(Chunked.Chunked.Info.RawHash == RawHash);
- Chunked.Source = RawData;
+ for (auto& It : LargeChunkAttachments)
+ {
+ UploadAttachments.erase(It.first);
+ LargeChunkHashes.insert(It.first);
+ OnLargeAttachment(It.first, std::move(It.second));
+ }
- ZEN_INFO("Chunked large attachment '{}' {} into {} chunks in {}",
- RawHash,
- NiceBytes(Chunked.Chunked.Info.RawSize),
- Chunked.Chunked.Info.ChunkHashes.size(),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ RwLock BlocksLock;
+ std::vector<ChunkBlockDescription> Blocks;
- return Chunked;
- };
+ std::vector<std::pair<IoHash, Oid>> SortedUploadAttachments;
+ SortedUploadAttachments.reserve(UploadAttachments.size());
+ for (const auto& It : UploadAttachments)
+ {
+ SortedUploadAttachments.push_back(std::make_pair(It.first, It.second.Key));
+ }
- RwLock ResolveLock;
- std::unordered_set<IoHash, IoHash::Hasher> ChunkedHashes;
- std::unordered_set<IoHash, IoHash::Hasher> LargeChunkHashes;
- std::unordered_map<IoHash, size_t, IoHash::Hasher> ChunkedUploadAttachments;
- std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher> LooseUploadAttachments;
- std::unordered_set<IoHash, IoHash::Hasher> MissingHashes;
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ return {};
+ }
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Resolving {} attachments from {} ops", UploadAttachments.size(), TotalOpCount));
+ remotestore_impl::ReportMessage(OptionalContext,
+ fmt::format("Sorting {} attachments from {} ops", SortedUploadAttachments.size(), TotalOpCount));
- Latch ResolveAttachmentsLatch(1);
- for (auto& It : UploadAttachments)
- {
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return {};
- }
+ // Sort attachments so we get predictable blocks for the same oplog upload
+ std::sort(SortedUploadAttachments.begin(),
+ SortedUploadAttachments.end(),
+ [](const std::pair<IoHash, Oid>& Lhs, const std::pair<IoHash, Oid>& Rhs) {
+ if (Lhs.second == Rhs.second)
+ {
+ // Same key, sort by raw hash
+ return Lhs.first < Rhs.first;
+ }
+ // Sort by key
+ return Lhs.second < Rhs.second;
+ });
- ResolveAttachmentsLatch.AddCount(1);
+ std::vector<size_t> ChunkedFilesOrder;
+ ChunkedFilesOrder.reserve(ChunkedFiles.size());
+ for (size_t Index = 0; Index < ChunkedFiles.size(); Index++)
+ {
+ ChunkedFilesOrder.push_back(Index);
+ }
+ std::sort(ChunkedFilesOrder.begin(), ChunkedFilesOrder.end(), [&ChunkedFiles](size_t Lhs, size_t Rhs) {
+ return ChunkedFiles[Lhs].Chunked.Info.RawHash < ChunkedFiles[Rhs].Chunked.Info.RawHash;
+ });
- WorkerPool.ScheduleWork(
- [&ChunkStore,
- UploadAttachment = &It.second,
- RawHash = It.first,
- &ResolveAttachmentsLatch,
- &ResolveLock,
- &ChunkedHashes,
- &LargeChunkHashes,
- &ChunkedUploadAttachments,
- &LooseUploadAttachments,
- &MissingHashes,
- &OnLargeAttachment,
- &AttachmentTempPath,
- &ChunkFile,
- &ChunkedFiles,
- MaxChunkEmbedSize,
- ChunkFileSizeLimit,
- AllowChunking,
- &RemoteResult,
- OptionalContext]() {
- ZEN_TRACE_CPU("PrepareChunk");
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ return {};
+ }
+ remotestore_impl::ReportMessage(
+ OptionalContext,
+ fmt::format("Assembling {} attachments and {} chunked parts from {} ops into blocks and loose attachments",
+ SortedUploadAttachments.size(),
+ ChunkedChunkCount,
+ TotalOpCount));
- auto _ = MakeGuard([&ResolveAttachmentsLatch] { ResolveAttachmentsLatch.CountDown(); });
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- return;
- }
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ return {};
+ }
- try
- {
- if (!UploadAttachment->RawPath.empty())
- {
- const std::filesystem::path& FilePath = UploadAttachment->RawPath;
- IoBuffer RawData = IoBufferBuilder::MakeFromFile(FilePath);
- if (RawData)
- {
- if (AllowChunking && RawData.GetSize() > ChunkFileSizeLimit)
- {
- IoBufferFileReference FileRef;
- (void)RawData.GetFileReference(FileRef);
-
- ChunkedFile Chunked = ChunkFile(RawHash, RawData, FileRef, OptionalContext);
- ResolveLock.WithExclusiveLock(
- [RawHash, &ChunkedFiles, &ChunkedUploadAttachments, &ChunkedHashes, &Chunked]() {
- ChunkedUploadAttachments.insert_or_assign(RawHash, ChunkedFiles.size());
- ChunkedHashes.reserve(ChunkedHashes.size() + Chunked.Chunked.Info.ChunkHashes.size());
- for (const IoHash& ChunkHash : Chunked.Chunked.Info.ChunkHashes)
- {
- ChunkedHashes.insert(ChunkHash);
- }
- ChunkedFiles.emplace_back(std::move(Chunked));
- });
- }
- else if (RawData.GetSize() > (MaxChunkEmbedSize * 2))
- {
- // Assume the compressed file is going to be larger than MaxChunkEmbedSize, even if it isn't
- // it will be a loose attachment instead of going into a block
- OnLargeAttachment(RawHash, [RawData = std::move(RawData), AttachmentTempPath](const IoHash& RawHash) {
- size_t RawSize = RawData.GetSize();
- CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer(std::move(RawData)),
- OodleCompressor::Mermaid,
- OodleCompressionLevel::VeryFast);
+ size_t ChunkAssembleCount = SortedUploadAttachments.size() + ChunkedChunkCount;
+ size_t ChunksAssembled = 0;
+ remotestore_impl::ReportMessage(OptionalContext,
+ fmt::format("Assembling {} attachments from {} ops into blocks", ChunkAssembleCount, TotalOpCount));
- std::filesystem::path AttachmentPath = AttachmentTempPath;
- AttachmentPath.append(RawHash.ToHexString());
- IoBuffer TempAttachmentBuffer =
- WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath);
- ZEN_INFO("Saved temp attachment to '{}', {} ({})",
- AttachmentPath,
- NiceBytes(RawSize),
- NiceBytes(TempAttachmentBuffer.GetSize()));
- return TempAttachmentBuffer;
- });
- ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); });
- }
- else
- {
- uint64_t RawSize = RawData.GetSize();
- CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer(RawData),
- OodleCompressor::Mermaid,
- OodleCompressionLevel::VeryFast);
-
- std::filesystem::path AttachmentPath = AttachmentTempPath;
- AttachmentPath.append(RawHash.ToHexString());
-
- uint64_t CompressedSize = Compressed.GetCompressedSize();
- IoBuffer TempAttachmentBuffer = WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath);
- ZEN_INFO("Saved temp attachment to '{}', {} ({})",
- AttachmentPath,
- NiceBytes(RawSize),
- NiceBytes(TempAttachmentBuffer.GetSize()));
-
- if (CompressedSize > MaxChunkEmbedSize)
- {
- OnLargeAttachment(RawHash,
- [Data = std::move(TempAttachmentBuffer)](const IoHash&) { return Data; });
- ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); });
- }
- else
- {
- UploadAttachment->Size = CompressedSize;
- ResolveLock.WithExclusiveLock(
- [RawHash, RawSize, &LooseUploadAttachments, Data = std::move(TempAttachmentBuffer)]() {
- LooseUploadAttachments.insert_or_assign(RawHash, std::make_pair(RawSize, std::move(Data)));
- });
- }
- }
- }
- else
- {
- ResolveLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); });
- }
- }
- else
- {
- IoBuffer Data = ChunkStore.FindChunkByCid(RawHash);
- if (Data)
- {
- auto GetForChunking =
- [](size_t ChunkFileSizeLimit, const IoBuffer& Data, IoBufferFileReference& OutFileRef) -> bool {
- if (Data.IsWholeFile())
- {
- IoHash VerifyRawHash;
- uint64_t VerifyRawSize;
- CompressedBuffer Compressed =
- CompressedBuffer::FromCompressed(SharedBuffer(Data), VerifyRawHash, VerifyRawSize);
- if (Compressed)
- {
- if (VerifyRawSize > ChunkFileSizeLimit)
- {
- OodleCompressor Compressor;
- OodleCompressionLevel CompressionLevel;
- uint64_t BlockSize;
- if (Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize))
- {
- if (CompressionLevel == OodleCompressionLevel::None)
- {
- CompositeBuffer Decompressed = Compressed.DecompressToComposite();
- if (Decompressed)
- {
- std::span<const SharedBuffer> Segments = Decompressed.GetSegments();
- if (Segments.size() == 1)
- {
- IoBuffer DecompressedData = Segments[0].AsIoBuffer();
- if (DecompressedData.GetFileReference(OutFileRef))
- {
- return true;
- }
- }
- }
- }
- }
- }
- }
- }
- return false;
- };
+ std::atomic<bool> AbortFlag(false);
+ std::atomic<bool> PauseFlag(false);
+ ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
- IoBufferFileReference FileRef;
- if (AllowChunking && GetForChunking(ChunkFileSizeLimit, Data, FileRef))
- {
- ChunkedFile Chunked = ChunkFile(RawHash, Data, FileRef, OptionalContext);
- ResolveLock.WithExclusiveLock(
- [RawHash, &ChunkedFiles, &ChunkedUploadAttachments, &ChunkedHashes, &Chunked]() {
- ChunkedUploadAttachments.insert_or_assign(RawHash, ChunkedFiles.size());
- ChunkedHashes.reserve(ChunkedHashes.size() + Chunked.Chunked.Info.ChunkHashes.size());
- for (const IoHash& ChunkHash : Chunked.Chunked.Info.ChunkHashes)
- {
- ChunkedHashes.insert(ChunkHash);
- }
- ChunkedFiles.emplace_back(std::move(Chunked));
- });
- }
- else if (Data.GetSize() > MaxChunkEmbedSize)
- {
- OnLargeAttachment(RawHash,
- [&ChunkStore](const IoHash& RawHash) { return ChunkStore.FindChunkByCid(RawHash); });
- ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); });
- }
- else
- {
- UploadAttachment->Size = Data.GetSize();
- }
- }
- else
- {
- ResolveLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); });
- }
- }
- }
- catch (const std::exception& Ex)
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::NotFound),
- fmt::format("Failed to resolve attachment {}", RawHash),
- Ex.what());
- }
- },
- WorkerThreadPool::EMode::EnableBacklog);
- }
- ResolveAttachmentsLatch.CountDown();
+ uint32_t ComposedBlocks = 0;
- while (!ResolveAttachmentsLatch.Wait(1000))
- {
- ptrdiff_t Remaining = ResolveAttachmentsLatch.Remaining();
- if (remotestore_impl::IsCancelled(OptionalContext))
+ uint64_t CreateBlocksStartMS = Timer.GetElapsedTimeMs();
+ {
+ Stopwatch BlockCreateProgressTimer;
+ remotestore_impl::BlockComposer Composer(remotestore_impl::BlockComposer::Configuration{
+ .MaxBlockSize = MaxBlockSize,
+ .MaxChunksPerBlock = MaxChunksPerBlock,
+ .MaxChunkEmbedSize = MaxChunkEmbedSize,
+ .IsCancelledFunc = [OptionalContext]() { return remotestore_impl::IsCancelled(OptionalContext); }});
+
+ auto OnNewBlock = [&Log,
+ &Work,
+ &WorkerPool,
+ BuildBlocks,
+ &BlockCreateProgressTimer,
+ &BlocksLock,
+ &Blocks,
+ &AsyncOnBlock,
+ &OnBlockChunks,
+ ChunkAssembleCount,
+ &ChunksAssembled,
+ &ComposedBlocks,
+ OptionalContext](std::vector<IoHash>&& ChunkRawHashes,
+ const std::function<FetchChunkFunc(const IoHash& AttachmentHash)>& FetchAttachmentResolver) {
+ size_t ChunkCount = ChunkRawHashes.size();
+ std::vector<std::pair<IoHash, FetchChunkFunc>> ChunksInBlock;
+ ChunksInBlock.reserve(ChunkCount);
+
+ for (const IoHash& AttachmentHash : ChunkRawHashes)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- while (!ResolveAttachmentsLatch.Wait(1000))
- {
- Remaining = ResolveAttachmentsLatch.Remaining();
- remotestore_impl::ReportProgress(OptionalContext,
- "Resolving attachments"sv,
- fmt::format("Aborting, {} attachments remaining...", Remaining),
- UploadAttachments.size(),
- Remaining);
- }
- remotestore_impl::ReportProgress(OptionalContext, "Resolving attachments"sv, "Aborted"sv, UploadAttachments.size(), 0);
- return {};
+ ChunksInBlock.emplace_back(std::make_pair(AttachmentHash, FetchAttachmentResolver(AttachmentHash)));
}
- remotestore_impl::ReportProgress(OptionalContext,
- "Resolving attachments"sv,
- fmt::format("{} remaining...", Remaining),
- UploadAttachments.size(),
- Remaining);
- }
- if (UploadAttachments.size() > 0)
- {
- remotestore_impl::ReportProgress(OptionalContext, "Resolving attachments"sv, ""sv, UploadAttachments.size(), 0);
- }
-
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return {};
- }
- for (const IoHash& AttachmentHash : MissingHashes)
- {
- auto It = UploadAttachments.find(AttachmentHash);
- ZEN_ASSERT(It != UploadAttachments.end());
- std::optional<CbObject> Op = Oplog.GetOpByKey(It->second.Key);
- ZEN_ASSERT(Op.has_value());
-
- if (IgnoreMissingAttachments)
+ size_t BlockIndex = remotestore_impl::AddBlock(BlocksLock, Blocks);
+ if (BuildBlocks)
{
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Missing attachment '{}' for op '{}'", AttachmentHash, It->second.Key));
+ remotestore_impl::AsyncCreateBlock(Log(),
+ Work,
+ WorkerPool,
+ std::move(ChunksInBlock),
+ BlocksLock,
+ Blocks,
+ BlockIndex,
+ AsyncOnBlock,
+ OptionalContext);
}
else
{
- ExtendableStringBuilder<1024> Sb;
- Sb.Append("Failed to find attachment '");
- Sb.Append(AttachmentHash.ToHexString());
- Sb.Append("' for op: \n");
- Op.value().ToJson(Sb);
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::NotFound), Sb.ToString(), {});
- return {};
- }
- UploadAttachments.erase(AttachmentHash);
- }
+ ZEN_INFO("Bulk group {} attachments", ChunkCount);
- for (const auto& It : ChunkedUploadAttachments)
- {
- UploadAttachments.erase(It.first);
- }
- for (const auto& It : LargeChunkHashes)
- {
- UploadAttachments.erase(It);
- }
-
- {
- std::vector<IoHash> ChunkHashes;
- ChunkHashes.reserve(ChunkedHashes.size());
- ChunkHashes.insert(ChunkHashes.begin(), ChunkedHashes.begin(), ChunkedHashes.end());
- std::vector<uint32_t> ChunkIndexes;
- ChunkIndexes.resize(ChunkedHashes.size());
- std::iota(ChunkIndexes.begin(), ChunkIndexes.end(), 0);
-
- std::vector<uint32_t> UnusedChunkIndexes;
- ReuseBlocksStatistics ReuseBlocksStats;
-
- std::vector<size_t> ReusedBlockFromChunking = FindReuseBlocks(*LogOutput,
- /*BlockReuseMinPercentLimit*/ 80,
- /*IsVerbose*/ false,
- ReuseBlocksStats,
- KnownBlocks,
- ChunkHashes,
- ChunkIndexes,
- UnusedChunkIndexes);
- for (size_t KnownBlockIndex : ReusedBlockIndexes)
- {
- const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex];
- for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes)
- {
- if (ChunkedHashes.erase(KnownHash) == 1)
- {
- ReusedAttachmentCount++;
- }
- }
+ // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index
+ RwLock::SharedLockScope _(BlocksLock);
+ Blocks[BlockIndex].ChunkRawHashes = std::move(ChunkRawHashes);
+ OnBlockChunks(std::move(ChunksInBlock));
}
- ReusedBlockIndexes.insert(ReusedBlockIndexes.end(), ReusedBlockFromChunking.begin(), ReusedBlockFromChunking.end());
- }
- std::sort(ReusedBlockIndexes.begin(), ReusedBlockIndexes.end());
- auto UniqueKnownBlocksEnd = std::unique(ReusedBlockIndexes.begin(), ReusedBlockIndexes.end());
- size_t ReuseBlockCount = std::distance(ReusedBlockIndexes.begin(), UniqueKnownBlocksEnd);
- if (ReuseBlockCount > 0)
- {
- Blocks.reserve(ReuseBlockCount);
- for (auto It = ReusedBlockIndexes.begin(); It != UniqueKnownBlocksEnd; It++)
+ ChunksAssembled += ChunkCount;
+ ComposedBlocks++;
+
+ if (ChunksAssembled % 1000 == 0)
{
- Blocks.push_back({KnownBlocks[*It]});
+ remotestore_impl::ReportProgress(
+ OptionalContext,
+ "Assembling blocks"sv,
+ fmt::format("{} attachments processed, {} blocks assembled", ChunksAssembled, ComposedBlocks),
+ ChunkAssembleCount,
+ ChunkAssembleCount - ChunksAssembled,
+ BlockCreateProgressTimer.GetElapsedTimeMs());
}
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Reused {} attachments from {} blocks", ReusedAttachmentCount, ReuseBlockCount));
- }
-
- std::vector<std::pair<IoHash, Oid>> SortedUploadAttachments;
- SortedUploadAttachments.reserve(UploadAttachments.size());
- for (const auto& It : UploadAttachments)
- {
- SortedUploadAttachments.push_back(std::make_pair(It.first, It.second.Key));
- }
-
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return {};
- }
-
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Sorting {} attachments from {} ops", SortedUploadAttachments.size(), TotalOpCount));
-
- // Sort attachments so we get predictable blocks for the same oplog upload
- std::sort(SortedUploadAttachments.begin(),
- SortedUploadAttachments.end(),
- [](const std::pair<IoHash, Oid>& Lhs, const std::pair<IoHash, Oid>& Rhs) {
- if (Lhs.second == Rhs.second)
- {
- // Same key, sort by raw hash
- return Lhs.first < Rhs.first;
- }
- // Sort by key
- return Lhs.second < Rhs.second;
- });
-
- std::vector<size_t> ChunkedFilesOrder;
- ChunkedFilesOrder.reserve(ChunkedFiles.size());
- for (size_t Index = 0; Index < ChunkedFiles.size(); Index++)
- {
- ChunkedFilesOrder.push_back(Index);
- }
- std::sort(ChunkedFilesOrder.begin(), ChunkedFilesOrder.end(), [&ChunkedFiles](size_t Lhs, size_t Rhs) {
- return ChunkedFiles[Lhs].Chunked.Info.RawHash < ChunkedFiles[Rhs].Chunked.Info.RawHash;
- });
-
- // SortedUploadAttachments now contains all whole chunks with size to be composed into blocks and uploaded
- // ChunkedHashes contains all chunked up chunks to be composed into blocks
-
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return {};
- }
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Assembling {} attachments and {} chunked parts from {} ops into blocks and loose attachments",
- SortedUploadAttachments.size(),
- ChunkedHashes.size(),
- TotalOpCount));
-
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return {};
- }
-
- // SortedUploadAttachments now contains all whole chunks with size to be composed into blocks and uploaded
- // ChunkedHashes contains all chunked up chunks to be composed into blocks
-
- size_t ChunkAssembleCount = SortedUploadAttachments.size() + ChunkedHashes.size();
- size_t ChunksAssembled = 0;
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Assembling {} attachments from {} ops into blocks", ChunkAssembleCount, TotalOpCount));
-
- Latch BlockCreateLatch(1);
- size_t GeneratedBlockCount = 0;
- size_t BlockSize = 0;
- std::vector<std::pair<IoHash, FetchChunkFunc>> ChunksInBlock;
-
- Oid LastOpKey = Oid::Zero;
- uint32_t ComposedBlocks = 0;
+ };
- uint64_t CreateBlocksStartMS = Timer.GetElapsedTimeMs();
- try
{
- uint64_t FetchAttachmentsStartMS = Timer.GetElapsedTimeMs();
- std::unordered_set<IoHash, IoHash::Hasher> AddedAttachmentHashes;
- auto NewBlock = [&]() {
- size_t BlockIndex = remotestore_impl::AddBlock(BlocksLock, Blocks);
- size_t ChunkCount = ChunksInBlock.size();
- std::vector<IoHash> ChunkRawHashes;
- ChunkRawHashes.reserve(ChunkCount);
- for (const std::pair<IoHash, FetchChunkFunc>& Chunk : ChunksInBlock)
- {
- ChunkRawHashes.push_back(Chunk.first);
- }
- if (BuildBlocks)
- {
- remotestore_impl::CreateBlock(WorkerPool,
- BlockCreateLatch,
- std::move(ChunksInBlock),
- BlocksLock,
- Blocks,
- BlockIndex,
- AsyncOnBlock,
- RemoteResult);
- ComposedBlocks++;
- }
- else
- {
- ZEN_INFO("Bulk group {} attachments", ChunkCount);
- OnBlockChunks(std::move(ChunksInBlock));
- }
- {
- // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index
- RwLock::SharedLockScope _(BlocksLock);
- Blocks[BlockIndex].ChunkRawHashes = std::move(ChunkRawHashes);
- }
- uint64_t NowMS = Timer.GetElapsedTimeMs();
- ZEN_INFO("Assembled block {} with {} chunks in {} ({})",
- BlockIndex,
- ChunkCount,
- NiceTimeSpanMs(NowMS - FetchAttachmentsStartMS),
- NiceBytes(BlockSize));
- FetchAttachmentsStartMS = NowMS;
- ChunksInBlock.clear();
- BlockSize = 0;
- GeneratedBlockCount++;
- };
-
- for (auto HashIt = SortedUploadAttachments.begin(); HashIt != SortedUploadAttachments.end(); HashIt++)
+ std::vector<IoHash> AttachmentHashes;
+ AttachmentHashes.reserve(SortedUploadAttachments.size());
+ std::vector<uint64_t> AttachmentSizes;
+ AttachmentSizes.reserve(SortedUploadAttachments.size());
+ std::vector<Oid> AttachmentKeys;
+ AttachmentKeys.reserve(SortedUploadAttachments.size());
+
+ for (const std::pair<IoHash, Oid>& Attachment : SortedUploadAttachments)
{
- if (remotestore_impl::IsCancelled(OptionalContext))
+ AttachmentHashes.push_back(Attachment.first);
+ if (auto It = UploadAttachments.find(Attachment.first); It != UploadAttachments.end())
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- break;
- }
- if (ChunksAssembled % 1000 == 0)
- {
- remotestore_impl::ReportProgress(
- OptionalContext,
- "Assembling blocks"sv,
- fmt::format("{} attachments processed, {} blocks assembled", ChunksAssembled, ComposedBlocks),
- ChunkAssembleCount,
- ChunkAssembleCount - ChunksAssembled);
+ AttachmentSizes.push_back(It->second.Size);
}
- const IoHash& RawHash(HashIt->first);
- const Oid CurrentOpKey = HashIt->second;
- const IoHash& AttachmentHash(HashIt->first);
- auto InfoIt = UploadAttachments.find(RawHash);
- ZEN_ASSERT(InfoIt != UploadAttachments.end());
- uint64_t PayloadSize = InfoIt->second.Size;
-
- if (AddedAttachmentHashes.insert(AttachmentHash).second)
+ else
{
- if (BuildBlocks && ChunksInBlock.size() > 0)
- {
- if (((BlockSize + PayloadSize) > MaxBlockSize || (ChunksInBlock.size() + 1) > MaxChunksPerBlock) &&
- (CurrentOpKey != LastOpKey))
- {
- NewBlock();
- }
- }
-
- if (auto It = LooseUploadAttachments.find(RawHash); It != LooseUploadAttachments.end())
- {
- ChunksInBlock.emplace_back(std::make_pair(
- RawHash,
- [RawSize = It->second.first,
- IoBuffer = SharedBuffer(It->second.second)](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> {
- return std::make_pair(RawSize, CompressedBuffer::FromCompressedNoValidate(IoBuffer.AsIoBuffer()));
- }));
- LooseUploadAttachments.erase(It);
- }
- else
- {
- ChunksInBlock.emplace_back(
- std::make_pair(RawHash, [&ChunkStore](const IoHash& RawHash) -> std::pair<uint64_t, CompressedBuffer> {
- IoBuffer Chunk = ChunkStore.FindChunkByCid(RawHash);
- if (!Chunk)
- {
- throw std::runtime_error(fmt::format("Failed to find chunk {} in cid store", RawHash));
- }
- IoHash ValidateRawHash;
- uint64_t RawSize = 0;
- CompressedBuffer Compressed =
- CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), ValidateRawHash, RawSize);
- if (!Compressed)
- {
- throw std::runtime_error(
- fmt::format("Chunk {} in cid store is malformed (not a compressed buffer)", RawHash));
- }
- if (RawHash != ValidateRawHash)
- {
- throw std::runtime_error(
- fmt::format("Chunk {} in cid store is malformed (mismatching raw hash)", RawHash));
- }
- return {RawSize, Compressed};
- }));
- }
- BlockSize += PayloadSize;
-
- LastOpKey = CurrentOpKey;
- ChunksAssembled++;
+ throw std::runtime_error(
+ fmt::format("Attachment to upload state inconsistent, could not find attachment {}", Attachment.first));
}
+ AttachmentKeys.push_back(Attachment.second);
}
- if (!RemoteResult.IsError())
- {
- // Keep the chunked files as separate blocks to make the blocks generated
- // more consistent
- if (BlockSize > 0)
+
+ auto FetchWholeAttachmentResolver = [&LooseUploadAttachments, &ChunkStore](const IoHash& AttachmentHash) -> FetchChunkFunc {
+ if (auto It = LooseUploadAttachments.find(AttachmentHash); It != LooseUploadAttachments.end())
{
- NewBlock();
+ uint64_t RawSize = It->second.first;
+ IoBuffer Payload = std::move(It->second.second);
+ return
+ [RawSize, Payload = std::move(Payload)](const IoHash& ChunkHash) mutable -> std::pair<uint64_t, CompositeBuffer> {
+ ZEN_UNUSED(ChunkHash);
+ return {RawSize, CompositeBuffer(SharedBuffer(std::move(Payload)))};
+ };
}
-
- for (size_t ChunkedFileIndex : ChunkedFilesOrder)
+ else
{
- const ChunkedFile& ChunkedFile = ChunkedFiles[ChunkedFileIndex];
- const ChunkedInfoWithSource& Chunked = ChunkedFile.Chunked;
- size_t ChunkCount = Chunked.Info.ChunkHashes.size();
- for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++)
- {
- if (remotestore_impl::IsCancelled(OptionalContext))
+ return [&ChunkStore](const IoHash& RawHash) -> std::pair<uint64_t, CompositeBuffer> {
+ IoBuffer Chunk = ChunkStore.FindChunkByCid(RawHash);
+ if (!Chunk)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- break;
+ throw std::runtime_error(fmt::format("Failed to find chunk {} in cid store", RawHash));
}
- if (ChunksAssembled % 1000 == 0)
+
+ // These are small chunks - make memory resident
+ Chunk = IoBufferBuilder::ReadFromFileMaybe(Chunk);
+
+ IoHash ValidateRawHash;
+ uint64_t RawSize = 0;
+ CompressedBuffer Compressed =
+ CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), ValidateRawHash, RawSize);
+ if (!Compressed)
{
- remotestore_impl::ReportProgress(
- OptionalContext,
- "Assembling blocks"sv,
- fmt::format("{} attachments processed, {} blocks assembled", ChunksAssembled, ComposedBlocks),
- ChunkAssembleCount,
- ChunkAssembleCount - ChunksAssembled);
+ throw std::runtime_error(fmt::format("Chunk {} in cid store is malformed (not a compressed buffer)", RawHash));
}
- const IoHash& ChunkHash = ChunkedFile.Chunked.Info.ChunkHashes[ChunkIndex];
- if (auto FindIt = ChunkedHashes.find(ChunkHash); FindIt != ChunkedHashes.end())
+ if (RawHash != ValidateRawHash)
{
- if (AddedAttachmentHashes.insert(ChunkHash).second)
- {
- const ChunkSource& Source = Chunked.ChunkSources[ChunkIndex];
- uint32_t ChunkSize = gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + Source.Size);
- if (BuildBlocks && ChunksInBlock.size() > 0)
- {
- if ((BlockSize + ChunkSize) > MaxBlockSize || (ChunksInBlock.size() + 1) > MaxChunksPerBlock)
- {
- NewBlock();
- }
- }
- ChunksInBlock.emplace_back(
- std::make_pair(ChunkHash,
- [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size](
- const IoHash&) -> std::pair<uint64_t, CompressedBuffer> {
- return {Size,
- CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)),
- OodleCompressor::Mermaid,
- OodleCompressionLevel::None)};
- }));
- BlockSize += CompressedBuffer::GetHeaderSizeForNoneEncoder() + Source.Size;
- ChunksAssembled++;
- }
- ChunkedHashes.erase(FindIt);
+ throw std::runtime_error(fmt::format("Chunk {} in cid store is malformed (mismatching raw hash)", RawHash));
}
- }
+ return {RawSize, Compressed.GetCompressed()};
+ };
}
- }
+ };
+
+ Composer.Compose(AttachmentHashes,
+ AttachmentSizes,
+ AttachmentKeys,
+ [&OnNewBlock, &FetchWholeAttachmentResolver](std::vector<IoHash>&& ChunkRawHashes) {
+ OnNewBlock(std::move(ChunkRawHashes), FetchWholeAttachmentResolver);
+ });
+ }
+
+ {
+ std::vector<IoHash> AttachmentHashes;
+ AttachmentHashes.reserve(ChunkedChunkCount);
+ std::vector<uint64_t> AttachmentSizes;
+ AttachmentSizes.reserve(ChunkedChunkCount);
+ std::vector<Oid> AttachmentKeys;
+ AttachmentKeys.reserve(ChunkedChunkCount);
- if (BlockSize > 0 && !RemoteResult.IsError())
+ tsl::robin_map<IoHash, std::pair<size_t, size_t>, IoHash::Hasher> ChunkHashToChunkFileIndexAndChunkIndex;
+
+ for (size_t ChunkedFileIndex : ChunkedFilesOrder)
{
- if (!remotestore_impl::IsCancelled(OptionalContext))
+ const remotestore_impl::ChunkedFile& ChunkedFile = ChunkedFiles[ChunkedFileIndex];
+ const ChunkedInfoWithSource& Chunked = ChunkedFile.Chunked;
+ size_t ChunkCount = Chunked.Info.ChunkHashes.size();
+ Oid ChunkedFileOid = Oid::NewOid();
+ for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++)
{
- NewBlock();
+ const IoHash& ChunkHash = Chunked.Info.ChunkHashes[ChunkIndex];
+ uint64_t ChunkSize = Chunked.ChunkSources[ChunkIndex].Size;
+ {
+ if (ChunkHashToChunkFileIndexAndChunkIndex
+ .insert(std::make_pair(ChunkHash, std::make_pair(ChunkedFileIndex, ChunkIndex)))
+ .second)
+ {
+ if (ChunkSize > MaxChunkEmbedSize)
+ {
+ OnLargeAttachment(ChunkHash,
+ [SourceBuffer = ChunkedFile.Source,
+ ChunkSource = Chunked.ChunkSources[ChunkIndex],
+ ChunkHash](const IoHash& RawHash) -> CompositeBuffer {
+ ZEN_ASSERT(RawHash == ChunkHash);
+ CompressedBuffer Compressed = CompressedBuffer::Compress(
+ SharedBuffer(IoBuffer(SourceBuffer, ChunkSource.Offset, ChunkSource.Size)),
+ OodleCompressor::Mermaid,
+ OodleCompressionLevel::None);
+ return Compressed.GetCompressed();
+ });
+
+ LargeChunkHashes.insert(ChunkHash);
+ }
+ else
+ {
+ AttachmentHashes.push_back(ChunkHash);
+ AttachmentSizes.push_back(ChunkSize);
+ AttachmentKeys.push_back(ChunkedFileOid);
+ }
+ }
+ }
}
}
- if (ChunkAssembleCount > 0)
- {
- remotestore_impl::ReportProgress(
- OptionalContext,
- "Assembling blocks"sv,
- fmt::format("{} attachments processed, {} blocks assembled", ChunksAssembled, ComposedBlocks),
- ChunkAssembleCount,
- 0);
- }
-
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Built oplog and collected {} attachments from {} ops into {} blocks and in {}",
- ChunkAssembleCount,
- TotalOpCount,
- GeneratedBlockCount,
- NiceTimeSpanMs(static_cast<uint64_t>(Timer.GetElapsedTimeMs()))));
-
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- BlockCreateLatch.CountDown();
- while (!BlockCreateLatch.Wait(1000))
+ auto ChunkedFileAttachmentResolver = [&ChunkHashToChunkFileIndexAndChunkIndex,
+ &ChunkedFiles](const IoHash& AttachmentHash) -> FetchChunkFunc {
+ if (auto It = ChunkHashToChunkFileIndexAndChunkIndex.find(AttachmentHash);
+ It != ChunkHashToChunkFileIndexAndChunkIndex.end())
{
- ptrdiff_t Remaining = BlockCreateLatch.Remaining();
- remotestore_impl::ReportProgress(OptionalContext,
- "Assembling blocks"sv,
- fmt::format("Aborting, {} blocks remaining...", Remaining),
- GeneratedBlockCount,
- Remaining);
+ const std::pair<size_t, size_t>& ChunkFileIndexAndChunkIndex = It->second;
+ size_t ChunkedFileIndex = ChunkFileIndexAndChunkIndex.first;
+ size_t ChunkIndex = ChunkFileIndexAndChunkIndex.second;
+ const remotestore_impl::ChunkedFile& ChunkedFile = ChunkedFiles[ChunkedFileIndex];
+
+ const ChunkSource& Source = ChunkedFile.Chunked.ChunkSources[ChunkIndex];
+ ZEN_ASSERT(Source.Offset + Source.Size <= ChunkedFile.Source.GetSize());
+
+ return [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size](
+ const IoHash&) -> std::pair<uint64_t, CompositeBuffer> {
+ return {Size,
+ CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)),
+ OodleCompressor::Mermaid,
+ OodleCompressionLevel::None)
+ .GetCompressed()};
+ };
}
- if (GeneratedBlockCount > 0)
+ else
{
- remotestore_impl::ReportProgress(OptionalContext,
- "Assembling blocks"sv,
- fmt::format("Aborting, {} blocks remaining...", 0),
- GeneratedBlockCount,
- 0);
+ ZEN_ASSERT(false);
}
- return {};
- }
+ };
+
+ Composer.Compose(AttachmentHashes,
+ AttachmentSizes,
+ AttachmentKeys,
+ [&OnNewBlock, &ChunkedFileAttachmentResolver](std::vector<IoHash>&& ChunkRawHashes) {
+ OnNewBlock(std::move(ChunkRawHashes), ChunkedFileAttachmentResolver);
+ });
}
- catch (const std::exception& Ex)
+
+ if (remotestore_impl::IsCancelled(OptionalContext))
{
- BlockCreateLatch.CountDown();
- while (!BlockCreateLatch.Wait(1000))
- {
- }
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), "Block creation failed", Ex.what());
- throw;
+ Work.Abort();
}
- BlockCreateLatch.CountDown();
- while (!BlockCreateLatch.Wait(1000))
- {
- ptrdiff_t Remaining = BlockCreateLatch.Remaining();
+ Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) {
+ ZEN_UNUSED(IsAborted, IsPaused);
if (remotestore_impl::IsCancelled(OptionalContext))
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- while (!BlockCreateLatch.Wait(1000))
- {
- Remaining = BlockCreateLatch.Remaining();
- remotestore_impl::ReportProgress(OptionalContext,
- "Creating blocks"sv,
- fmt::format("Aborting, {} blocks remaining...", Remaining),
- GeneratedBlockCount,
- Remaining);
- }
- remotestore_impl::ReportProgress(OptionalContext, "Creating blocks"sv, "Aborted"sv, GeneratedBlockCount, 0);
- return {};
+ AbortFlag.store(true);
}
remotestore_impl::ReportProgress(OptionalContext,
"Creating blocks"sv,
- fmt::format("{} remaining...", Remaining),
- GeneratedBlockCount,
- Remaining);
- }
+ fmt::format("{}{} remaining...", AbortFlag.load() ? "Aborting, " : "", PendingWork),
+ ComposedBlocks,
+ PendingWork,
+ BlockCreateProgressTimer.GetElapsedTimeMs());
+ });
- if (GeneratedBlockCount > 0)
+ if (!AbortFlag.load() && ComposedBlocks > 0)
{
+ remotestore_impl::ReportProgress(OptionalContext,
+ "Creating blocks"sv,
+ ""sv,
+ ComposedBlocks,
+ 0,
+ BlockCreateProgressTimer.GetElapsedTimeMs());
+
uint64_t NowMS = Timer.GetElapsedTimeMs();
- remotestore_impl::ReportProgress(OptionalContext, "Creating blocks"sv, ""sv, GeneratedBlockCount, 0);
remotestore_impl::ReportMessage(
OptionalContext,
- fmt::format("Created {} blocks in {}", GeneratedBlockCount, NiceTimeSpanMs(NowMS - CreateBlocksStartMS)));
+ fmt::format("Created {} blocks in {}", ComposedBlocks, NiceTimeSpanMs(NowMS - CreateBlocksStartMS)));
+ }
+ }
+
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ return {};
+ }
+
+ // Reused blocks were not composed (their chunks were erased from UploadAttachments) but must
+ // still appear in the container so that a fresh receiver knows to download them.
+ if (BuildBlocks)
+ {
+ for (size_t KnownBlockIndex : ReusedBlockIndexes)
+ {
+ const ChunkBlockDescription& Reused = KnownBlocks[KnownBlockIndex];
+ Blocks.push_back(Reused);
}
+ }
- if (!RemoteResult.IsError())
+ CbObjectWriter OplogContainerWriter;
+ RwLock::SharedLockScope _(BlocksLock);
+ OplogContainerWriter.AddBinary("ops"sv, CompressedOpsSection.GetCompressed().Flatten().AsIoBuffer());
+ OplogContainerWriter.BeginArray("blocks"sv);
+ {
+ for (const ChunkBlockDescription& B : Blocks)
{
- CbObjectWriter OplogContinerWriter;
- RwLock::SharedLockScope _(BlocksLock);
- OplogContinerWriter.AddBinary("ops"sv, CompressedOpsSection.GetCompressed().Flatten().AsIoBuffer());
- OplogContinerWriter.BeginArray("blocks"sv);
+ ZEN_ASSERT(!B.ChunkRawHashes.empty());
+ if (BuildBlocks)
{
- for (const ChunkBlockDescription& B : Blocks)
+ ZEN_ASSERT(B.BlockHash != IoHash::Zero);
+
+ OplogContainerWriter.BeginObject();
{
- ZEN_ASSERT(!B.ChunkRawHashes.empty());
- if (BuildBlocks)
+ OplogContainerWriter.AddBinaryAttachment("rawhash"sv, B.BlockHash);
+ OplogContainerWriter.BeginArray("chunks"sv);
{
- ZEN_ASSERT(B.BlockHash != IoHash::Zero);
-
- OplogContinerWriter.BeginObject();
+ for (const IoHash& RawHash : B.ChunkRawHashes)
{
- OplogContinerWriter.AddBinaryAttachment("rawhash"sv, B.BlockHash);
- OplogContinerWriter.BeginArray("chunks"sv);
- {
- for (const IoHash& RawHash : B.ChunkRawHashes)
- {
- OplogContinerWriter.AddHash(RawHash);
- }
- }
- OplogContinerWriter.EndArray(); // "chunks"
+ OplogContainerWriter.AddHash(RawHash);
}
- OplogContinerWriter.EndObject();
- continue;
}
+ OplogContainerWriter.EndArray(); // "chunks"
+ }
+ OplogContainerWriter.EndObject();
+ continue;
+ }
- ZEN_ASSERT(B.BlockHash == IoHash::Zero);
- OplogContinerWriter.BeginObject();
+ ZEN_ASSERT(B.BlockHash == IoHash::Zero);
+ OplogContainerWriter.BeginObject();
+ {
+ OplogContainerWriter.BeginArray("chunks"sv);
+ {
+ for (const IoHash& RawHash : B.ChunkRawHashes)
{
- OplogContinerWriter.BeginArray("chunks"sv);
- {
- for (const IoHash& RawHash : B.ChunkRawHashes)
- {
- OplogContinerWriter.AddBinaryAttachment(RawHash);
- }
- }
- OplogContinerWriter.EndArray();
+ OplogContainerWriter.AddBinaryAttachment(RawHash);
}
- OplogContinerWriter.EndObject();
}
+ OplogContainerWriter.EndArray();
}
- OplogContinerWriter.EndArray(); // "blocks"sv
- OplogContinerWriter.BeginArray("chunkedfiles"sv);
+ OplogContainerWriter.EndObject();
+ }
+ }
+ OplogContainerWriter.EndArray(); // "blocks"sv
+ OplogContainerWriter.BeginArray("chunkedfiles"sv);
+ {
+ for (const remotestore_impl::ChunkedFile& F : ChunkedFiles)
+ {
+ OplogContainerWriter.BeginObject();
{
- for (const ChunkedFile& F : ChunkedFiles)
+ OplogContainerWriter.AddHash("rawhash"sv, F.Chunked.Info.RawHash);
+ OplogContainerWriter.AddInteger("rawsize"sv, F.Chunked.Info.RawSize);
+ OplogContainerWriter.BeginArray("chunks"sv);
{
- OplogContinerWriter.BeginObject();
+ for (const IoHash& RawHash : F.Chunked.Info.ChunkHashes)
{
- OplogContinerWriter.AddHash("rawhash"sv, F.Chunked.Info.RawHash);
- OplogContinerWriter.AddInteger("rawsize"sv, F.Chunked.Info.RawSize);
- OplogContinerWriter.BeginArray("chunks"sv);
- {
- for (const IoHash& RawHash : F.Chunked.Info.ChunkHashes)
- {
- OplogContinerWriter.AddHash(RawHash);
- }
- }
- OplogContinerWriter.EndArray(); // "chunks"
- OplogContinerWriter.BeginArray("sequence"sv);
- {
- for (uint32_t ChunkIndex : F.Chunked.Info.ChunkSequence)
- {
- OplogContinerWriter.AddInteger(ChunkIndex);
- }
- }
- OplogContinerWriter.EndArray(); // "sequence"
+ OplogContainerWriter.AddHash(RawHash);
}
- OplogContinerWriter.EndObject();
}
- }
- OplogContinerWriter.EndArray(); // "chunkedfiles"sv
-
- OplogContinerWriter.BeginArray("chunks"sv);
- {
- for (const IoHash& AttachmentHash : LargeChunkHashes)
+ OplogContainerWriter.EndArray(); // "chunks"
+ OplogContainerWriter.BeginArray("sequence"sv);
{
- OplogContinerWriter.AddBinaryAttachment(AttachmentHash);
+ for (uint32_t ChunkIndex : F.Chunked.Info.ChunkSequence)
+ {
+ OplogContainerWriter.AddInteger(ChunkIndex);
+ }
}
+ OplogContainerWriter.EndArray(); // "sequence"
}
- OplogContinerWriter.EndArray(); // "chunks"
+ OplogContainerWriter.EndObject();
+ }
+ }
+ OplogContainerWriter.EndArray(); // "chunkedfiles"sv
- OplogContainerObject = OplogContinerWriter.Save();
+ OplogContainerWriter.BeginArray("chunks"sv);
+ {
+ for (const IoHash& AttachmentHash : LargeChunkHashes)
+ {
+ OplogContainerWriter.AddBinaryAttachment(AttachmentHash);
}
}
+ OplogContainerWriter.EndArray(); // "chunks"
+
+ OplogContainerObject = OplogContainerWriter.Save();
+
return OplogContainerObject;
}
-RemoteProjectStore::LoadContainerResult
-BuildContainer(CidStore& ChunkStore,
+CbObject
+BuildContainer(LoggerRef InLog,
+ CidStore& ChunkStore,
ProjectStore::Project& Project,
ProjectStore::Oplog& Oplog,
WorkerThreadPool& WorkerPool,
@@ -2693,32 +3164,29 @@ BuildContainer(CidStore& ChunkStore,
const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks,
bool EmbedLooseFiles)
{
- // WorkerThreadPool& WorkerPool = GetLargeWorkerPool(EWorkloadType::Background);
-
- remotestore_impl::AsyncRemoteResult RemoteResult;
- CbObject ContainerObject = BuildContainer(ChunkStore,
- Project,
- Oplog,
- MaxBlockSize,
- MaxChunksPerBlock,
- MaxChunkEmbedSize,
- ChunkFileSizeLimit,
- BuildBlocks,
- IgnoreMissingAttachments,
- AllowChunking,
- {},
- WorkerPool,
- AsyncOnBlock,
- OnLargeAttachment,
- OnBlockChunks,
- EmbedLooseFiles,
- nullptr,
- RemoteResult);
- return RemoteProjectStore::LoadContainerResult{RemoteResult.ConvertResult(), ContainerObject};
+ return BuildContainer(InLog,
+ ChunkStore,
+ Project,
+ Oplog,
+ MaxBlockSize,
+ MaxChunksPerBlock,
+ MaxChunkEmbedSize,
+ ChunkFileSizeLimit,
+ BuildBlocks,
+ IgnoreMissingAttachments,
+ AllowChunking,
+ {},
+ WorkerPool,
+ AsyncOnBlock,
+ OnLargeAttachment,
+ OnBlockChunks,
+ EmbedLooseFiles,
+ /*OptionalContext*/ nullptr);
}
-RemoteProjectStore::Result
-SaveOplog(CidStore& ChunkStore,
+void
+SaveOplog(LoggerRef InLog,
+ CidStore& ChunkStore,
RemoteProjectStore& RemoteStore,
ProjectStore::Project& Project,
ProjectStore::Oplog& Oplog,
@@ -2735,6 +3203,7 @@ SaveOplog(CidStore& ChunkStore,
{
using namespace std::literals;
+ ZEN_SCOPED_LOG(InLog);
Stopwatch Timer;
remotestore_impl::UploadInfo Info;
@@ -2749,58 +3218,51 @@ SaveOplog(CidStore& ChunkStore,
CreateDirectories(AttachmentTempPath);
}
- remotestore_impl::AsyncRemoteResult RemoteResult;
RwLock AttachmentsLock;
std::unordered_set<IoHash, IoHash::Hasher> LargeAttachments;
std::unordered_map<IoHash, remotestore_impl::CreatedBlock, IoHash::Hasher> CreatedBlocks;
tsl::robin_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher> LooseLargeFiles;
- auto MakeTempBlock = [AttachmentTempPath, &RemoteResult, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock,
- ChunkBlockDescription&& Block) {
+ auto MakeTempBlock = [&Log, AttachmentTempPath, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock,
+ ChunkBlockDescription&& Block) {
std::filesystem::path BlockPath = AttachmentTempPath;
BlockPath.append(Block.BlockHash.ToHexString());
- try
- {
- IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath);
- RwLock::ExclusiveLockScope __(AttachmentsLock);
- CreatedBlocks.insert({Block.BlockHash, {.Payload = std::move(BlockBuffer), .Block = std::move(Block)}});
- ZEN_DEBUG("Saved temp block to '{}', {}", AttachmentTempPath, NiceBytes(BlockBuffer.GetSize()));
- }
- catch (const std::exception& Ex)
- {
- RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::InternalServerError),
- Ex.what(),
- "Unable to create temp block file");
- return;
- }
+ IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath);
+ const uint64_t BlockSize = BlockBuffer.GetSize();
+ RwLock::ExclusiveLockScope __(AttachmentsLock);
+ CreatedBlocks.insert(
+ {Block.BlockHash, {.Payload = CompositeBuffer(SharedBuffer(std::move(BlockBuffer))), .Block = std::move(Block)}});
+ ZEN_DEBUG("Saved temp block to '{}', {}", AttachmentTempPath, NiceBytes(BlockSize));
};
- auto UploadBlock = [&RemoteStore, &RemoteResult, &Info, OptionalContext](CompressedBuffer&& CompressedBlock,
- ChunkBlockDescription&& Block) {
- IoHash BlockHash = Block.BlockHash;
+ auto UploadBlock = [&Log, &RemoteStore, &RemoteStoreInfo, &Info, OptionalContext](CompressedBuffer&& CompressedBlock,
+ ChunkBlockDescription&& Block) {
+ IoHash BlockHash = Block.BlockHash;
+ uint64_t CompressedSize = CompressedBlock.GetCompressedSize();
RemoteProjectStore::SaveAttachmentResult Result =
RemoteStore.SaveAttachment(CompressedBlock.GetCompressed(), BlockHash, std::move(Block));
if (Result.ErrorCode)
{
- RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text);
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Failed to save attachment ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return;
+ throw RemoteStoreError(fmt::format("Failed to save block attachment {} for oplog '{}': {}",
+ BlockHash,
+ RemoteStoreInfo.ContainerName,
+ Result.Reason),
+ Result.ErrorCode,
+ Result.Text);
}
Info.AttachmentBlocksUploaded.fetch_add(1);
- Info.AttachmentBlockBytesUploaded.fetch_add(CompressedBlock.GetCompressedSize());
- ZEN_DEBUG("Saved block {}, {}", BlockHash, NiceBytes(CompressedBlock.GetCompressedSize()));
+ Info.AttachmentBlockBytesUploaded.fetch_add(CompressedSize);
+ ZEN_DEBUG("Saved block {}, {}", BlockHash, NiceBytes(CompressedSize));
};
std::vector<std::vector<std::pair<IoHash, FetchChunkFunc>>> BlockChunks;
- auto OnBlockChunks = [&BlockChunks](std::vector<std::pair<IoHash, FetchChunkFunc>>&& Chunks) {
- BlockChunks.push_back({Chunks.begin(), Chunks.end()});
+ auto OnBlockChunks = [&Log, &BlockChunks](std::vector<std::pair<IoHash, FetchChunkFunc>>&& Chunks) {
+ BlockChunks.push_back({std::make_move_iterator(Chunks.begin()), std::make_move_iterator(Chunks.end())});
ZEN_DEBUG("Found {} block chunks", Chunks.size());
};
- auto OnLargeAttachment = [&AttachmentsLock, &LargeAttachments, &LooseLargeFiles](const IoHash& AttachmentHash,
- TGetAttachmentBufferFunc&& GetBufferFunc) {
+ auto OnLargeAttachment = [&Log, &AttachmentsLock, &LargeAttachments, &LooseLargeFiles](const IoHash& AttachmentHash,
+ TGetAttachmentBufferFunc&& GetBufferFunc) {
{
RwLock::ExclusiveLockScope _(AttachmentsLock);
LargeAttachments.insert(AttachmentHash);
@@ -2826,15 +3288,10 @@ SaveOplog(CidStore& ChunkStore,
RemoteProjectStore::CreateContainerResult ContainerResult = RemoteStore.CreateContainer();
if (ContainerResult.ErrorCode)
{
- RemoteProjectStore::Result Result = {.ErrorCode = ContainerResult.ErrorCode,
- .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0,
- .Text = fmt::format("Failed to create container for oplog '{}' ({}): {}",
- RemoteStoreInfo.ContainerName,
- ContainerResult.ErrorCode,
- ContainerResult.Reason)};
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return Result;
+ throw RemoteStoreError(
+ fmt::format("Failed to create container for oplog '{}': {}", RemoteStoreInfo.ContainerName, ContainerResult.Reason),
+ ContainerResult.ErrorCode,
+ ContainerResult.Text);
}
if (RemoteStoreInfo.CreateBlocks)
@@ -2850,7 +3307,7 @@ SaveOplog(CidStore& ChunkStore,
{
ZEN_ASSERT(BlockDescription.ChunkCompressedLengths.empty());
- size_t ChunkCount = BlockDescription.ChunkRawLengths.size();
+ size_t ChunkCount = BlockDescription.ChunkRawHashes.size();
if (ChunkCount > 0)
{
// Fake sizes, will give usage number of number of chunks used rather than bytes used - better than nothing
@@ -2884,7 +3341,8 @@ SaveOplog(CidStore& ChunkStore,
}
}
- CbObject OplogContainerObject = BuildContainer(ChunkStore,
+ CbObject OplogContainerObject = BuildContainer(InLog,
+ ChunkStore,
Project,
Oplog,
MaxBlockSize,
@@ -2900,97 +3358,80 @@ SaveOplog(CidStore& ChunkStore,
OnLargeAttachment,
OnBlockChunks,
EmbedLooseFiles,
- OptionalContext,
- /* out */ RemoteResult);
- if (!RemoteResult.IsError())
+ OptionalContext);
+ if (remotestore_impl::IsCancelled(OptionalContext))
{
- Info.OplogSizeBytes = OplogContainerObject.GetSize();
+ return;
+ }
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- RemoteProjectStore::Result Result = {.ErrorCode = 0,
- .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0,
- .Text = "Operation cancelled"};
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return Result;
- }
+ Info.OplogSizeBytes = OplogContainerObject.GetSize();
+
+ if (remotestore_impl::IsCancelled(OptionalContext))
+ {
+ return;
+ }
- uint64_t ChunkCount = OplogContainerObject["chunks"sv].AsArrayView().Num();
- uint64_t BlockCount = OplogContainerObject["blocks"sv].AsArrayView().Num();
+ uint64_t ChunkCount = OplogContainerObject["chunks"sv].AsArrayView().Num();
+ uint64_t BlockCount = OplogContainerObject["blocks"sv].AsArrayView().Num();
+ remotestore_impl::ReportMessage(OptionalContext,
+ fmt::format("Saving oplog container '{}' with {} attachments and {} blocks...",
+ RemoteStoreInfo.ContainerName,
+ ChunkCount,
+ BlockCount));
+ Stopwatch SaveContainerTimer;
+ IoBuffer ContainerPayload = OplogContainerObject.GetBuffer().AsIoBuffer();
+ ContainerPayload.SetContentType(ZenContentType::kCbObject);
+ RemoteProjectStore::SaveResult ContainerSaveResult = RemoteStore.SaveContainer(std::move(ContainerPayload));
+ TransferWallTimeMS += SaveContainerTimer.GetElapsedTimeMs();
+ if (ContainerSaveResult.ErrorCode)
+ {
+ throw RemoteStoreError(
+ fmt::format("Failed to save oplog container for oplog '{}': {}", RemoteStoreInfo.ContainerName, ContainerSaveResult.Reason),
+ ContainerSaveResult.ErrorCode,
+ ContainerSaveResult.Text);
+ }
+ else
+ {
remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Saving oplog container '{}' with {} attachments and {} blocks...",
+ fmt::format("Saved container '{}' in {}",
RemoteStoreInfo.ContainerName,
- ChunkCount,
- BlockCount));
- Stopwatch SaveContainerTimer;
- IoBuffer ContainerPayload = OplogContainerObject.GetBuffer().AsIoBuffer();
- ContainerPayload.SetContentType(ZenContentType::kCbObject);
- RemoteProjectStore::SaveResult ContainerSaveResult = RemoteStore.SaveContainer(std::move(ContainerPayload));
- TransferWallTimeMS += SaveContainerTimer.GetElapsedTimeMs();
- if (ContainerSaveResult.ErrorCode)
- {
- RemoteResult.SetError(ContainerSaveResult.ErrorCode, ContainerSaveResult.Reason, "Failed to save oplog container");
- RemoteProjectStore::Result Result = {
- .ErrorCode = RemoteResult.GetError(),
- .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0,
- .Text = fmt::format("Failed to save oplog container ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())};
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Failed to save oplog container ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
- return Result;
- }
- else
- {
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Saved container '{}' in {}",
- RemoteStoreInfo.ContainerName,
- NiceTimeSpanMs(static_cast<uint64_t>(ContainerSaveResult.ElapsedSeconds * 1000.0))));
- }
-
- {
- Stopwatch UploadAttachmentsTimer;
- UploadAttachments(NetworkWorkerPool,
- ChunkStore,
- RemoteStore,
- LargeAttachments,
- BlockChunks,
- CreatedBlocks,
- LooseLargeFiles,
- ContainerSaveResult.Needs,
- ForceUpload,
- Info,
- RemoteResult,
- OptionalContext);
- TransferWallTimeMS += UploadAttachmentsTimer.GetElapsedTimeMs();
- }
+ NiceTimeSpanMs(static_cast<uint64_t>(ContainerSaveResult.ElapsedSeconds * 1000.0))));
+ }
- uint32_t Try = 0;
- while (!RemoteResult.IsError())
+ {
+ Stopwatch UploadAttachmentsTimer;
+ UploadAttachments(NetworkWorkerPool,
+ ChunkStore,
+ RemoteStore,
+ LargeAttachments,
+ BlockChunks,
+ CreatedBlocks,
+ LooseLargeFiles,
+ ContainerSaveResult.Needs,
+ ForceUpload,
+ Info,
+ OptionalContext);
+ TransferWallTimeMS += UploadAttachmentsTimer.GetElapsedTimeMs();
+
+ const uint32_t MaxTries = 8;
+ uint32_t Try = 0;
+ while (Try < MaxTries)
{
if (remotestore_impl::IsCancelled(OptionalContext))
{
- RemoteProjectStore::Result Result = {.ErrorCode = 0,
- .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0,
- .Text = "Operation cancelled"};
- remotestore_impl::ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", Result.ErrorCode, Result.Text));
- return Result;
+ return;
}
remotestore_impl::ReportMessage(OptionalContext, "Finalizing oplog container...");
RemoteProjectStore::FinalizeResult ContainerFinalizeResult = RemoteStore.FinalizeContainer(ContainerSaveResult.RawHash);
if (ContainerFinalizeResult.ErrorCode)
{
- RemoteResult.SetError(ContainerFinalizeResult.ErrorCode, ContainerFinalizeResult.Reason, ContainerFinalizeResult.Text);
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Failed to finalize oplog container {} ({}): {}",
- ContainerSaveResult.RawHash,
- RemoteResult.GetError(),
- RemoteResult.GetErrorReason()));
- RemoteProjectStore::Result Result = RemoteResult.ConvertResult();
- return Result;
+ throw RemoteStoreError(
+ fmt::format("Failed to finalize oplog container {}: {}", ContainerSaveResult.RawHash, ContainerFinalizeResult.Reason),
+ ContainerFinalizeResult.ErrorCode,
+ ContainerFinalizeResult.Text);
}
+
remotestore_impl::ReportMessage(
OptionalContext,
fmt::format("Finalized container '{}' in {}",
@@ -3004,78 +3445,60 @@ SaveOplog(CidStore& ChunkStore,
if (remotestore_impl::IsCancelled(OptionalContext))
{
- RemoteProjectStore::Result Result = {.ErrorCode = 0,
- .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0,
- .Text = "Operation cancelled"};
- return Result;
+ return;
}
- const uint32_t MaxTries = 8;
- if (Try < MaxTries)
- {
- Try++;
+ Try++;
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Finalize of container '{}' reported {} missing attachments. Uploading missing attachements. Try {}",
- RemoteStoreInfo.ContainerName,
- ContainerFinalizeResult.Needs.size(),
- Try));
-
- Stopwatch UploadAttachmentsTimer;
- UploadAttachments(NetworkWorkerPool,
- ChunkStore,
- RemoteStore,
- LargeAttachments,
- BlockChunks,
- CreatedBlocks,
- LooseLargeFiles,
- ContainerFinalizeResult.Needs,
- false,
- Info,
- RemoteResult,
- OptionalContext);
- TransferWallTimeMS += UploadAttachmentsTimer.GetElapsedTimeMs();
- }
- else
+ if (Try == MaxTries)
{
- RemoteResult.SetError(
- gsl::narrow<int>(HttpResponseCode::InternalServerError),
- "Failed to save oplog container",
+ throw std::runtime_error(
fmt::format("Giving up finalize oplog container {} after {} retries, still getting reports of missing attachments",
ContainerSaveResult.RawHash,
- ContainerFinalizeResult.Needs.size()));
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Failed to finalize oplog container container {} ({}): {}",
- ContainerSaveResult.RawHash,
- RemoteResult.GetError(),
- RemoteResult.GetErrorReason()));
- break;
+ Try));
}
- }
- LooseLargeFiles.clear();
- CreatedBlocks.clear();
+ remotestore_impl::ReportMessage(
+ OptionalContext,
+ fmt::format("Finalize of container '{}' reported {} missing attachments. Uploading missing attachments. Try {}",
+ RemoteStoreInfo.ContainerName,
+ ContainerFinalizeResult.Needs.size(),
+ Try));
+
+ Stopwatch RetryUploadAttachmentsTimer;
+ UploadAttachments(NetworkWorkerPool,
+ ChunkStore,
+ RemoteStore,
+ LargeAttachments,
+ BlockChunks,
+ CreatedBlocks,
+ LooseLargeFiles,
+ ContainerFinalizeResult.Needs,
+ false,
+ Info,
+ OptionalContext);
+ TransferWallTimeMS += RetryUploadAttachmentsTimer.GetElapsedTimeMs();
+ }
}
- RemoteProjectStore::Result Result = RemoteResult.ConvertResult();
- Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
- remotestore_impl::LogRemoteStoreStatsDetails(RemoteStore.GetStats());
+ LooseLargeFiles.clear();
+ CreatedBlocks.clear();
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Saved oplog '{}' {} in {} ({}), Blocks: {} ({}), Attachments: {} ({}) {}",
- RemoteStoreInfo.ContainerName,
- RemoteResult.GetError() == 0 ? "SUCCESS" : "FAILURE",
- NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)),
- NiceBytes(Info.OplogSizeBytes),
- Info.AttachmentBlocksUploaded.load(),
- NiceBytes(Info.AttachmentBlockBytesUploaded.load()),
- Info.AttachmentsUploaded.load(),
- NiceBytes(Info.AttachmentBytesUploaded.load()),
- remotestore_impl::GetStats(RemoteStore.GetStats(), TransferWallTimeMS)));
+ remotestore_impl::LogRemoteStoreStatsDetails(RemoteStore.GetStats());
- return Result;
-};
+ remotestore_impl::ReportMessage(
+ OptionalContext,
+ fmt::format("Saved oplog '{}' {} in {} ({}), Blocks: {} ({}), Attachments: {} ({}) {}",
+ RemoteStoreInfo.ContainerName,
+ "SUCCESS",
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()),
+ NiceBytes(Info.OplogSizeBytes),
+ Info.AttachmentBlocksUploaded.load(),
+ NiceBytes(Info.AttachmentBlockBytesUploaded.load()),
+ Info.AttachmentsUploaded.load(),
+ NiceBytes(Info.AttachmentBytesUploaded.load()),
+ remotestore_impl::GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, TransferWallTimeMS)));
+}
RemoteProjectStore::Result
ParseOplogContainer(
@@ -3098,7 +3521,7 @@ ParseOplogContainer(
CbValidateError ValidateResult = CbValidateError::None;
if (CbObject SectionObject = ValidateAndReadCompactBinaryObject(std::move(SectionPayload), ValidateResult);
- ValidateResult == CbValidateError::None && ContainerObject)
+ ValidateResult == CbValidateError::None && SectionObject)
{
OutOplogSection = SectionObject;
}
@@ -3106,15 +3529,23 @@ ParseOplogContainer(
{
remotestore_impl::ReportMessage(
OptionalContext,
- fmt::format("Failed to save oplog container: '{}' ('{}')", "Section has unexpected data type", ToString(ValidateResult)));
+ fmt::format("Failed to read oplog container: '{}' ('{}')", "Section has unexpected data type", ToString(ValidateResult)));
return RemoteProjectStore::Result{gsl::narrow<int>(HttpResponseCode::BadRequest),
Timer.GetElapsedTimeMs() / 1000.0,
"Section has unexpected data type",
- "Failed to save oplog container"};
+ "Failed to read oplog container"};
}
std::unordered_set<IoHash, IoHash::Hasher> NeededAttachments;
{
CbArrayView OpsArray = OutOplogSection["ops"sv].AsArrayView();
+
+ size_t OpCount = OpsArray.Num();
+ size_t OpsCompleteCount = 0;
+
+ remotestore_impl::ReportMessage(OptionalContext, fmt::format("Scanning {} ops for attachments", OpCount));
+
+ Stopwatch ScanOplogProgressTimer;
+ uint64_t LastReportTimeMs = ScanOplogProgressTimer.GetElapsedTimeMs();
for (CbFieldView OpEntry : OpsArray)
{
OpEntry.IterateAttachments([&](CbFieldView FieldView) { NeededAttachments.insert(FieldView.AsAttachment()); });
@@ -3124,7 +3555,25 @@ ParseOplogContainer(
.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0,
.Reason = "Operation cancelled"};
}
+ OpsCompleteCount++;
+ if (ScanOplogProgressTimer.GetElapsedTimeMs() - LastReportTimeMs > 200)
+ {
+ remotestore_impl::ReportProgress(
+ OptionalContext,
+ "Scanning oplog"sv,
+ fmt::format("{} attachments found, {} ops remaining...", NeededAttachments.size(), OpCount - OpsCompleteCount),
+ OpCount,
+ OpCount - OpsCompleteCount,
+ ScanOplogProgressTimer.GetElapsedTimeMs());
+ LastReportTimeMs = ScanOplogProgressTimer.GetElapsedTimeMs();
+ }
}
+ remotestore_impl::ReportProgress(OptionalContext,
+ "Scanning oplog"sv,
+ fmt::format("{} attachments found", NeededAttachments.size()),
+ OpCount,
+ OpCount - OpsCompleteCount,
+ ScanOplogProgressTimer.GetElapsedTimeMs());
}
{
std::vector<IoHash> ReferencedAttachments(NeededAttachments.begin(), NeededAttachments.end());
@@ -3151,13 +3600,27 @@ ParseOplogContainer(
{
ChunkedInfo Chunked = ReadChunkedInfo(ChunkedFileView);
+ size_t NeededChunkAttachmentCount = 0;
+
OnReferencedAttachments(Chunked.ChunkHashes);
- NeededAttachments.insert(Chunked.ChunkHashes.begin(), Chunked.ChunkHashes.end());
+ for (const IoHash& ChunkHash : Chunked.ChunkHashes)
+ {
+ if (!HasAttachment(ChunkHash))
+ {
+ if (NeededAttachments.insert(ChunkHash).second)
+ {
+ NeededChunkAttachmentCount++;
+ }
+ }
+ }
OnChunkedAttachment(Chunked);
- ZEN_INFO("Requesting chunked attachment '{}' ({}) built from {} chunks",
- Chunked.RawHash,
- NiceBytes(Chunked.RawSize),
- Chunked.ChunkHashes.size());
+
+ remotestore_impl::ReportMessage(OptionalContext,
+ fmt::format("Requesting chunked attachment '{}' ({}) built from {} chunks, need {} chunks",
+ Chunked.RawHash,
+ NiceBytes(Chunked.RawSize),
+ Chunked.ChunkHashes.size(),
+ NeededChunkAttachmentCount));
}
}
if (remotestore_impl::IsCancelled(OptionalContext))
@@ -3243,7 +3706,7 @@ ParseOplogContainer(
.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0,
.Reason = "Operation cancelled"};
}
- };
+ }
remotestore_impl::ReportMessage(OptionalContext,
fmt::format("Requesting {} of {} large attachments", NeedAttachmentCount, LargeChunksArray.Num()));
@@ -3282,23 +3745,13 @@ SaveOplogContainer(
return Result;
}
-RemoteProjectStore::Result
-LoadOplog(CidStore& ChunkStore,
- RemoteProjectStore& RemoteStore,
- ProjectStore::Oplog& Oplog,
- WorkerThreadPool& NetworkWorkerPool,
- WorkerThreadPool& WorkerPool,
- bool ForceDownload,
- bool IgnoreMissingAttachments,
- bool CleanOplog,
- EPartialBlockRequestMode PartialBlockRequestMode,
- double HostLatencySec,
- double CacheLatencySec,
- JobContext* OptionalContext)
+void
+LoadOplog(LoadOplogContext&& Context)
{
using namespace std::literals;
- std::unique_ptr<OperationLogOutput> LogOutput(std::make_unique<remotestore_impl::JobContextLogOutput>(OptionalContext));
+ ZEN_SCOPED_LOG(Context.Log);
+ remotestore_impl::JobContextLogger JobContextOutput(Context.OptionalJobContext);
remotestore_impl::DownloadInfo Info;
@@ -3307,44 +3760,44 @@ LoadOplog(CidStore& ChunkStore,
std::unordered_set<IoHash, IoHash::Hasher> Attachments;
uint64_t BlockCountToDownload = 0;
- RemoteProjectStore::RemoteStoreInfo RemoteStoreInfo = RemoteStore.GetInfo();
- remotestore_impl::ReportMessage(OptionalContext, fmt::format("Loading oplog container '{}'", RemoteStoreInfo.ContainerName));
+ RemoteProjectStore::RemoteStoreInfo RemoteStoreInfo = Context.RemoteStore.GetInfo();
+ remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Loading oplog container '{}'", RemoteStoreInfo.ContainerName));
uint64_t TransferWallTimeMS = 0;
Stopwatch LoadContainerTimer;
- RemoteProjectStore::LoadContainerResult LoadContainerResult = RemoteStore.LoadContainer();
+ RemoteProjectStore::LoadContainerResult LoadContainerResult = Context.RemoteStore.LoadContainer();
TransferWallTimeMS += LoadContainerTimer.GetElapsedTimeMs();
if (LoadContainerResult.ErrorCode)
{
remotestore_impl::ReportMessage(
- OptionalContext,
+ Context.OptionalJobContext,
fmt::format("Failed to load oplog container: '{}', error code: {}", LoadContainerResult.Reason, LoadContainerResult.ErrorCode));
- return RemoteProjectStore::Result{.ErrorCode = LoadContainerResult.ErrorCode,
- .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0,
- .Reason = LoadContainerResult.Reason,
- .Text = LoadContainerResult.Text};
+ throw RemoteStoreError(
+ fmt::format("Failed to load oplog container: '{}', error code: {}", LoadContainerResult.Reason, LoadContainerResult.ErrorCode),
+ LoadContainerResult.ErrorCode,
+ LoadContainerResult.Text);
}
- remotestore_impl::ReportMessage(OptionalContext,
+ remotestore_impl::ReportMessage(Context.OptionalJobContext,
fmt::format("Loaded container in {} ({})",
NiceTimeSpanMs(static_cast<uint64_t>(LoadContainerResult.ElapsedSeconds * 1000)),
NiceBytes(LoadContainerResult.ContainerObject.GetSize())));
Info.OplogSizeBytes = LoadContainerResult.ContainerObject.GetSize();
- remotestore_impl::AsyncRemoteResult RemoteResult;
- Latch AttachmentsDownloadLatch(1);
- Latch AttachmentsWriteLatch(1);
- std::atomic_size_t AttachmentCount = 0;
+ std::atomic<bool> AbortFlag(false);
+ std::atomic<bool> PauseFlag(false);
+ ParallelWork AttachmentWork(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+ std::atomic_size_t AttachmentCount = 0;
Stopwatch LoadAttachmentsTimer;
std::atomic_uint64_t DownloadStartMS = (std::uint64_t)-1;
- auto HasAttachment = [&Oplog, &ChunkStore, ForceDownload](const IoHash& RawHash) {
- if (ForceDownload)
+ auto HasAttachment = [&Context](const IoHash& RawHash) {
+ if (Context.ForceDownload)
{
return false;
}
- if (ChunkStore.ContainsChunk(RawHash))
+ if (Context.ChunkStore.ContainsChunk(RawHash))
{
return true;
}
@@ -3359,22 +3812,17 @@ LoadOplog(CidStore& ChunkStore,
std::vector<NeededBlockDownload> NeededBlockDownloads;
- auto OnNeedBlock = [&RemoteStore,
- &ChunkStore,
- &NetworkWorkerPool,
- &WorkerPool,
- &AttachmentsDownloadLatch,
- &AttachmentsWriteLatch,
+ auto OnNeedBlock = [&Context,
+ &AttachmentWork,
+ &AbortFlag,
&AttachmentCount,
- &RemoteResult,
&BlockCountToDownload,
&Info,
&LoadAttachmentsTimer,
&DownloadStartMS,
- &NeededBlockDownloads,
- IgnoreMissingAttachments,
- OptionalContext](ThinChunkBlockDescription&& ThinBlockDescription, std::vector<uint32_t>&& NeededChunkIndexes) {
- if (RemoteResult.IsError())
+ &NeededBlockDownloads](ThinChunkBlockDescription&& ThinBlockDescription,
+ std::vector<uint32_t>&& NeededChunkIndexes) {
+ if (AbortFlag.load())
{
return;
}
@@ -3383,15 +3831,8 @@ LoadOplog(CidStore& ChunkStore,
AttachmentCount.fetch_add(1);
if (ThinBlockDescription.BlockHash == IoHash::Zero)
{
- DownloadAndSaveBlockChunks(ChunkStore,
- RemoteStore,
- IgnoreMissingAttachments,
- OptionalContext,
- NetworkWorkerPool,
- WorkerPool,
- AttachmentsDownloadLatch,
- AttachmentsWriteLatch,
- RemoteResult,
+ DownloadAndSaveBlockChunks(Context,
+ AttachmentWork,
Info,
LoadAttachmentsTimer,
DownloadStartMS,
@@ -3405,53 +3846,29 @@ LoadOplog(CidStore& ChunkStore,
}
};
- auto OnNeedAttachment = [&RemoteStore,
- &Oplog,
- &ChunkStore,
- &NetworkWorkerPool,
- &WorkerPool,
- &AttachmentsDownloadLatch,
- &AttachmentsWriteLatch,
- &RemoteResult,
- &Attachments,
- &AttachmentCount,
- &LoadAttachmentsTimer,
- &DownloadStartMS,
- &Info,
- IgnoreMissingAttachments,
- OptionalContext](const IoHash& RawHash) {
+ std::vector<IoHash> AttachmentsToDownload;
+
+ auto OnNeedAttachment = [&AttachmentsToDownload, &AbortFlag, &Attachments, &AttachmentCount](const IoHash& RawHash) {
if (!Attachments.insert(RawHash).second)
{
return;
}
- if (RemoteResult.IsError())
+ if (AbortFlag.load())
{
return;
}
AttachmentCount.fetch_add(1);
- DownloadAndSaveAttachment(ChunkStore,
- RemoteStore,
- IgnoreMissingAttachments,
- OptionalContext,
- NetworkWorkerPool,
- WorkerPool,
- AttachmentsDownloadLatch,
- AttachmentsWriteLatch,
- RemoteResult,
- Info,
- LoadAttachmentsTimer,
- DownloadStartMS,
- RawHash);
+ AttachmentsToDownload.push_back(RawHash);
};
std::vector<ChunkedInfo> FilesToDechunk;
auto OnChunkedAttachment = [&FilesToDechunk](const ChunkedInfo& Chunked) { FilesToDechunk.push_back(Chunked); };
- auto OnReferencedAttachments = [&Oplog](std::span<IoHash> RawHashes) { Oplog.CaptureAddedAttachments(RawHashes); };
+ auto OnReferencedAttachments = [&Context](std::span<IoHash> RawHashes) { Context.Oplog.CaptureAddedAttachments(RawHashes); };
// Make sure we retain any attachments we download before writing the oplog
- Oplog.EnableUpdateCapture();
- auto _ = MakeGuard([&Oplog]() { Oplog.DisableUpdateCapture(); });
+ Context.Oplog.EnableUpdateCapture();
+ auto _ = MakeGuard([&Context]() { Context.Oplog.DisableUpdateCapture(); });
CbObject OplogSection;
RemoteProjectStore::Result Result = ParseOplogContainer(LoadContainerResult.ContainerObject,
@@ -3461,12 +3878,14 @@ LoadOplog(CidStore& ChunkStore,
OnNeedAttachment,
OnChunkedAttachment,
OplogSection,
- OptionalContext);
+ Context.OptionalJobContext);
if (Result.ErrorCode != 0)
{
- RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text);
+ AbortFlag = true;
+ AttachmentWork.Wait();
+ throw RemoteStoreError(Result.Reason, Result.ErrorCode, Result.Text);
}
- remotestore_impl::ReportMessage(OptionalContext,
+ remotestore_impl::ReportMessage(Context.OptionalJobContext,
fmt::format("Parsed oplog in {}, found {} attachments, {} blocks and {} chunked files to download",
NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)),
Attachments.size(),
@@ -3490,8 +3909,17 @@ LoadOplog(CidStore& ChunkStore,
std::vector<bool> DownloadedViaLegacyChunkFlag(AllNeededChunkHashes.size(), false);
ChunkBlockAnalyser::BlockResult PartialBlocksResult;
- RemoteProjectStore::GetBlockDescriptionsResult BlockDescriptions = RemoteStore.GetBlockDescriptions(BlockHashes);
- std::vector<IoHash> BlocksWithDescription;
+ remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Fetching descriptions for {} blocks", BlockHashes.size()));
+
+ RemoteProjectStore::GetBlockDescriptionsResult BlockDescriptions =
+ Context.RemoteStore.GetBlockDescriptions(BlockHashes, Context.OptionalCache, Context.CacheBuildId);
+
+ remotestore_impl::ReportMessage(Context.OptionalJobContext,
+ fmt::format("GetBlockDescriptions took {}. Found {} blocks",
+ NiceTimeSpanMs(uint64_t(BlockDescriptions.ElapsedSeconds * 1000)),
+ BlockDescriptions.Blocks.size()));
+
+ std::vector<IoHash> BlocksWithDescription;
BlocksWithDescription.reserve(BlockDescriptions.Blocks.size());
for (const ChunkBlockDescription& BlockDescription : BlockDescriptions.Blocks)
{
@@ -3505,15 +3933,8 @@ LoadOplog(CidStore& ChunkStore,
if (FindIt == BlockDescriptions.Blocks.end())
{
// Fall back to full download as we can't get enough information about the block
- DownloadAndSaveBlock(ChunkStore,
- RemoteStore,
- IgnoreMissingAttachments,
- OptionalContext,
- NetworkWorkerPool,
- WorkerPool,
- AttachmentsDownloadLatch,
- AttachmentsWriteLatch,
- RemoteResult,
+ DownloadAndSaveBlock(Context,
+ AttachmentWork,
Info,
LoadAttachmentsTimer,
DownloadStartMS,
@@ -3539,142 +3960,185 @@ LoadOplog(CidStore& ChunkStore,
}
else
{
- // Not a requested block?
- ZEN_ASSERT(false);
+ // Not a requested block? Ignore it
+ FindIt++;
}
}
}
+
+ std::vector<bool> BlockExistsInCache(BlocksWithDescription.size(), false);
+
if (!AllNeededChunkHashes.empty())
{
std::vector<ChunkBlockAnalyser::EPartialBlockDownloadMode> PartialBlockDownloadModes;
- if (PartialBlockRequestMode == EPartialBlockRequestMode::Off)
+ if (Context.PartialBlockRequestMode == EPartialBlockRequestMode::Off)
{
PartialBlockDownloadModes.resize(BlocksWithDescription.size(), ChunkBlockAnalyser::EPartialBlockDownloadMode::Off);
}
else
{
- RemoteProjectStore::AttachmentExistsInCacheResult CacheExistsResult =
- RemoteStore.AttachmentExistsInCache(BlocksWithDescription);
- if (CacheExistsResult.ErrorCode != 0 || CacheExistsResult.HasBody.size() != BlocksWithDescription.size())
+ if (Context.OptionalCache)
{
- CacheExistsResult.HasBody.resize(BlocksWithDescription.size(), false);
- }
-
- PartialBlockDownloadModes.reserve(BlocksWithDescription.size());
-
- for (bool ExistsInCache : CacheExistsResult.HasBody)
- {
- if (PartialBlockRequestMode == EPartialBlockRequestMode::All)
- {
- PartialBlockDownloadModes.push_back(ExistsInCache ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
- : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange);
- }
- else if (PartialBlockRequestMode == EPartialBlockRequestMode::ZenCacheOnly)
+ std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult =
+ Context.OptionalCache->BlobsExists(Context.CacheBuildId, BlocksWithDescription);
+ if (CacheExistsResult.size() == BlocksWithDescription.size())
{
- PartialBlockDownloadModes.push_back(ExistsInCache ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
- : ChunkBlockAnalyser::EPartialBlockDownloadMode::Off);
+ for (size_t BlobIndex = 0; BlobIndex < CacheExistsResult.size(); BlobIndex++)
+ {
+ BlockExistsInCache[BlobIndex] = CacheExistsResult[BlobIndex].HasBody;
+ }
}
- else if (PartialBlockRequestMode == EPartialBlockRequestMode::Mixed)
+ uint64_t FoundBlocks =
+ std::accumulate(BlockExistsInCache.begin(),
+ BlockExistsInCache.end(),
+ uint64_t(0u),
+ [](uint64_t Current, bool Exists) -> uint64_t { return Current + (Exists ? 1 : 0); });
+ if (FoundBlocks > 0)
{
- PartialBlockDownloadModes.push_back(ExistsInCache ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
- : ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange);
+ remotestore_impl::ReportMessage(
+ Context.OptionalJobContext,
+ fmt::format("Found {} out of {} blocks in cache", FoundBlocks, BlockExistsInCache.size()));
}
}
+
+ ChunkBlockAnalyser::EPartialBlockDownloadMode CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off;
+ ChunkBlockAnalyser::EPartialBlockDownloadMode CachePartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off;
+
+ switch (Context.PartialBlockRequestMode)
+ {
+ case EPartialBlockRequestMode::Off:
+ break;
+ case EPartialBlockRequestMode::ZenCacheOnly:
+ CachePartialDownloadMode = Context.CacheMaxRangeCountPerRequest > 1
+ ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
+ : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange;
+ CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::Off;
+ break;
+ case EPartialBlockRequestMode::Mixed:
+ CachePartialDownloadMode = Context.CacheMaxRangeCountPerRequest > 1
+ ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
+ : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange;
+ CloudPartialDownloadMode = ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange;
+ break;
+ case EPartialBlockRequestMode::All:
+ CachePartialDownloadMode = Context.CacheMaxRangeCountPerRequest > 1
+ ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRangeHighSpeed
+ : ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange;
+ CloudPartialDownloadMode = Context.StoreMaxRangeCountPerRequest > 1
+ ? ChunkBlockAnalyser::EPartialBlockDownloadMode::MultiRange
+ : ChunkBlockAnalyser::EPartialBlockDownloadMode::SingleRange;
+ break;
+ }
+
+ PartialBlockDownloadModes.reserve(BlocksWithDescription.size());
+ for (uint32_t BlockIndex = 0; BlockIndex < BlocksWithDescription.size(); BlockIndex++)
+ {
+ const bool BlockExistInCache = BlockExistsInCache[BlockIndex];
+ PartialBlockDownloadModes.push_back(BlockExistInCache ? CachePartialDownloadMode : CloudPartialDownloadMode);
+ }
}
ZEN_ASSERT(PartialBlockDownloadModes.size() == BlocksWithDescription.size());
- ChunkBlockAnalyser PartialAnalyser(*LogOutput,
- BlockDescriptions.Blocks,
- ChunkBlockAnalyser::Options{.IsQuiet = false,
- .IsVerbose = false,
- .HostLatencySec = HostLatencySec,
- .HostHighSpeedLatencySec = CacheLatencySec});
+ ChunkBlockAnalyser PartialAnalyser(
+ JobContextOutput.Log(),
+ BlockDescriptions.Blocks,
+ ChunkBlockAnalyser::Options{.IsQuiet = false,
+ .IsVerbose = false,
+ .HostLatencySec = Context.StoreLatencySec,
+ .HostHighSpeedLatencySec = Context.CacheLatencySec,
+ .HostMaxRangeCountPerRequest = Context.StoreMaxRangeCountPerRequest,
+ .HostHighSpeedMaxRangeCountPerRequest = Context.CacheMaxRangeCountPerRequest});
std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks =
PartialAnalyser.GetNeeded(AllNeededPartialChunkHashesLookup,
[&](uint32_t ChunkIndex) { return !DownloadedViaLegacyChunkFlag[ChunkIndex]; });
PartialBlocksResult = PartialAnalyser.CalculatePartialBlockDownloads(NeededBlocks, PartialBlockDownloadModes);
- for (uint32_t FullBlockIndex : PartialBlocksResult.FullBlockIndexes)
- {
- DownloadAndSaveBlock(ChunkStore,
- RemoteStore,
- IgnoreMissingAttachments,
- OptionalContext,
- NetworkWorkerPool,
- WorkerPool,
- AttachmentsDownloadLatch,
- AttachmentsWriteLatch,
- RemoteResult,
- Info,
- LoadAttachmentsTimer,
- DownloadStartMS,
- BlockDescriptions.Blocks[FullBlockIndex].BlockHash,
- AllNeededPartialChunkHashesLookup,
- ChunkDownloadedFlags,
- 3);
- }
+ }
- for (size_t BlockRangeIndex = 0; BlockRangeIndex < PartialBlocksResult.BlockRanges.size();)
- {
- size_t RangeCount = 1;
- size_t RangesLeft = PartialBlocksResult.BlockRanges.size() - BlockRangeIndex;
- const ChunkBlockAnalyser::BlockRangeDescriptor& CurrentBlockRange = PartialBlocksResult.BlockRanges[BlockRangeIndex];
- while (RangeCount < RangesLeft &&
- CurrentBlockRange.BlockIndex == PartialBlocksResult.BlockRanges[BlockRangeIndex + RangeCount].BlockIndex)
- {
- RangeCount++;
- }
+ Stopwatch AttachmentsDownloadProgressTimer;
+ for (uint32_t FullBlockIndex : PartialBlocksResult.FullBlockIndexes)
+ {
+ DownloadAndSaveBlock(Context,
+ AttachmentWork,
+ Info,
+ LoadAttachmentsTimer,
+ DownloadStartMS,
+ BlockDescriptions.Blocks[FullBlockIndex].BlockHash,
+ AllNeededPartialChunkHashesLookup,
+ ChunkDownloadedFlags,
+ 3);
+ }
- DownloadAndSavePartialBlock(ChunkStore,
- RemoteStore,
- IgnoreMissingAttachments,
- OptionalContext,
- NetworkWorkerPool,
- WorkerPool,
- AttachmentsDownloadLatch,
- AttachmentsWriteLatch,
- RemoteResult,
- Info,
- LoadAttachmentsTimer,
- DownloadStartMS,
- BlockDescriptions.Blocks[CurrentBlockRange.BlockIndex],
- PartialBlocksResult.BlockRanges,
- BlockRangeIndex,
- RangeCount,
- AllNeededPartialChunkHashesLookup,
- ChunkDownloadedFlags,
- 3);
-
- BlockRangeIndex += RangeCount;
+ for (size_t BlockRangeIndex = 0; BlockRangeIndex < PartialBlocksResult.BlockRanges.size();)
+ {
+ size_t RangeCount = 1;
+ size_t RangesLeft = PartialBlocksResult.BlockRanges.size() - BlockRangeIndex;
+ const ChunkBlockAnalyser::BlockRangeDescriptor& CurrentBlockRange = PartialBlocksResult.BlockRanges[BlockRangeIndex];
+ while (RangeCount < RangesLeft &&
+ CurrentBlockRange.BlockIndex == PartialBlocksResult.BlockRanges[BlockRangeIndex + RangeCount].BlockIndex)
+ {
+ RangeCount++;
}
+
+ DownloadAndSavePartialBlock(Context,
+ AttachmentWork,
+ Info,
+ LoadAttachmentsTimer,
+ DownloadStartMS,
+ BlockDescriptions.Blocks[CurrentBlockRange.BlockIndex],
+ BlockExistsInCache[CurrentBlockRange.BlockIndex],
+ PartialBlocksResult.BlockRanges,
+ BlockRangeIndex,
+ RangeCount,
+ AllNeededPartialChunkHashesLookup,
+ ChunkDownloadedFlags,
+ /* RetriesLeft*/ 3);
+
+ BlockRangeIndex += RangeCount;
}
- AttachmentsDownloadLatch.CountDown();
- while (!AttachmentsDownloadLatch.Wait(1000))
+ for (const IoHash& AttachmentToDownload : AttachmentsToDownload)
{
- ptrdiff_t Remaining = AttachmentsDownloadLatch.Remaining();
- if (remotestore_impl::IsCancelled(OptionalContext))
+ DownloadAndSaveAttachment(Context, AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, AttachmentToDownload);
+ }
+
+ uint64_t TotalChunksToDownload = AllNeededChunkHashes.size() + AttachmentsToDownload.size();
+ AttachmentWork.Wait(1000, [&](bool /*IsAborted*/, bool /*IsPaused*/, std::ptrdiff_t /*Pending*/) {
+ if (remotestore_impl::IsCancelled(Context.OptionalJobContext) && !AbortFlag)
{
- if (!RemoteResult.IsError())
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- }
+ AbortFlag = true;
}
uint64_t PartialTransferWallTimeMS = TransferWallTimeMS;
if (DownloadStartMS != (uint64_t)-1)
{
PartialTransferWallTimeMS += LoadAttachmentsTimer.GetElapsedTimeMs() - DownloadStartMS.load();
}
+
+ uint64_t CompletedChunkCount = Info.ChunksCompleteCount.load();
+
+ uint64_t AttachmentsDownloaded =
+ Info.AttachmentBlocksDownloaded.load() + Info.AttachmentBlocksRangesDownloaded.load() + Info.AttachmentsDownloaded.load();
+ uint64_t AttachmentBytesDownloaded = Info.AttachmentBlockBytesDownloaded.load() + Info.AttachmentBlockRangeBytesDownloaded.load() +
+ Info.AttachmentBytesDownloaded.load();
+
remotestore_impl::ReportProgress(
- OptionalContext,
+ Context.OptionalJobContext,
"Loading attachments"sv,
- fmt::format("{} remaining. {}", Remaining, remotestore_impl::GetStats(RemoteStore.GetStats(), PartialTransferWallTimeMS)),
- AttachmentCount.load(),
- Remaining);
- }
+ fmt::format("{}/{} ({}) chunks. {} ({}) blobs downloaded. {}",
+ CompletedChunkCount,
+ TotalChunksToDownload,
+ NiceBytes(Info.AttachmentBytesStored.load()),
+ AttachmentsDownloaded,
+ NiceBytes(AttachmentBytesDownloaded),
+ remotestore_impl::GetStats(Context.RemoteStore.GetStats(), Context.OptionalCacheStats, PartialTransferWallTimeMS)),
+ TotalChunksToDownload,
+ TotalChunksToDownload - CompletedChunkCount,
+ AttachmentsDownloadProgressTimer.GetElapsedTimeMs());
+ });
+
if (DownloadStartMS != (uint64_t)-1)
{
TransferWallTimeMS += LoadAttachmentsTimer.GetElapsedTimeMs() - DownloadStartMS.load();
@@ -3682,266 +4146,247 @@ LoadOplog(CidStore& ChunkStore,
if (AttachmentCount.load() > 0)
{
- remotestore_impl::ReportProgress(OptionalContext,
- "Loading attachments"sv,
- fmt::format("{}", remotestore_impl::GetStats(RemoteStore.GetStats(), TransferWallTimeMS)),
- AttachmentCount.load(),
- 0);
- }
-
- AttachmentsWriteLatch.CountDown();
- while (!AttachmentsWriteLatch.Wait(1000))
- {
- ptrdiff_t Remaining = AttachmentsWriteLatch.Remaining();
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- if (!RemoteResult.IsError())
- {
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- }
- }
- remotestore_impl::ReportProgress(OptionalContext,
- "Writing attachments"sv,
- fmt::format("{} remaining.", Remaining),
+ remotestore_impl::ReportProgress(Context.OptionalJobContext,
+ "Loading attachments",
+ ""sv,
AttachmentCount.load(),
- Remaining);
+ 0,
+ AttachmentsDownloadProgressTimer.GetElapsedTimeMs());
}
-
- if (AttachmentCount.load() > 0)
+ if (!FilesToDechunk.empty())
{
- remotestore_impl::ReportProgress(OptionalContext, "Writing attachments", ""sv, AttachmentCount.load(), 0);
- }
+ remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Dechunking {} attachments", FilesToDechunk.size()));
- if (Result.ErrorCode == 0)
- {
- if (!FilesToDechunk.empty())
+ ParallelWork DechunkWork(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog);
+ std::filesystem::path TempFilePath = Context.Oplog.TempPath();
+ for (size_t ChunkedIndex = 0; ChunkedIndex < FilesToDechunk.size(); ChunkedIndex++)
{
- remotestore_impl::ReportMessage(OptionalContext, fmt::format("Dechunking {} attachments", FilesToDechunk.size()));
-
- Latch DechunkLatch(1);
- std::filesystem::path TempFilePath = Oplog.TempPath();
- for (const ChunkedInfo& Chunked : FilesToDechunk)
- {
- std::filesystem::path TempFileName = TempFilePath / Chunked.RawHash.ToHexString();
- DechunkLatch.AddCount(1);
- WorkerPool.ScheduleWork(
- [&ChunkStore,
- &DechunkLatch,
- TempFileName,
- &Chunked,
- &RemoteResult,
- IgnoreMissingAttachments,
- &Info,
- OptionalContext]() {
- ZEN_TRACE_CPU("DechunkAttachment");
-
- auto _ = MakeGuard([&DechunkLatch, &TempFileName] {
- std::error_code Ec;
- if (IsFile(TempFileName, Ec))
+ const ChunkedInfo& Chunked = FilesToDechunk[ChunkedIndex];
+ std::filesystem::path TempFileName = TempFilePath / Chunked.RawHash.ToHexString();
+ DechunkWork.ScheduleWork(
+ Context.WorkerPool,
+ [&Log, &Context, TempFileName, &FilesToDechunk, ChunkedIndex, &Info](std::atomic<bool>& AbortFlag) {
+ ZEN_TRACE_CPU("DechunkAttachment");
+
+ auto _ = MakeGuard([&Log, &TempFileName] {
+ std::error_code Ec;
+ if (IsFile(TempFileName, Ec))
+ {
+ RemoveFile(TempFileName, Ec);
+ if (Ec)
{
- RemoveFile(TempFileName, Ec);
- if (Ec)
- {
- ZEN_INFO("Failed to remove temporary file '{}'. Reason: {}", TempFileName, Ec.message());
- }
+ ZEN_INFO("Failed to remove temporary file '{}'. Reason: {}", TempFileName, Ec.message());
}
- DechunkLatch.CountDown();
- });
- try
+ }
+ });
+ const ChunkedInfo& Chunked = FilesToDechunk[ChunkedIndex];
+
+ try
+ {
+ if (AbortFlag.load())
+ {
+ return;
+ }
+ Stopwatch Timer;
+
+ IoBuffer TmpBuffer;
{
- if (RemoteResult.IsError())
+ BasicFile TmpFile;
+ std::error_code Ec;
+ TmpFile.Open(TempFileName, BasicFile::Mode::kTruncate, Ec);
+ if (Ec)
{
- return;
+ throw RemoteStoreError(
+ "Write error",
+ gsl::narrow<int>(HttpResponseCode::InternalServerError),
+ fmt::format("Failed to open temp file {} for chunked attachment {}", TempFileName, Chunked.RawHash));
}
- Stopwatch Timer;
- IoBuffer TmpBuffer;
+ else
{
- BasicFile TmpFile;
- TmpFile.Open(TempFileName, BasicFile::Mode::kTruncate);
+ BasicFileWriter TmpWriter(TmpFile, 64u * 1024u);
+
+ uint64_t ChunkOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder();
+ BLAKE3Stream HashingStream;
+ for (std::uint32_t SequenceIndex : Chunked.ChunkSequence)
{
- BasicFileWriter TmpWriter(TmpFile, 64u * 1024u);
+ if (AbortFlag.load())
+ {
+ return;
+ }
- uint64_t ChunkOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder();
- BLAKE3Stream HashingStream;
- for (std::uint32_t SequenceIndex : Chunked.ChunkSequence)
+ const IoHash& ChunkHash = Chunked.ChunkHashes[SequenceIndex];
+ IoBuffer Chunk = Context.ChunkStore.FindChunkByCid(ChunkHash);
+ if (!Chunk)
{
- const IoHash& ChunkHash = Chunked.ChunkHashes[SequenceIndex];
- IoBuffer Chunk = ChunkStore.FindChunkByCid(ChunkHash);
- if (!Chunk)
+ remotestore_impl::ReportMessage(
+ Context.OptionalJobContext,
+ fmt::format("Missing chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash));
+
+ // We only add 1 as the resulting missing count will be 1 for the dechunked file
+ Info.MissingAttachmentCount.fetch_add(1);
+ if (!Context.IgnoreMissingAttachments)
{
- remotestore_impl::ReportMessage(
- OptionalContext,
+ throw RemoteStoreError(
+ "Missing chunk",
+ gsl::narrow<int>(HttpResponseCode::NotFound),
fmt::format("Missing chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash));
-
- // We only add 1 as the resulting missing count will be 1 for the dechunked file
- Info.MissingAttachmentCount.fetch_add(1);
- if (!IgnoreMissingAttachments)
- {
- RemoteResult.SetError(
- gsl::narrow<int>(HttpResponseCode::NotFound),
- "Missing chunk",
- fmt::format("Missing chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash));
- }
- return;
}
+ return;
+ }
- IoHash RawHash;
- uint64_t RawSize;
+ IoHash RawHash;
+ uint64_t RawSize;
- CompressedBuffer Compressed =
- CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), RawHash, RawSize);
- if (RawHash != ChunkHash)
+ CompressedBuffer Compressed =
+ CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), RawHash, RawSize);
+ if (RawHash != ChunkHash || !Compressed)
+ {
+ std::string Message =
+ Compressed ? fmt::format("Mismatching raw hash {} for chunk {} for chunked attachment {}",
+ RawHash,
+ ChunkHash,
+ Chunked.RawHash)
+ : fmt::format("Malformed data for chunk {} for chunked attachment {}",
+ ChunkHash,
+ Chunked.RawHash);
+ remotestore_impl::ReportMessage(Context.OptionalJobContext, Message);
+
+ // We only add 1 as the resulting missing count will be 1 for the dechunked file
+ Info.MissingAttachmentCount.fetch_add(1);
+ if (!Context.IgnoreMissingAttachments)
+ {
+ throw RemoteStoreError("Missing chunk", gsl::narrow<int>(HttpResponseCode::NotFound), Message);
+ }
+ return;
+ }
+
+ {
+ ZEN_TRACE_CPU("DecompressChunk");
+
+ if (!Compressed.DecompressToStream(
+ 0,
+ RawSize,
+ [&](uint64_t SourceOffset,
+ uint64_t SourceSize,
+ uint64_t Offset,
+ const CompositeBuffer& RangeBuffer) {
+ ZEN_UNUSED(SourceOffset, SourceSize, Offset);
+
+ for (const SharedBuffer& Segment : RangeBuffer.GetSegments())
+ {
+ MemoryView SegmentData = Segment.GetView();
+ HashingStream.Append(SegmentData);
+ TmpWriter.Write(SegmentData.GetData(), SegmentData.GetSize(), ChunkOffset + Offset);
+ }
+ return true;
+ }))
{
remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Mismatching raw hash {} for chunk {} for chunked attachment {}",
- RawHash,
+ Context.OptionalJobContext,
+ fmt::format("Failed to decompress chunk {} for chunked attachment {}",
ChunkHash,
Chunked.RawHash));
// We only add 1 as the resulting missing count will be 1 for the dechunked file
Info.MissingAttachmentCount.fetch_add(1);
- if (!IgnoreMissingAttachments)
+ if (!Context.IgnoreMissingAttachments)
{
- RemoteResult.SetError(
- gsl::narrow<int>(HttpResponseCode::NotFound),
+ throw RemoteStoreError(
"Missing chunk",
- fmt::format("Mismatching raw hash {} for chunk {} for chunked attachment {}",
- RawHash,
- ChunkHash,
- Chunked.RawHash));
- }
- return;
- }
-
- {
- ZEN_TRACE_CPU("DecompressChunk");
-
- if (!Compressed.DecompressToStream(0,
- RawSize,
- [&](uint64_t SourceOffset,
- uint64_t SourceSize,
- uint64_t Offset,
- const CompositeBuffer& RangeBuffer) {
- ZEN_UNUSED(SourceOffset, SourceSize, Offset);
-
- for (const SharedBuffer& Segment :
- RangeBuffer.GetSegments())
- {
- MemoryView SegmentData = Segment.GetView();
- HashingStream.Append(SegmentData);
- TmpWriter.Write(SegmentData.GetData(),
- SegmentData.GetSize(),
- ChunkOffset + Offset);
- }
- return true;
- }))
- {
- remotestore_impl::ReportMessage(
- OptionalContext,
+ gsl::narrow<int>(HttpResponseCode::NotFound),
fmt::format("Failed to decompress chunk {} for chunked attachment {}",
ChunkHash,
Chunked.RawHash));
-
- // We only add 1 as the resulting missing count will be 1 for the dechunked file
- Info.MissingAttachmentCount.fetch_add(1);
- if (!IgnoreMissingAttachments)
- {
- RemoteResult.SetError(
- gsl::narrow<int>(HttpResponseCode::NotFound),
- "Missing chunk",
- fmt::format("Failed to decompress chunk {} for chunked attachment {}",
- ChunkHash,
- Chunked.RawHash));
- }
- return;
}
+ return;
}
- ChunkOffset += RawSize;
}
- BLAKE3 RawHash = HashingStream.GetHash();
- ZEN_ASSERT(Chunked.RawHash == IoHash::FromBLAKE3(RawHash));
- UniqueBuffer Header = CompressedBuffer::CreateHeaderForNoneEncoder(Chunked.RawSize, RawHash);
- TmpWriter.Write(Header.GetData(), Header.GetSize(), 0);
+ ChunkOffset += RawSize;
}
- TmpFile.Close();
- TmpBuffer = IoBufferBuilder::MakeFromTemporaryFile(TempFileName);
- }
- CidStore::InsertResult InsertResult =
- ChunkStore.AddChunk(TmpBuffer, Chunked.RawHash, CidStore::InsertMode::kMayBeMovedInPlace);
- if (InsertResult.New)
- {
- Info.AttachmentBytesStored.fetch_add(TmpBuffer.GetSize());
- Info.AttachmentsStored.fetch_add(1);
+ BLAKE3 RawHash = HashingStream.GetHash();
+ ZEN_ASSERT(Chunked.RawHash == IoHash::FromBLAKE3(RawHash));
+ UniqueBuffer Header = CompressedBuffer::CreateHeaderForNoneEncoder(Chunked.RawSize, RawHash);
+ TmpWriter.Write(Header.GetData(), Header.GetSize(), 0);
}
-
- ZEN_INFO("Dechunked attachment {} ({}) in {}",
- Chunked.RawHash,
- NiceBytes(Chunked.RawSize),
- NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ TmpFile.Close();
+ TmpBuffer = IoBufferBuilder::MakeFromTemporaryFile(TempFileName);
}
- catch (const std::exception& Ex)
+ uint64_t TmpBufferSize = TmpBuffer.GetSize();
+ CidStore::InsertResult InsertResult =
+ Context.ChunkStore.AddChunk(TmpBuffer, Chunked.RawHash, CidStore::InsertMode::kMayBeMovedInPlace);
+ if (InsertResult.New)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError),
- fmt::format("Failed to dechunck file {}", Chunked.RawHash),
- Ex.what());
+ Info.AttachmentBytesStored.fetch_add(TmpBufferSize);
+ Info.AttachmentsStored.fetch_add(1);
}
- },
- WorkerThreadPool::EMode::EnableBacklog);
- }
- DechunkLatch.CountDown();
- while (!DechunkLatch.Wait(1000))
- {
- ptrdiff_t Remaining = DechunkLatch.Remaining();
- if (remotestore_impl::IsCancelled(OptionalContext))
- {
- if (!RemoteResult.IsError())
+ ZEN_INFO("Dechunked attachment {} ({}) in {}",
+ Chunked.RawHash,
+ NiceBytes(Chunked.RawSize),
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ }
+ catch (const std::exception& Ex)
{
- RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", "");
- remotestore_impl::ReportMessage(
- OptionalContext,
- fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason()));
+ throw RemoteStoreError(fmt::format("Failed to dechunk file {}", Chunked.RawHash),
+ gsl::narrow<int>(HttpResponseCode::InternalServerError),
+ Ex.what());
}
- }
- remotestore_impl::ReportProgress(OptionalContext,
- "Dechunking attachments"sv,
- fmt::format("{} remaining...", Remaining),
- FilesToDechunk.size(),
- Remaining);
- }
- remotestore_impl::ReportProgress(OptionalContext, "Dechunking attachments"sv, ""sv, FilesToDechunk.size(), 0);
+ },
+ WorkerThreadPool::EMode::EnableBacklog);
}
- Result = RemoteResult.ConvertResult();
- }
- if (Result.ErrorCode == 0)
- {
- if (CleanOplog)
- {
- RemoteStore.Flush();
- if (!Oplog.Reset())
+ Stopwatch DechunkProgressTimer;
+ DechunkWork.Wait(1000, [&](bool /*IsAborted*/, bool /*IsPaused*/, std::ptrdiff_t Remaining) {
+ if (remotestore_impl::IsCancelled(Context.OptionalJobContext) && !AbortFlag)
{
- Result = RemoteProjectStore::Result{.ErrorCode = gsl::narrow<int>(HttpResponseCode::InternalServerError),
- .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0,
- .Reason = fmt::format("Failed to clean existing oplog '{}'", Oplog.OplogId())};
- remotestore_impl::ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", Result.ErrorCode, Result.Reason));
+ AbortFlag = true;
}
+ remotestore_impl::ReportProgress(Context.OptionalJobContext,
+ "Dechunking attachments"sv,
+ fmt::format("{} remaining...", Remaining),
+ FilesToDechunk.size(),
+ Remaining,
+ DechunkProgressTimer.GetElapsedTimeMs());
+ });
+ remotestore_impl::ReportProgress(Context.OptionalJobContext,
+ "Dechunking attachments"sv,
+ ""sv,
+ FilesToDechunk.size(),
+ 0,
+ DechunkProgressTimer.GetElapsedTimeMs());
+ }
+ if (Context.CleanOplog)
+ {
+ if (Context.OptionalCache)
+ {
+ Context.OptionalCache->Flush(100, [](intptr_t) { return /*DontWaitForPendingOperation*/ false; });
}
- if (Result.ErrorCode == 0)
+ if (!Context.Oplog.Reset())
{
- remotestore_impl::WriteOplogSection(Oplog, OplogSection, OptionalContext);
+ std::string Reason = fmt::format("Failed to clean existing oplog '{}'", Context.Oplog.OplogId());
+ remotestore_impl::ReportMessage(
+ Context.OptionalJobContext,
+ fmt::format("Aborting ({}): {}", gsl::narrow<int>(HttpResponseCode::InternalServerError), Reason));
+ throw RemoteStoreError(Reason, gsl::narrow<int>(HttpResponseCode::InternalServerError), "");
+ }
+ }
+ {
+ RemoteProjectStore::Result WriteResult =
+ remotestore_impl::WriteOplogSection(Context.Oplog, OplogSection, Context.OptionalJobContext);
+ if (WriteResult.ErrorCode)
+ {
+ remotestore_impl::ReportMessage(Context.OptionalJobContext,
+ fmt::format("Aborting ({}): {}", WriteResult.ErrorCode, WriteResult.Reason));
+ throw RemoteStoreError(WriteResult.Reason, WriteResult.ErrorCode, WriteResult.Text);
}
}
- Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0;
-
- remotestore_impl::LogRemoteStoreStatsDetails(RemoteStore.GetStats());
+ remotestore_impl::LogRemoteStoreStatsDetails(Context.RemoteStore.GetStats());
{
std::string DownloadDetails;
RemoteProjectStore::ExtendedStats ExtendedStats;
- if (RemoteStore.GetExtendedStats(ExtendedStats))
+ if (Context.RemoteStore.GetExtendedStats(ExtendedStats))
{
if (!ExtendedStats.m_ReceivedBytesPerSource.empty())
{
@@ -3960,7 +4405,8 @@ LoadOplog(CidStore& ChunkStore,
Total += It.second;
}
- remotestore_impl::ReportMessage(OptionalContext, fmt::format("Downloaded {} ({})", NiceBytes(Total), SB.ToView()));
+ remotestore_impl::ReportMessage(Context.OptionalJobContext,
+ fmt::format("Downloaded {} ({})", NiceBytes(Total), SB.ToView()));
}
}
}
@@ -3970,27 +4416,26 @@ LoadOplog(CidStore& ChunkStore,
uint64_t TotalBytesDownloaded = Info.OplogSizeBytes + Info.AttachmentBlockBytesDownloaded.load() +
Info.AttachmentBlockRangeBytesDownloaded.load() + Info.AttachmentBytesDownloaded.load();
- remotestore_impl::ReportMessage(OptionalContext,
- fmt::format("Loaded oplog '{}' {} in {} ({}), Blocks: {} ({}), BlockRanges: {} ({}), Attachments: {} "
- "({}), Total: {} ({}), Stored: {} ({}), Missing: {} {}",
- RemoteStoreInfo.ContainerName,
- Result.ErrorCode == 0 ? "SUCCESS" : "FAILURE",
- NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)),
- NiceBytes(Info.OplogSizeBytes),
- Info.AttachmentBlocksDownloaded.load(),
- NiceBytes(Info.AttachmentBlockBytesDownloaded.load()),
- Info.AttachmentBlocksRangesDownloaded.load(),
- NiceBytes(Info.AttachmentBlockRangeBytesDownloaded.load()),
- Info.AttachmentsDownloaded.load(),
- NiceBytes(Info.AttachmentBytesDownloaded.load()),
- TotalDownloads,
- NiceBytes(TotalBytesDownloaded),
- Info.AttachmentsStored.load(),
- NiceBytes(Info.AttachmentBytesStored.load()),
- Info.MissingAttachmentCount.load(),
- remotestore_impl::GetStats(RemoteStore.GetStats(), TransferWallTimeMS)));
-
- return Result;
+ remotestore_impl::ReportMessage(
+ Context.OptionalJobContext,
+ fmt::format("Loaded oplog '{}' {} in {} ({}), Blocks: {} ({}), BlockRanges: {} ({}), Attachments: {} "
+ "({}), Total: {} ({}), Stored: {} ({}), Missing: {} {}",
+ RemoteStoreInfo.ContainerName,
+ "SUCCESS",
+ NiceTimeSpanMs(static_cast<uint64_t>(Timer.GetElapsedTimeMs())),
+ NiceBytes(Info.OplogSizeBytes),
+ Info.AttachmentBlocksDownloaded.load(),
+ NiceBytes(Info.AttachmentBlockBytesDownloaded.load()),
+ Info.AttachmentBlocksRangesDownloaded.load(),
+ NiceBytes(Info.AttachmentBlockRangeBytesDownloaded.load()),
+ Info.AttachmentsDownloaded.load(),
+ NiceBytes(Info.AttachmentBytesDownloaded.load()),
+ TotalDownloads,
+ NiceBytes(TotalBytesDownloaded),
+ Info.AttachmentsStored.load(),
+ NiceBytes(Info.AttachmentBytesStored.load()),
+ Info.MissingAttachmentCount.load(),
+ remotestore_impl::GetStats(Context.RemoteStore.GetStats(), Context.OptionalCacheStats, TransferWallTimeMS)));
}
ChunkedInfo
@@ -4033,7 +4478,7 @@ RemoteProjectStore::~RemoteProjectStore()
#if ZEN_WITH_TESTS
-namespace testutils {
+namespace projectstore_testutils {
using namespace std::literals;
static std::string OidAsString(const Oid& Id)
@@ -4069,6 +4514,64 @@ namespace testutils {
return Package;
};
+ // Builds a compact-binary oplog package describing raw on-disk file attachments.
+ // The object holds the entry key plus an optional "files" array where each element
+ // carries the attachment id, a server-relative path and a client path.
+ //   Id             - oplog entry key, serialized through OidAsString.
+ //   ProjectRootDir - root used to relativize each attachment path for "serverpath".
+ //   Attachments    - (Oid, absolute file path) pairs; empty span => no "files" array.
+ // NOTE(review): "key" is written via OidAsString but "id" streams the Oid directly —
+ // presumably the CbObjectWriter overload handles Oid natively; confirm in CbWriter.
+ static CbPackage CreateFilesOplogPackage(const Oid& Id,
+ const std::filesystem::path ProjectRootDir,
+ const std::span<const std::pair<Oid, std::filesystem::path>>& Attachments)
+ {
+ CbPackage Package;
+ CbObjectWriter Object;
+ Object << "key"sv << OidAsString(Id);
+ if (!Attachments.empty())
+ {
+ Object.BeginArray("files");
+ for (const auto& Attachment : Attachments)
+ {
+ // generic_string() normalizes separators so "serverpath" is platform-independent.
+ std::filesystem::path ServerPath = std::filesystem::relative(Attachment.second, ProjectRootDir).generic_string();
+ std::filesystem::path ClientPath = ServerPath; // dummy
+ Object.BeginObject();
+ Object << "id"sv << Attachment.first;
+ Object << "serverpath"sv << ServerPath.string();
+ Object << "clientpath"sv << ClientPath.string();
+ Object.EndObject();
+ }
+ Object.EndArray();
+ }
+ Package.SetObject(Object.Save());
+ return Package;
+ };
+
+ // Variant of CreateFilesOplogPackage where each entry includes a "data" field of
+ // CbFieldType::Hash set to IoHash::Zero. CbFieldView::AsHash() returns Zero for a
+ // plain Hash field whose stored value is zero, so RewriteOp still enters the rewrite
+ // path (DataHash == Zero) and calls RewriteCbObject, which then finds the pre-existing
+ // "data" field, triggering the return-true branch at line 1858.
+ //   Id             - oplog entry key, serialized through OidAsString.
+ //   ProjectRootDir - root used to relativize each attachment path for "serverpath".
+ //   Attachments    - (Oid, absolute file path) pairs; empty span => no "files" array.
+ static CbPackage CreateFilesOplogPackageWithZeroDataHash(const Oid& Id,
+ const std::filesystem::path ProjectRootDir,
+ const std::span<const std::pair<Oid, std::filesystem::path>>& Attachments)
+ {
+ CbPackage Package;
+ CbObjectWriter Object;
+ Object << "key"sv << OidAsString(Id);
+ if (!Attachments.empty())
+ {
+ Object.BeginArray("files");
+ for (const auto& Attachment : Attachments)
+ {
+ std::filesystem::path ServerPath = std::filesystem::relative(Attachment.second, ProjectRootDir).generic_string();
+ std::filesystem::path ClientPath = ServerPath; // dummy
+ Object.BeginObject();
+ Object << "id"sv << Attachment.first;
+ Object << "serverpath"sv << ServerPath.string();
+ Object << "clientpath"sv << ClientPath.string();
+ // The only difference from CreateFilesOplogPackage: a pre-existing zero "data" hash.
+ Object.AddHash("data"sv, IoHash::Zero);
+ Object.EndObject();
+ }
+ Object.EndArray();
+ }
+ Package.SetObject(Object.Save());
+ return Package;
+ };
+
static std::vector<std::pair<Oid, CompressedBuffer>> CreateAttachments(
const std::span<const size_t>& Sizes,
OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast,
@@ -4085,7 +4588,105 @@ namespace testutils {
return Result;
}
-} // namespace testutils
+ // Materializes random uncompressed attachment files on disk for tests.
+ // For each requested size, writes a random blob to
+ //   RootDir/content/uncompressed_file/<blake-hex-of-content>
+ // and pairs it with a fresh Oid. Returns the (Oid, path) pairs in input order.
+ // NOTE(review): content is random per call (CreateRandomBlob), so paths are
+ // content-addressed and effectively unique per invocation.
+ static std::vector<std::pair<Oid, std::filesystem::path>> CreateFileAttachments(const std::filesystem::path& RootDir,
+ const std::span<const size_t>& Sizes)
+ {
+ std::vector<std::pair<Oid, std::filesystem::path>> Result;
+ Result.reserve(Sizes.size());
+ for (size_t Size : Sizes)
+ {
+ IoBuffer FileBlob = CreateRandomBlob(Size);
+ IoHash FileHash = IoHash::HashBuffer(FileBlob);
+ // File name is the hex digest of the content itself.
+ std::filesystem::path UncompressedFilePath = RootDir / "content" / "uncompressed_file" / FileHash.ToHexString();
+ CreateDirectories(UncompressedFilePath.parent_path());
+ WriteFile(UncompressedFilePath, FileBlob);
+ Result.push_back({Oid::NewOid(), UncompressedFilePath});
+ }
+ return Result;
+ }
+
+ // Test JobContext that records every reported message/progress line so tests can
+ // assert on them (see HasMessage). All containers are guarded by m_Lock because
+ // Report* callbacks arrive from worker threads.
+ // NOTE(review): m_Cancel is a plain bool read by IsCancelled() without the lock —
+ // assumes the test sets it before (or happens-before) workers poll it; confirm
+ // whether std::atomic<bool> is warranted for cancellation-mid-run tests.
+ struct CapturingJobContext : public JobContext
+ {
+ bool IsCancelled() const override { return m_Cancel; }
+ void ReportMessage(std::string_view Message) override
+ {
+ RwLock::ExclusiveLockScope _(m_Lock);
+ Messages.emplace_back(Message);
+ }
+ void ReportProgress(std::string_view Op, std::string_view Details, ptrdiff_t, ptrdiff_t, uint64_t) override
+ {
+ // Counts/elapsed-ms are intentionally dropped; tests match on the text only.
+ RwLock::ExclusiveLockScope _(m_Lock);
+ ProgressMessages.emplace_back(fmt::format("{}: {}", Op, Details));
+ }
+
+ // True if any captured message contains Substr (substring match, case-sensitive).
+ bool HasMessage(std::string_view Substr) const
+ {
+ RwLock::SharedLockScope _(m_Lock);
+ return std::any_of(Messages.begin(), Messages.end(), [Substr](const std::string& M) {
+ return M.find(Substr) != std::string::npos;
+ });
+ }
+
+ bool m_Cancel = false;
+ std::vector<std::string> Messages;
+ std::vector<std::string> ProgressMessages;
+
+ private:
+ mutable RwLock m_Lock;
+ };
+
+ // Worker pool pair with separate NetworkPool and WorkerPool.
+ // Network pool gets ~1/4 of hardware threads (min 2); worker pool gets the
+ // remainder (min 4). The private counts are declared BEFORE the pools on purpose:
+ // member initialization follows declaration order, so the counts must be computed
+ // before the pools are constructed from them.
+ struct TestWorkerPools
+ {
+ private:
+ uint32_t m_NetworkCount;
+ uint32_t m_WorkerCount;
+
+ public:
+ WorkerThreadPool NetworkPool;
+ WorkerThreadPool WorkerPool;
+
+ TestWorkerPools()
+ : m_NetworkCount(Max(GetHardwareConcurrency() / 4u, 2u))
+ , m_WorkerCount(m_NetworkCount < GetHardwareConcurrency() ? Max(GetHardwareConcurrency() - m_NetworkCount, 4u) : 4u)
+ , NetworkPool(m_NetworkCount)
+ , WorkerPool(m_WorkerCount)
+ {
+ }
+ };
+
+ // Standalone thread-count helper for tests that need a single pool; mirrors the
+ // network-pool sizing in TestWorkerPools (~1/4 of hardware threads, min 2).
+ inline uint32_t GetWorkerCount() { return Max(GetHardwareConcurrency() / 4u, 2u); }
+
+ // Deterministic IoHash for tests: 20 zero bytes with Index in byte 0, so distinct
+ // indices yield distinct, stable hashes. (20 bytes matches IoHash's digest size —
+ // presumably BLAKE3-160/SHA1-sized; verify against the IoHash definition.)
+ inline IoHash MakeTestHash(uint8_t Index)
+ {
+ uint8_t Data[20] = {};
+ Data[0] = Index;
+ return IoHash::MakeFrom(Data);
+ }
+
+ // Deterministic Oid for tests: Index in the first 32-bit word, remaining words zero.
+ inline Oid MakeTestOid(uint32_t Index)
+ {
+ uint32_t Data[3] = {Index, 0, 0};
+ return Oid::FromMemory(Data);
+ }
+
+ // MaxChunks must be <= 127 (so MeasureVarUInt(MaxChunks) == 1) and MaxChunkEmbedSize is
+ // fixed at 100 to keep header sizes deterministic in BlockComposer tests.
+ // UsableSize is the payload budget; MaxBlockSize adds the worst-case header so a
+ // block can carry exactly UsableSize bytes of chunk data.
+ inline remotestore_impl::BlockComposer::Configuration MakeTestConfig(uint64_t UsableSize, uint64_t MaxChunks)
+ {
+ constexpr uint64_t MaxChunkEmbedSize = 100;
+ // Worst case: base header + varint chunk count + one max-size varint per chunk.
+ uint64_t MaxHeaderSize =
+ CompressedBuffer::GetHeaderSizeForNoneEncoder() + MeasureVarUInt(MaxChunks) + MeasureVarUInt(MaxChunkEmbedSize) * MaxChunks;
+ return remotestore_impl::BlockComposer::Configuration{
+ .MaxBlockSize = UsableSize + MaxHeaderSize,
+ .MaxChunksPerBlock = MaxChunks,
+ .MaxChunkEmbedSize = MaxChunkEmbedSize,
+ };
+ }
+
+} // namespace projectstore_testutils
+
+TEST_SUITE_BEGIN("remotestore.projectstore");
struct ExportForceDisableBlocksTrue_ForceTempBlocksFalse
{
@@ -4112,7 +4713,7 @@ TEST_CASE_TEMPLATE("project.store.export",
ExportForceDisableBlocksFalse_ForceTempBlocksTrue)
{
using namespace std::literals;
- using namespace testutils;
+ using namespace projectstore_testutils;
ScopedTemporaryDirectory TempDir;
ScopedTemporaryDirectory ExportDir;
@@ -4147,6 +4748,11 @@ TEST_CASE_TEMPLATE("project.store.export",
Oid::NewOid(),
CreateAttachments(std::initializer_list<size_t>{256u * 1024u, 92u * 1024u}, OodleCompressionLevel::None)));
+ Oplog->AppendNewOplogEntry(
+ CreateFilesOplogPackage(Oid::NewOid(),
+ RootDir,
+ CreateFileAttachments(RootDir, std::initializer_list<size_t>{423 * 1024, 2 * 1024, 3213, 762 * 1024})));
+
FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 64u * 1024,
.MaxChunksPerBlock = 1000,
.MaxChunkEmbedSize = 32 * 1024u,
@@ -4159,89 +4765,3080 @@ TEST_CASE_TEMPLATE("project.store.export",
std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options);
RemoteProjectStore::RemoteStoreInfo StoreInfo = RemoteStore->GetInfo();
- uint32_t NetworkWorkerCount = Max(GetHardwareConcurrency() / 4u, 2u);
- uint32_t WorkerCount = (NetworkWorkerCount < GetHardwareConcurrency()) ? Max(GetHardwareConcurrency() - NetworkWorkerCount, 4u) : 4u;
-
- WorkerThreadPool WorkerPool(WorkerCount);
- WorkerThreadPool NetworkPool(NetworkWorkerCount);
-
- RemoteProjectStore::Result ExportResult = SaveOplog(CidStore,
- *RemoteStore,
- *Project.Get(),
- *Oplog,
- NetworkPool,
- WorkerPool,
- Options.MaxBlockSize,
- Options.MaxChunksPerBlock,
- Options.MaxChunkEmbedSize,
- Options.ChunkFileSizeLimit,
- true,
- false,
- false,
- nullptr);
-
- CHECK(ExportResult.ErrorCode == 0);
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ SaveOplog(Log(),
+ CidStore,
+ *RemoteStore,
+ *Project.Get(),
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ Options.MaxBlockSize,
+ Options.MaxChunksPerBlock,
+ Options.MaxChunkEmbedSize,
+ Options.ChunkFileSizeLimit,
+ true,
+ false,
+ false,
+ nullptr);
Ref<ProjectStore::Oplog> OplogImport = Project->NewOplog("oplog2", {});
- CHECK(OplogImport);
-
- RemoteProjectStore::Result ImportResult = LoadOplog(CidStore,
- *RemoteStore,
- *OplogImport,
- NetworkPool,
- WorkerPool,
- /*Force*/ false,
- /*IgnoreMissingAttachments*/ false,
- /*CleanOplog*/ false,
- EPartialBlockRequestMode::Mixed,
- /*HostLatencySec*/ -1.0,
- /*CacheLatencySec*/ -1.0,
- nullptr);
- CHECK(ImportResult.ErrorCode == 0);
-
- RemoteProjectStore::Result ImportForceResult = LoadOplog(CidStore,
- *RemoteStore,
- *OplogImport,
- NetworkPool,
- WorkerPool,
- /*Force*/ true,
- /*IgnoreMissingAttachments*/ false,
- /*CleanOplog*/ false,
- EPartialBlockRequestMode::Mixed,
- /*HostLatencySec*/ -1.0,
- /*CacheLatencySec*/ -1.0,
- nullptr);
- CHECK(ImportForceResult.ErrorCode == 0);
-
- RemoteProjectStore::Result ImportCleanResult = LoadOplog(CidStore,
- *RemoteStore,
- *OplogImport,
- NetworkPool,
- WorkerPool,
- /*Force*/ false,
- /*IgnoreMissingAttachments*/ false,
- /*CleanOplog*/ true,
- EPartialBlockRequestMode::Mixed,
- /*HostLatencySec*/ -1.0,
- /*CacheLatencySec*/ -1.0,
- nullptr);
- CHECK(ImportCleanResult.ErrorCode == 0);
-
- RemoteProjectStore::Result ImportForceCleanResult = LoadOplog(CidStore,
- *RemoteStore,
- *OplogImport,
- NetworkPool,
- WorkerPool,
- /*Force*/ true,
- /*IgnoreMissingAttachments*/ false,
- /*CleanOplog*/ true,
- EPartialBlockRequestMode::Mixed,
- /*HostLatencySec*/ -1.0,
- /*CacheLatencySec*/ -1.0,
- nullptr);
- CHECK(ImportForceCleanResult.ErrorCode == 0);
+ REQUIRE(OplogImport);
+
+ CapturingJobContext Ctx;
+ auto DoLoad = [&](bool Force, bool Clean) {
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .OptionalCache = nullptr,
+ .CacheBuildId = Oid::Zero,
+ .Oplog = *OplogImport,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = Force,
+ .IgnoreMissingAttachments = false,
+ .CleanOplog = Clean,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed,
+ .OptionalJobContext = &Ctx});
+ };
+
+ DoLoad(false, false);
+ DoLoad(true, false);
+ DoLoad(false, true);
+ DoLoad(true, true);
}
+// Populates ExportDir with a SaveOplog call using the same data as project.store.export.
+static std::shared_ptr<RemoteProjectStore>
+SetupExportStore(CidStore& CidStore,
+ ProjectStore::Project& Project,
+ WorkerThreadPool& NetworkPool,
+ WorkerThreadPool& WorkerPool,
+ const std::filesystem::path& ExportDir)
+{
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ Ref<ProjectStore::Oplog> Oplog = Project.NewOplog("oplog_export", {});
+ if (!Oplog)
+ {
+ throw std::runtime_error("Failed to create oplog");
+ }
+
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), {}));
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{77})));
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{7123, 583, 690, 99})));
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{55, 122})));
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(
+ Oid::NewOid(),
+ CreateAttachments(std::initializer_list<size_t>{256u * 1024u, 92u * 1024u}, OodleCompressionLevel::None)));
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(
+ Oid::NewOid(),
+ Project.RootDir,
+ CreateFileAttachments(Project.RootDir, std::initializer_list<size_t>{423 * 1024, 2 * 1024, 3213, 762 * 1024})));
+
+ FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 64u * 1024,
+ .MaxChunksPerBlock = 1000,
+ .MaxChunkEmbedSize = 32 * 1024u,
+ .ChunkFileSizeLimit = 64u * 1024u},
+ /*.FolderPath =*/ExportDir,
+ /*.Name =*/std::string("oplog_export"),
+ /*.OptionalBaseName =*/std::string(),
+ /*.ForceDisableBlocks =*/false,
+ /*.ForceEnableTempBlocks =*/false};
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options);
+ SaveOplog(Log(),
+ CidStore,
+ *RemoteStore,
+ Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ Options.MaxBlockSize,
+ Options.MaxChunksPerBlock,
+ Options.MaxChunkEmbedSize,
+ Options.ChunkFileSizeLimit,
+ /*EmbedLooseFiles*/ true,
+ /*ForceUpload*/ false,
+ /*IgnoreMissingAttachments*/ false,
+ /*OptionalContext*/ nullptr);
+ return RemoteStore;
+}
+
+// Creates an export store with six 512 KB chunks packed into one ~3 MB block (MaxBlockSize=8 MB).
+// The ~1.5 MB slack exceeds the ChunkBlockAnalyser threshold, enabling partial-block downloads.
+// Uses its own GcManager/CidStore/ProjectStore so each call is independent.
+static std::shared_ptr<RemoteProjectStore>
+SetupPartialBlockExportStore(WorkerThreadPool& NetworkPool, WorkerThreadPool& WorkerPool, const std::filesystem::path& ExportDir)
+{
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ GcManager LocalGc;
+ CidStore LocalCidStore(LocalGc);
+ CidStoreConfiguration LocalCidConfig = {.RootDirectory = ExportDir / "cas", .TinyValueThreshold = 1024, .HugeValueThreshold = 4096};
+ LocalCidStore.Initialize(LocalCidConfig);
+
+ std::filesystem::path LocalProjectBasePath = ExportDir / "proj";
+ ProjectStore LocalProjectStore(LocalCidStore, LocalProjectBasePath, LocalGc, ProjectStore::Configuration{});
+ Ref<ProjectStore::Project> LocalProject(LocalProjectStore.NewProject(LocalProjectBasePath / "p"sv,
+ "p"sv,
+ (ExportDir / "root").string(),
+ (ExportDir / "engine").string(),
+ (ExportDir / "game").string(),
+ (ExportDir / "game" / "game.uproject").string()));
+
+ Ref<ProjectStore::Oplog> Oplog = LocalProject->NewOplog("oplog_partial_block", {});
+ if (!Oplog)
+ {
+ throw std::runtime_error("Failed to create oplog");
+ }
+
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(
+ Oid::NewOid(),
+ CreateAttachments(std::initializer_list<size_t>{512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u},
+ OodleCompressionLevel::None)));
+
+ // MaxChunkEmbedSize must exceed 512 KB (compressed size with None encoding) or all chunks
+ // become loose attachments and no block is created.
+ FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 8u * 1024u * 1024u,
+ .MaxChunksPerBlock = 1000,
+ .MaxChunkEmbedSize = RemoteStoreOptions::DefaultMaxChunkEmbedSize,
+ .ChunkFileSizeLimit = 64u * 1024u * 1024u},
+ /*.FolderPath =*/ExportDir,
+ /*.Name =*/std::string("oplog_partial_block"),
+ /*.OptionalBaseName =*/std::string(),
+ /*.ForceDisableBlocks =*/false,
+ /*.ForceEnableTempBlocks =*/false};
+ std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options);
+ SaveOplog(Log(),
+ LocalCidStore,
+ *RemoteStore,
+ *LocalProject,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ Options.MaxBlockSize,
+ Options.MaxChunksPerBlock,
+ Options.MaxChunkEmbedSize,
+ Options.ChunkFileSizeLimit,
+ /*EmbedLooseFiles*/ true,
+ /*ForceUpload*/ false,
+ /*IgnoreMissingAttachments*/ false,
+ /*OptionalContext*/ nullptr);
+ return RemoteStore;
+}
+
+static IoHash
+FindBlockWithMultipleChunks(RemoteProjectStore& Store, size_t MinChunkCount)
+{
+ RemoteProjectStore::LoadContainerResult ContainerResult = Store.LoadContainer();
+ if (ContainerResult.ErrorCode != 0)
+ {
+ return {};
+ }
+ std::vector<IoHash> BlockHashes = GetBlockHashesFromOplog(ContainerResult.ContainerObject);
+ if (BlockHashes.empty())
+ {
+ return {};
+ }
+ RemoteProjectStore::GetBlockDescriptionsResult Descriptions = Store.GetBlockDescriptions(BlockHashes, nullptr, Oid{});
+ if (Descriptions.ErrorCode != 0)
+ {
+ return {};
+ }
+ for (const ChunkBlockDescription& Desc : Descriptions.Blocks)
+ {
+ if (Desc.ChunkRawHashes.size() >= MinChunkCount)
+ {
+ return Desc.BlockHash;
+ }
+ }
+ return {};
+}
+
+// Seeds TargetCidStore with even-indexed chunks (0, 2, 4 ...) from BlockHash, leaving
+// odd chunks absent to create non-adjacent missing ranges for partial-block download tests.
+static void
+SeedCidStoreWithAlternateChunks(CidStore& TargetCidStore, RemoteProjectStore& Source, const IoHash& BlockHash)
+{
+ RemoteProjectStore::LoadAttachmentResult BlockResult = Source.LoadAttachment(BlockHash);
+ if (BlockResult.ErrorCode != 0 || !BlockResult.Bytes)
+ {
+ return;
+ }
+
+ IoHash RawHash;
+ uint64_t RawSize;
+ CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(BlockResult.Bytes), RawHash, RawSize);
+ if (!Compressed)
+ {
+ return;
+ }
+ CompositeBuffer BlockPayload = Compressed.DecompressToComposite();
+ if (!BlockPayload)
+ {
+ return;
+ }
+
+ uint32_t ChunkIndex = 0;
+ uint64_t HeaderSize = 0;
+ IterateChunkBlock(
+ BlockPayload.Flatten(),
+ [&TargetCidStore, &ChunkIndex](CompressedBuffer&& Chunk, const IoHash& AttachmentHash) {
+ if (ChunkIndex % 2 == 0)
+ {
+ IoBuffer ChunkData = Chunk.GetCompressed().Flatten().AsIoBuffer();
+ TargetCidStore.AddChunk(ChunkData, AttachmentHash);
+ }
+ ++ChunkIndex;
+ },
+ HeaderSize);
+}
+
+TEST_CASE("project.store.import.context_settings")
+{
+ using namespace std::literals;
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ std::filesystem::path EngineRootDir = TempDir.Path() / "engine";
+ std::filesystem::path ProjectRootDir = TempDir.Path() / "game";
+ std::filesystem::path ProjectFilePath = TempDir.Path() / "game" / "game.uproject";
+
+ // Export-side CAS and project store; kept disjoint from the import side.
+ GcManager ExportGc;
+ CidStore ExportCidStore(ExportGc);
+ CidStoreConfiguration ExportCidConfig = {.RootDirectory = TempDir.Path() / "export_cas",
+ .TinyValueThreshold = 1024,
+ .HugeValueThreshold = 4096};
+ ExportCidStore.Initialize(ExportCidConfig);
+
+ std::filesystem::path ExportBasePath = TempDir.Path() / "export_projectstore";
+ ProjectStore ExportProjectStore(ExportCidStore, ExportBasePath, ExportGc, ProjectStore::Configuration{});
+ Ref<ProjectStore::Project> ExportProject(ExportProjectStore.NewProject(ExportBasePath / "proj1"sv,
+ "proj1"sv,
+ RootDir.string(),
+ EngineRootDir.string(),
+ ProjectRootDir.string(),
+ ProjectFilePath.string()));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore =
+ SetupExportStore(ExportCidStore, *ExportProject, NetworkPool, WorkerPool, ExportDir.Path());
+
+ // Import-side CAS starts empty so the first import downloads from the remote store without ForceDownload.
+ GcManager ImportGc;
+ CidStore ImportCidStore(ImportGc);
+ CidStoreConfiguration ImportCidConfig = {.RootDirectory = TempDir.Path() / "import_cas",
+ .TinyValueThreshold = 1024,
+ .HugeValueThreshold = 4096};
+ ImportCidStore.Initialize(ImportCidConfig);
+
+ std::filesystem::path ImportBasePath = TempDir.Path() / "import_projectstore";
+ ProjectStore ImportProjectStore(ImportCidStore, ImportBasePath, ImportGc, ProjectStore::Configuration{});
+ Ref<ProjectStore::Project> ImportProject(ImportProjectStore.NewProject(ImportBasePath / "proj1"sv,
+ "proj1"sv,
+ RootDir.string(),
+ EngineRootDir.string(),
+ ProjectRootDir.string(),
+ ProjectFilePath.string()));
+
+ const Oid CacheBuildId = Oid::NewOid();
+ BuildStorageCache::Statistics CacheStats;
+ std::unique_ptr<BuildStorageCache> Cache = CreateInMemoryBuildStorageCache(256u, CacheStats);
+ auto ResetCacheStats = [&]() {
+ CacheStats.TotalBytesRead = 0;
+ CacheStats.TotalBytesWritten = 0;
+ CacheStats.TotalRequestCount = 0;
+ CacheStats.TotalRequestTimeUs = 0;
+ CacheStats.TotalExecutionTimeUs = 0;
+ CacheStats.PeakSentBytes = 0;
+ CacheStats.PeakReceivedBytes = 0;
+ CacheStats.PeakBytesPerSec = 0;
+ CacheStats.PutBlobCount = 0;
+ CacheStats.PutBlobByteCount = 0;
+ };
+
+ int OpJobIndex = 0;
+
+ CapturingJobContext OpJobContext;
+
+ // Each call creates a fresh oplog to prevent short-circuiting on already-present data.
+ auto DoImport = [&](BuildStorageCache* OptCache,
+ EPartialBlockRequestMode Mode,
+ double StoreLatency,
+ uint64_t StoreRanges,
+ double CacheLatency,
+ uint64_t CacheRanges,
+ bool PopulateCache,
+ bool ForceDownload) -> void {
+ Ref<ProjectStore::Oplog> ImportOplog = ImportProject->NewOplog(fmt::format("import_{}", OpJobIndex++), {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = ImportCidStore,
+ .RemoteStore = *RemoteStore,
+ .OptionalCache = OptCache,
+ .CacheBuildId = CacheBuildId,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = ForceDownload,
+ .IgnoreMissingAttachments = false,
+ .CleanOplog = false,
+ .PartialBlockRequestMode = Mode,
+ .PopulateCache = PopulateCache,
+ .StoreLatencySec = StoreLatency,
+ .StoreMaxRangeCountPerRequest = StoreRanges,
+ .CacheLatencySec = CacheLatency,
+ .CacheMaxRangeCountPerRequest = CacheRanges,
+ .OptionalJobContext = &OpJobContext});
+ };
+
+ // Shorthand: Mode=All, low latency, 128 ranges for both store and cache.
+ auto ImportAll = [&](BuildStorageCache* OptCache, bool Populate, bool Force) -> void {
+ DoImport(OptCache, EPartialBlockRequestMode::All, 0.001, 128u, 0.001, 128u, Populate, Force);
+ };
+
+ SUBCASE("mode_off_no_cache") { DoImport(nullptr, EPartialBlockRequestMode::Off, -1.0, (uint64_t)-1, -1.0, (uint64_t)-1, false, false); }
+
+ SUBCASE("mode_all_multirange_cloud_no_cache")
+ {
+ // StoreMaxRangeCountPerRequest > 1 -> MultiRange cloud path.
+ DoImport(nullptr, EPartialBlockRequestMode::All, 0.001, 128u, -1.0, 0u, false, false);
+ }
+
+ SUBCASE("mode_all_singlerange_cloud_no_cache")
+ {
+ // StoreMaxRangeCountPerRequest == 1 -> SingleRange cloud path.
+ DoImport(nullptr, EPartialBlockRequestMode::All, 0.001, 1u, -1.0, 0u, false, false);
+ }
+
+ SUBCASE("mode_mixed_high_latency_no_cache")
+ {
+ // High store latency encourages range merging; Mixed uses SingleRange for cloud, Off for cache.
+ DoImport(nullptr, EPartialBlockRequestMode::Mixed, 0.1, 128u, -1.0, 0u, false, false);
+ }
+
+ SUBCASE("cache_populate_and_hit")
+ {
+ // First import: CidStore empty -> blocks downloaded and written to cache.
+ ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false);
+ CHECK(CacheStats.PutBlobCount > 0);
+
+ // Re-import with Force=true: HasAttachment overridden, blocks served from cache.
+ ResetCacheStats();
+ ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/true);
+ CHECK(CacheStats.PutBlobCount == 0);
+ CHECK(CacheStats.TotalRequestCount > 0);
+ }
+
+ SUBCASE("cache_no_populate_flag")
+ {
+ ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/false);
+ CHECK(CacheStats.PutBlobCount == 0);
+ }
+
+ SUBCASE("mode_zencacheonly_cache_multirange")
+ {
+ // Pre-populate; re-import via ZenCacheOnly. All chunks needed -> FullBlockIndexes path (GetBuildBlob).
+ ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false);
+ ResetCacheStats();
+
+ DoImport(Cache.get(), EPartialBlockRequestMode::ZenCacheOnly, 0.1, 128u, 0.001, 128u, false, true);
+ CHECK(CacheStats.TotalRequestCount > 0);
+ }
+
+ SUBCASE("mode_zencacheonly_cache_singlerange")
+ {
+ // Pre-populate; re-import via ZenCacheOnly with CacheMaxRangeCountPerRequest=1. All chunks needed -> GetBuildBlob (full-blob).
+ ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false);
+ ResetCacheStats();
+
+ DoImport(Cache.get(), EPartialBlockRequestMode::ZenCacheOnly, 0.1, 128u, 0.001, 1u, false, true);
+ CHECK(CacheStats.TotalRequestCount > 0);
+ }
+
+ SUBCASE("mode_all_cache_and_cloud_multirange")
+ {
+ // Pre-populate cache; All mode uses multi-range for both the cache and cloud paths.
+ ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false);
+ ResetCacheStats();
+
+ ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/true);
+ CHECK(CacheStats.TotalRequestCount > 0);
+ }
+
+ SUBCASE("partial_block_cloud_multirange")
+ {
+ ScopedTemporaryDirectory PartialExportDir;
+ std::shared_ptr<RemoteProjectStore> PartialRemoteStore =
+ SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path());
+
+ // Seeding even-indexed chunks (0, 2, 4) leaves odd ones (1, 3, 5) absent in
+ // ImportCidStore. Three non-adjacent needed positions -> three BlockRangeDescriptors.
+ IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u);
+ CHECK(BlockHash != IoHash::Zero);
+ SeedCidStoreWithAlternateChunks(ImportCidStore, *PartialRemoteStore, BlockHash);
+
+ // StoreMaxRangeCountPerRequest=128 -> all three ranges sent in one LoadAttachmentRanges call.
+ Ref<ProjectStore::Oplog> PartialOplog = ImportProject->NewOplog(fmt::format("partial_cloud_multi_{}", OpJobIndex++), {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = ImportCidStore,
+ .RemoteStore = *PartialRemoteStore,
+ .OptionalCache = nullptr,
+ .CacheBuildId = CacheBuildId,
+ .Oplog = *PartialOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = false,
+ .IgnoreMissingAttachments = false,
+ .CleanOplog = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::All,
+ .PopulateCache = false,
+ .StoreLatencySec = 0.001,
+ .StoreMaxRangeCountPerRequest = 128u,
+ .CacheLatencySec = -1.0,
+ .CacheMaxRangeCountPerRequest = 0u,
+ .OptionalJobContext = &OpJobContext});
+ }
+
+ SUBCASE("partial_block_cloud_singlerange")
+ {
+ // Same block layout as partial_block_cloud_multirange but StoreMaxRangeCountPerRequest=1.
+ // DownloadPartialBlock issues one LoadAttachmentRanges call per range.
+ ScopedTemporaryDirectory PartialExportDir;
+ std::shared_ptr<RemoteProjectStore> PartialRemoteStore =
+ SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path());
+
+ IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u);
+ CHECK(BlockHash != IoHash::Zero);
+ SeedCidStoreWithAlternateChunks(ImportCidStore, *PartialRemoteStore, BlockHash);
+
+ Ref<ProjectStore::Oplog> PartialOplog = ImportProject->NewOplog(fmt::format("partial_cloud_single_{}", OpJobIndex++), {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = ImportCidStore,
+ .RemoteStore = *PartialRemoteStore,
+ .OptionalCache = nullptr,
+ .CacheBuildId = CacheBuildId,
+ .Oplog = *PartialOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = false,
+ .IgnoreMissingAttachments = false,
+ .CleanOplog = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::All,
+ .PopulateCache = false,
+ .StoreLatencySec = 0.001,
+ .StoreMaxRangeCountPerRequest = 1u,
+ .CacheLatencySec = -1.0,
+ .CacheMaxRangeCountPerRequest = 0u,
+ .OptionalJobContext = &OpJobContext});
+ }
+
+ SUBCASE("partial_block_cache_multirange")
+ {
+ ScopedTemporaryDirectory PartialExportDir;
+ std::shared_ptr<RemoteProjectStore> PartialRemoteStore =
+ SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path());
+
+ IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u);
+ CHECK(BlockHash != IoHash::Zero);
+
+ // Phase 1: full block download from remote populates the cache.
+ {
+ Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog(fmt::format("partial_cache_multi_p1_{}", OpJobIndex++), {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = ImportCidStore,
+ .RemoteStore = *PartialRemoteStore,
+ .OptionalCache = Cache.get(),
+ .CacheBuildId = CacheBuildId,
+ .Oplog = *Phase1Oplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = false,
+ .IgnoreMissingAttachments = false,
+ .CleanOplog = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::All,
+ .PopulateCache = true,
+ .StoreLatencySec = 0.001,
+ .StoreMaxRangeCountPerRequest = 128u,
+ .CacheLatencySec = 0.001,
+ .CacheMaxRangeCountPerRequest = 128u,
+ .OptionalJobContext = &OpJobContext});
+
+ CHECK(CacheStats.PutBlobCount > 0);
+ }
+ ResetCacheStats();
+
+ // Phase 2: fresh CidStore with even chunks seeded; CacheMaxRangeCountPerRequest=128 -> GetBuildBlobRanges.
+ GcManager Phase2Gc;
+ CidStore Phase2CidStore(Phase2Gc);
+ CidStoreConfiguration Phase2CidConfig = {.RootDirectory = TempDir.Path() / "partial_cas",
+ .TinyValueThreshold = 1024,
+ .HugeValueThreshold = 4096};
+ Phase2CidStore.Initialize(Phase2CidConfig);
+ SeedCidStoreWithAlternateChunks(Phase2CidStore, *PartialRemoteStore, BlockHash);
+
+ Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog(fmt::format("partial_cache_multi_p2_{}", OpJobIndex++), {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = Phase2CidStore,
+ .RemoteStore = *PartialRemoteStore,
+ .OptionalCache = Cache.get(),
+ .CacheBuildId = CacheBuildId,
+ .Oplog = *Phase2Oplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = false,
+ .IgnoreMissingAttachments = false,
+ .CleanOplog = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::ZenCacheOnly,
+ .PopulateCache = false,
+ .StoreLatencySec = 0.001,
+ .StoreMaxRangeCountPerRequest = 128u,
+ .CacheLatencySec = 0.001,
+ .CacheMaxRangeCountPerRequest = 128u,
+ .OptionalJobContext = &OpJobContext});
+
+ CHECK(CacheStats.TotalRequestCount > 0);
+ }
+
+ SUBCASE("partial_block_cache_singlerange")
+ {
+ ScopedTemporaryDirectory PartialExportDir;
+ std::shared_ptr<RemoteProjectStore> PartialRemoteStore =
+ SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path());
+
+ IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u);
+ CHECK(BlockHash != IoHash::Zero);
+
+ // Phase 1: full block download from remote into cache.
+ {
+ Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog(fmt::format("partial_cache_single_p1_{}", OpJobIndex++), {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = ImportCidStore,
+ .RemoteStore = *PartialRemoteStore,
+ .OptionalCache = Cache.get(),
+ .CacheBuildId = CacheBuildId,
+ .Oplog = *Phase1Oplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = false,
+ .IgnoreMissingAttachments = false,
+ .CleanOplog = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::All,
+ .PopulateCache = true,
+ .StoreLatencySec = 0.001,
+ .StoreMaxRangeCountPerRequest = 128u,
+ .CacheLatencySec = 0.001,
+ .CacheMaxRangeCountPerRequest = 128u,
+ .OptionalJobContext = &OpJobContext});
+
+ CHECK(CacheStats.PutBlobCount > 0);
+ }
+ ResetCacheStats();
+
+ // Phase 2: CacheMaxRangeCountPerRequest=1 -> GetBuildBlob with range offset, called per needed range.
+ GcManager Phase2Gc;
+ CidStore Phase2CidStore(Phase2Gc);
+ CidStoreConfiguration Phase2CidConfig = {.RootDirectory = TempDir.Path() / "partial_cas_single",
+ .TinyValueThreshold = 1024,
+ .HugeValueThreshold = 4096};
+ Phase2CidStore.Initialize(Phase2CidConfig);
+ SeedCidStoreWithAlternateChunks(Phase2CidStore, *PartialRemoteStore, BlockHash);
+
+ Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog(fmt::format("partial_cache_single_p2_{}", OpJobIndex++), {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = Phase2CidStore,
+ .RemoteStore = *PartialRemoteStore,
+ .OptionalCache = Cache.get(),
+ .CacheBuildId = CacheBuildId,
+ .Oplog = *Phase2Oplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = false,
+ .IgnoreMissingAttachments = false,
+ .CleanOplog = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::ZenCacheOnly,
+ .PopulateCache = false,
+ .StoreLatencySec = 0.001,
+ .StoreMaxRangeCountPerRequest = 128u,
+ .CacheLatencySec = 0.001,
+ .CacheMaxRangeCountPerRequest = 1u,
+ .OptionalJobContext = &OpJobContext});
+
+ CHECK(CacheStats.TotalRequestCount > 0);
+ }
+}
+
+static Ref<ProjectStore::Project>
+MakeTestProject(CidStore& CidStore,
+ GcManager& Gc,
+ const std::filesystem::path& TempDir,
+ std::unique_ptr<class ProjectStore>& OutProjectStore)
+{
+ using namespace std::literals;
+
+ CidStoreConfiguration CidConfig = {.RootDirectory = TempDir / "cas", .TinyValueThreshold = 1024, .HugeValueThreshold = 4096};
+ CidStore.Initialize(CidConfig);
+
+ std::filesystem::path BasePath = TempDir / "projectstore";
+ OutProjectStore = std::make_unique<class ProjectStore>(CidStore, BasePath, Gc, ProjectStore::Configuration{});
+
+ std::filesystem::path RootDir = TempDir / "root";
+ std::filesystem::path EngineRootDir = TempDir / "engine";
+ std::filesystem::path ProjectRootDir = TempDir / "game";
+ std::filesystem::path ProjectFilePath = TempDir / "game" / "game.uproject";
+
+ return Ref<ProjectStore::Project>(OutProjectStore->NewProject(BasePath / "proj1"sv,
+ "proj1"sv,
+ RootDir.string(),
+ EngineRootDir.string(),
+ ProjectRootDir.string(),
+ ProjectFilePath.string()));
+}
+
+static void
+RunSaveOplog(CidStore& CidStore,
+ ProjectStore::Project& Project,
+ ProjectStore::Oplog& Oplog,
+ WorkerThreadPool& NetworkPool,
+ WorkerThreadPool& WorkerPool,
+ const std::filesystem::path& ExportDir,
+ const std::string& Name,
+ size_t MaxBlockSize,
+ size_t MaxChunksPerBlock,
+ size_t MaxChunkEmbedSize,
+ bool EmbedLooseFiles,
+ bool ForceUpload,
+ bool IgnoreMissingAttachments,
+ JobContext* OptionalContext,
+ bool ForceDisableBlocks,
+ std::shared_ptr<RemoteProjectStore>* OutRemoteStore = nullptr)
+{
+ FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = MaxBlockSize,
+ .MaxChunksPerBlock = MaxChunksPerBlock,
+ .MaxChunkEmbedSize = MaxChunkEmbedSize,
+ .ChunkFileSizeLimit = 64u * 1024u * 1024u},
+ /*.FolderPath =*/ExportDir,
+ /*.Name =*/Name,
+ /*.OptionalBaseName =*/std::string(),
+ /*.ForceDisableBlocks =*/ForceDisableBlocks,
+ /*.ForceEnableTempBlocks =*/false};
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options);
+ if (OutRemoteStore)
+ {
+ *OutRemoteStore = RemoteStore;
+ }
+ SaveOplog(Log(),
+ CidStore,
+ *RemoteStore,
+ Project,
+ Oplog,
+ NetworkPool,
+ WorkerPool,
+ Options.MaxBlockSize,
+ Options.MaxChunksPerBlock,
+ Options.MaxChunkEmbedSize,
+ Options.ChunkFileSizeLimit,
+ EmbedLooseFiles,
+ ForceUpload,
+ IgnoreMissingAttachments,
+ OptionalContext);
+}
+
+TEST_CASE("project.store.export.no_attachments_needed")
+{
+ // With no binary attachments, UploadAttachments reports "No attachments needed".
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_no_att", {});
+ REQUIRE(Oplog);
+
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), {}));
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), {}));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ CapturingJobContext Ctx;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_no_att",
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ &Ctx,
+ /*ForceDisableBlocks=*/false);
+
+ CHECK(Ctx.HasMessage("No attachments needed"));
+}
+
+TEST_CASE("project.store.embed_loose_files_true")
+{
+ // EmbedLooseFiles=true: file-op entries are rewritten with a BinaryAttachment field. Round-trip must succeed.
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_embed_true", {});
+ REQUIRE(Oplog);
+
+ Oplog->AppendNewOplogEntry(
+ CreateFilesOplogPackage(Oid::NewOid(), RootDir, CreateFileAttachments(RootDir, std::initializer_list<size_t>{1024, 2048})));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_embed_true",
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore);
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_embed_true_import", {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed});
+}
+
+TEST_CASE("project.store.embed_loose_files_false" * doctest::skip()) // superseded by buildcontainer.embed_loose_files_false_no_rewrite
+{
+ // EmbedLooseFiles=false: file-op entries pass through unrewritten. Round-trip must succeed.
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_embed_false", {});
+ REQUIRE(Oplog);
+
+ // One file-op entry referencing two small loose files (1 KB and 2 KB) under RootDir.
+ Oplog->AppendNewOplogEntry(
+ CreateFilesOplogPackage(Oid::NewOid(), RootDir, CreateFileAttachments(RootDir, std::initializer_list<size_t>{1024, 2048})));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_embed_false",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ /*MaxChunkEmbedSize=*/32u * 1024u,
+ /*EmbedLooseFiles=*/false,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore);
+
+ // Import back into a fresh oplog; LoadOplog throws on failure, so reaching the end is the pass condition.
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_embed_false_import", {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed});
+}
+
+TEST_CASE("project.store.export.missing_attachment_ignored" *
+ doctest::skip()) // superseded by buildcontainer.ignore_missing_file_attachment_warn
+{
+ // Export with IgnoreMissingAttachments=true after the backing files were deleted:
+ // the export must complete and report a "Missing attachment" message instead of throwing.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024});
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_att", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts));
+
+ // Delete the source files so the export hits the missing-attachment path.
+ for (const auto& [Id, Path] : FileAtts)
+ {
+ std::filesystem::remove(Path);
+ }
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ CapturingJobContext Ctx;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_missing_att",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ /*MaxChunkEmbedSize=*/32u * 1024u,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/true,
+ /*OptionalContext=*/&Ctx,
+ /*ForceDisableBlocks=*/false);
+
+ CHECK(Ctx.HasMessage("Missing attachment"));
+}
+
+TEST_CASE("project.store.export.missing_chunk_in_cidstore" *
+ doctest::skip()) // superseded by buildcontainer.ignore_missing_binary_attachment_warn/throw
+{
+ // A bulkdata entry references a binary-attachment hash that is never stored in
+ // CidStore; with IgnoreMissingAttachments=false the export must throw.
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ // FakeHash is computed from FakeData, but FakeData is deliberately never added
+ // to CidStore, so the "data" attachment below is unresolvable.
+ IoBuffer FakeData = CreateRandomBlob(256);
+ IoHash FakeHash = IoHash::HashBuffer(FakeData);
+
+ // Hand-build an oplog package with one "Standard" bulkdata object pointing at FakeHash.
+ CbObjectWriter Object;
+ Object << "key"sv << OidAsString(Oid::NewOid());
+ Object.BeginArray("bulkdata"sv);
+ {
+ Object.BeginObject();
+ Object << "id"sv << Oid::NewOid();
+ Object << "type"sv
+ << "Standard"sv;
+ Object.AddBinaryAttachment("data"sv, FakeHash);
+ Object.EndObject();
+ }
+ Object.EndArray();
+ CbPackage Package;
+ Package.SetObject(Object.Save());
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_cid", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(Package);
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ CHECK_THROWS(RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_missing_cid",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ /*MaxChunkEmbedSize=*/32u * 1024u,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false));
+}
+
+TEST_CASE("project.store.export.large_file_attachment_direct")
+{
+ // File > 2 x MaxChunkEmbedSize: classified as a direct large attachment (no compression attempt). Round-trip must succeed.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+
+ // 96 KB > 2 x 32 KB -> direct large attachment.
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{96u * 1024u});
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_large_direct", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ constexpr size_t MaxChunkEmbedSize = 32u * 1024u;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_large_direct",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ MaxChunkEmbedSize,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore);
+
+ // Round-trip: LoadOplog throws on failure, so completing the import is the pass condition.
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_large_direct_import", {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed});
+}
+
+TEST_CASE("project.store.export.large_file_attachment_via_temp")
+{
+ // File with MaxChunkEmbedSize < size <= 2xMaxChunkEmbedSize: compressed to a temp buffer;
+ // if still large (incompressible), goes to OnLargeAttachment. Round-trip must succeed.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+
+ // 48 KB: 32 KB < 48 KB <= 64 KB -> temp-compression path; incompressible data stays > 32 KB.
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{48u * 1024u});
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_large_via_temp", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ constexpr size_t MaxChunkEmbedSize = 32u * 1024u;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_large_via_temp",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ MaxChunkEmbedSize,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore);
+
+ // Round-trip: LoadOplog throws on failure, so completing the import is the pass condition.
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_large_via_temp_import", {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed});
+}
+
+TEST_CASE("project.store.export.large_chunk_from_cidstore")
+{
+ // Bulkdata attachment in CidStore with compressed size > MaxChunkEmbedSize -> OnLargeAttachment. Round-trip must succeed.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ // 64 KB with None encoding -> compressed ~ 64 KB > MaxChunkEmbedSize = 32 KB.
+ auto Attachments = CreateAttachments(std::initializer_list<size_t>{64u * 1024u}, OodleCompressionLevel::None);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_large_cid", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), Attachments));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ constexpr size_t MaxChunkEmbedSize = 32u * 1024u;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_large_cid",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ MaxChunkEmbedSize,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore);
+
+ // Round-trip: LoadOplog throws on failure, so completing the import is the pass condition.
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_large_cid_import", {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed});
+}
+
+TEST_CASE("project.store.export.block_reuse")
+{
+ // Second export to the same store: FindReuseBlocks matches existing blocks; no new blocks are written.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ // 20 KB with None encoding: compressed ~ 20 KB < MaxChunkEmbedSize = 32 KB -> goes into a block.
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_reuse", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(
+ Oid::NewOid(),
+ CreateAttachments(std::initializer_list<size_t>{20u * 1024u, 20u * 1024u}, OodleCompressionLevel::None)));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ constexpr size_t MaxChunkEmbedSize = 32u * 1024u;
+ constexpr size_t MaxBlockSize = 64u * 1024u;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_reuse",
+ MaxBlockSize,
+ /*MaxChunksPerBlock=*/1000,
+ MaxChunkEmbedSize,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore);
+
+ // Snapshot the block set produced by the first export.
+ RemoteProjectStore::GetKnownBlocksResult KnownAfterFirst = RemoteStore->GetKnownBlocks();
+ REQUIRE(!KnownAfterFirst.Blocks.empty());
+
+ std::vector<IoHash> BlockHashesAfterFirst;
+ for (const ChunkBlockDescription& B : KnownAfterFirst.Blocks)
+ {
+ BlockHashesAfterFirst.push_back(B.BlockHash);
+ }
+
+ // Second export of the same oplog into the same remote store, via SaveOplog directly.
+ SaveOplog(Log(),
+ CidStore,
+ *RemoteStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ MaxBlockSize,
+ /*MaxChunksPerBlock=*/1000,
+ MaxChunkEmbedSize,
+ 64u * 1024u * 1024u, // presumably ChunkFileSizeLimit (matches RemoteStoreOptions elsewhere) — TODO confirm
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr);
+
+ RemoteProjectStore::GetKnownBlocksResult KnownAfterSecond = RemoteStore->GetKnownBlocks();
+ std::vector<IoHash> BlockHashesAfterSecond;
+ for (const ChunkBlockDescription& B : KnownAfterSecond.Blocks)
+ {
+ BlockHashesAfterSecond.push_back(B.BlockHash);
+ }
+
+ // Order-insensitive comparison: the second export must not have added or replaced blocks.
+ std::sort(BlockHashesAfterFirst.begin(), BlockHashesAfterFirst.end());
+ std::sort(BlockHashesAfterSecond.begin(), BlockHashesAfterSecond.end());
+ CHECK(BlockHashesAfterFirst == BlockHashesAfterSecond);
+}
+
+TEST_CASE("project.store.export.max_chunks_per_block")
+{
+ // MaxChunksPerBlock=2 with 3 attachments from one op -> at least 2 blocks produced.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ // 2 KB with None encoding: compressed ~ 2 KB < MaxChunkEmbedSize = 4 KB -> enters block assembly.
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_max_chunks", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(
+ Oid::NewOid(),
+ CreateAttachments(std::initializer_list<size_t>{2u * 1024u, 2u * 1024u, 2u * 1024u}, OodleCompressionLevel::None)));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ constexpr size_t MaxChunksPerBlock = 2;
+ constexpr size_t MaxBlockSize = 1u * 1024u * 1024u; // large enough that only the chunk count limits block packing
+ constexpr size_t MaxChunkEmbedSize = 4u * 1024u;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_max_chunks",
+ MaxBlockSize,
+ MaxChunksPerBlock,
+ MaxChunkEmbedSize,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore);
+
+ // 3 chunks with a 2-chunk cap cannot fit in a single block.
+ RemoteProjectStore::GetKnownBlocksResult KnownBlocks = RemoteStore->GetKnownBlocks();
+ CHECK(KnownBlocks.Blocks.size() >= 2);
+
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_max_chunks_import", {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed});
+}
+
+TEST_CASE("project.store.export.max_data_per_block")
+{
+ // Verifies ComposeBlocks respects UsableBlockSize = MaxBlockSize - MaxHeaderSize.
+ // With MaxBlockSize=7168, MaxChunksPerBlock=32: MaxHeaderSize=129, UsableBlockSize=7039.
+ // Oids[1] contributes 7041 compressed bytes (> 7039) to force a block boundary at that exact limit.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_max_data_per_block", {});
+ REQUIRE(Oplog);
+
+ // Five ops, sorted by Oid so the size layout described above maps deterministically
+ // onto the order block composition sees them in.
+ std::vector<Oid> Oids;
+ Oids.push_back(Oid::NewOid());
+ Oids.push_back(Oid::NewOid());
+ Oids.push_back(Oid::NewOid());
+ Oids.push_back(Oid::NewOid());
+ Oids.push_back(Oid::NewOid());
+ std::sort(Oids.begin(), Oids.end());
+
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oids[0], CreateAttachments(std::initializer_list<size_t>{2u * 1024u}, OodleCompressionLevel::None)));
+
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oids[1],
+ CreateAttachments(std::initializer_list<size_t>{3u * 1024u, 2u * 1024u, 2u * 1024u, 875u, 875u, 875u},
+ OodleCompressionLevel::None)));
+
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oids[2], CreateAttachments(std::initializer_list<size_t>{875u, 875u}, OodleCompressionLevel::None)));
+
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(
+ Oids[3],
+ CreateAttachments(std::initializer_list<size_t>{875u, 875u, 875u, 875u, 875u, 875u}, OodleCompressionLevel::None)));
+
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oids[4], CreateAttachments(std::initializer_list<size_t>{1676, 1678}, OodleCompressionLevel::None)));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ constexpr size_t MaxChunksPerBlock = 32;
+ constexpr size_t MaxBlockSize = 7u * 1024u;
+ constexpr size_t MaxChunkEmbedSize = 3u * 1024u;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_max_data_per_block",
+ MaxBlockSize,
+ MaxChunksPerBlock,
+ MaxChunkEmbedSize,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore);
+
+ // The byte limit must have forced at least one block boundary.
+ RemoteProjectStore::GetKnownBlocksResult KnownBlocks = RemoteStore->GetKnownBlocks();
+ CHECK(KnownBlocks.Blocks.size() >= 2);
+
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_max_data_per_block_import", {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed});
+}
+
+TEST_CASE("project.store.export.file_deleted_between_phases")
+{
+ // File exists during RewriteOp but is deleted before AllowChunking workers run.
+ // With IgnoreMissingAttachments=true the export continues.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024});
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_file_deleted", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts));
+
+ std::vector<std::filesystem::path> FilePaths;
+ for (const auto& [Id, Path] : FileAtts)
+ {
+ FilePaths.push_back(Path);
+ }
+
+ // Deletes files when "Rewrote" arrives, before AllowChunking workers run.
+ struct DeleteOnRewriteContext : public CapturingJobContext
+ {
+ // Non-owning pointer to the paths to delete; set by the test body below.
+ std::vector<std::filesystem::path>* Paths = nullptr;
+ void ReportMessage(std::string_view Message) override
+ {
+ // Forward to the capturing base so HasMessage() still sees everything.
+ CapturingJobContext::ReportMessage(Message);
+ if (Message.find("Rewrote") != std::string_view::npos && Paths)
+ {
+ for (const auto& P : *Paths)
+ {
+ std::filesystem::remove(P);
+ }
+ }
+ }
+ };
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ DeleteOnRewriteContext Ctx;
+ Ctx.Paths = &FilePaths;
+
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_file_deleted",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ /*MaxChunkEmbedSize=*/32u * 1024u,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/true,
+ /*OptionalContext=*/&Ctx,
+ /*ForceDisableBlocks=*/false);
+
+ // The export must have both reported the loss and actually raced past the deletion.
+ CHECK(Ctx.HasMessage("Missing attachment"));
+ for (const auto& P : FilePaths)
+ {
+ CHECK(!std::filesystem::exists(P));
+ }
+}
+
+TEST_CASE("project.store.embed_loose_files_zero_data_hash")
+{
+ // File-op entries with "data": IoHash::Zero (unresolved marker) trigger RewriteOp to
+ // read from disk and replace with a resolved BinaryAttachment.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024});
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_zero_data_hash", {});
+ REQUIRE(Oplog);
+ // Package variant whose "data" fields carry the zero-hash placeholder.
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackageWithZeroDataHash(Oid::NewOid(), RootDir, FileAtts));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_zero_data_hash",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ /*MaxChunkEmbedSize=*/32u * 1024u,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore);
+
+ // Round-trip: LoadOplog throws on failure, so completing the import is the pass condition.
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_zero_data_hash_import", {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed});
+}
+
+TEST_CASE("project.store.embed_loose_files_already_resolved")
+{
+ // After an export->import round-trip, oplog entries carry resolved "data": BinaryAttachment(H).
+ // A re-export must preserve those fields without re-reading from disk.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir1;
+ ScopedTemporaryDirectory ExportDir2;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024});
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_already_resolved", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ // First export into ExportDir1.
+ std::shared_ptr<RemoteProjectStore> RemoteStore1;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir1.Path(),
+ "oplog_already_resolved",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ /*MaxChunkEmbedSize=*/32u * 1024u,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore1);
+
+ // Import: the imported oplog now holds resolved BinaryAttachment fields.
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_already_resolved_import", {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore1,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed});
+
+ // Re-export the imported oplog into a second directory; must succeed without touching RootDir.
+ RunSaveOplog(CidStore,
+ *Project,
+ *ImportOplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir2.Path(),
+ "oplog_already_resolved_reexport",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ /*MaxChunkEmbedSize=*/32u * 1024u,
+ /*EmbedLooseFiles=*/true,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false);
+}
+
+TEST_CASE("project.store.import.missing_attachment")
+{
+ // Export a small oplog with ForceDisableBlocks=true (only loose .blob files), delete one
+ // attachment, then test both sides of IgnoreMissingAttachments.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_att", {});
+ REQUIRE(Oplog);
+
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{512, 1024})));
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{2048, 3000})));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_missing_att",
+ /*MaxBlockSize=*/64u * 1024u,
+ /*MaxChunksPerBlock=*/1000,
+ /*MaxChunkEmbedSize=*/32u * 1024u,
+ /*EmbedLooseFiles=*/false,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/true,
+ &RemoteStore);
+
+ // Find and delete one .blob attachment file from the remote store directory.
+ std::filesystem::path DeletedBlob;
+ for (const auto& Entry : std::filesystem::recursive_directory_iterator(ExportDir.Path()))
+ {
+ if (Entry.path().extension() == ".blob")
+ {
+ DeletedBlob = Entry.path();
+ break;
+ }
+ }
+ REQUIRE(!DeletedBlob.empty());
+ std::error_code Ec;
+ std::filesystem::remove(DeletedBlob, Ec);
+ REQUIRE(!Ec);
+
+ // Strict mode: the missing blob must surface as a RemoteStoreError.
+ SUBCASE("throws_when_not_ignored")
+ {
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_missing_att_throw", {});
+ REQUIRE(ImportOplog);
+ CapturingJobContext Ctx;
+ CHECK_THROWS_AS(LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = true,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed,
+ .OptionalJobContext = &Ctx}),
+ RemoteStoreError);
+ }
+
+ // Lenient mode: the import completes but must report the failed attachments.
+ SUBCASE("succeeds_when_ignored")
+ {
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_missing_att_ignore", {});
+ REQUIRE(ImportOplog);
+ CapturingJobContext Ctx;
+ CHECK_NOTHROW(LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = true,
+ .IgnoreMissingAttachments = true,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed,
+ .OptionalJobContext = &Ctx}));
+ CHECK(Ctx.HasMessage("Failed to load attachments"));
+ }
+}
+
+TEST_CASE("project.store.import.error.load_container_failure")
+{
+ // LoadContainer() on a nonexistent path returns non-zero ErrorCode -> LoadOplog throws RemoteStoreError.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ // Point the file remote store at a directory that was never created.
+ std::filesystem::path NonExistentPath = TempDir.Path() / "does_not_exist";
+ FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 64u * 1024u,
+ .MaxChunksPerBlock = 1000,
+ .MaxChunkEmbedSize = 32u * 1024u,
+ .ChunkFileSizeLimit = 64u * 1024u * 1024u},
+ /*.FolderPath =*/NonExistentPath,
+ /*.Name =*/"load_container_failure",
+ /*.OptionalBaseName =*/std::string(),
+ /*.ForceDisableBlocks =*/false,
+ /*.ForceEnableTempBlocks =*/false};
+ std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options);
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("load_container_failure_import", {});
+ REQUIRE(ImportOplog);
+
+ // Ctx captures job messages; only the thrown RemoteStoreError is asserted here.
+ CapturingJobContext Ctx;
+ CHECK_THROWS_AS(LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = false,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed,
+ .OptionalJobContext = &Ctx}),
+ RemoteStoreError);
+}
+
+TEST_CASE("project.store.blockcomposer.path_a_standalone_block")
+{
+ // Path A: a single op contributing exactly MaxChunksPerBlock chunks is emitted
+ // as a standalone block, without being merged into the pending block.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t CapacityBytes = 1000;
+ constexpr uint64_t CapacityChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(CapacityBytes, CapacityChunks));
+
+ // Four 100-byte chunks, all owned by the same op.
+ Oid SingleOp = MakeTestOid(1);
+ std::vector<IoHash> ChunkHashes;
+ std::vector<uint64_t> ChunkSizes;
+ std::vector<Oid> OpKeys;
+ for (int Index = 1; Index <= 4; ++Index)
+ {
+ ChunkHashes.push_back(MakeTestHash(Index));
+ ChunkSizes.push_back(100);
+ OpKeys.push_back(SingleOp);
+ }
+
+ std::vector<std::vector<IoHash>> Emitted;
+ auto OnBlock = [&Emitted](std::vector<IoHash>&& Block) { Emitted.push_back(std::move(Block)); };
+ Composer.Compose(ChunkHashes, ChunkSizes, OpKeys, OnBlock);
+
+ // Exactly one block containing all four chunks, in input order.
+ REQUIRE(Emitted.size() == 1);
+ CHECK(Emitted[0].size() == 4);
+ CHECK(Emitted[0][0] == MakeTestHash(1));
+ CHECK(Emitted[0][3] == MakeTestHash(4));
+}
+
+TEST_CASE("project.store.blockcomposer.path_b_fits_pending")
+{
+ // Path B: a single op whose chunks fit entirely in the empty pending block.
+ // Nothing flushes during processing; the final flush emits the one pending block.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t CapacityBytes = 1000;
+ constexpr uint64_t CapacityChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(CapacityBytes, CapacityChunks));
+
+ // Two chunks of one op; each is <= MaxChunkEmbedSize (100) and their sum (140)
+ // is far below CapacityBytes (1000).
+ Oid OnlyOp = MakeTestOid(1);
+ std::vector<IoHash> ChunkHashes = {MakeTestHash(1), MakeTestHash(2)};
+ std::vector<uint64_t> ChunkSizes = {60, 80};
+ std::vector<Oid> OpKeys = {OnlyOp, OnlyOp};
+
+ std::vector<std::vector<IoHash>> Emitted;
+ Composer.Compose(ChunkHashes, ChunkSizes, OpKeys,
+ [&Emitted](std::vector<IoHash>&& Block) { Emitted.push_back(std::move(Block)); });
+
+ // One block from the final flush, holding both chunks in order.
+ REQUIRE(Emitted.size() == 1);
+ CHECK(Emitted[0].size() == 2);
+ CHECK(Emitted[0][0] == MakeTestHash(1));
+ CHECK(Emitted[0][1] == MakeTestHash(2));
+}
+
+TEST_CASE("project.store.blockcomposer.path_b_exact_count_fill")
+{
+ // Path B: pending reaches MaxChunksPerBlock exactly -> immediate flush, no separate final flush.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ // Two ops with two chunks each; the fourth chunk hits the count limit exactly.
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4)};
+ std::vector<uint64_t> Sizes = {100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // Exactly one block: the inline flush at capacity leaves nothing pending for the final flush.
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 4);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][3] == MakeTestHash(4));
+}
+
+TEST_CASE("project.store.blockcomposer.path_c_75pct_flush")
+{
+ // Path C: when the next op does not fit and the pending block is already more than
+ // 75% full, pending is flushed first and the new op is then placed via Path B.
+ // UsableSize=100 -> threshold is 75 bytes; Op1's 80 bytes exceed it.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t CapacityBytes = 100; // 75% threshold = 75 bytes
+ constexpr uint64_t CapacityChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(CapacityBytes, CapacityChunks));
+
+ // Op1 (80 bytes) lands in pending via Path B. Op2 (30 bytes) does not fit
+ // (80 + 30 = 110 > 100) while pending exceeds the threshold, so pending is
+ // flushed (block 1) and Op2 becomes the new pending, emitted by the final flush.
+ std::vector<IoHash> ChunkHashes = {MakeTestHash(1), MakeTestHash(2)};
+ std::vector<uint64_t> ChunkSizes = {80, 30};
+ std::vector<Oid> OpKeys = {MakeTestOid(1), MakeTestOid(2)};
+
+ std::vector<std::vector<IoHash>> Emitted;
+ Composer.Compose(ChunkHashes, ChunkSizes, OpKeys,
+ [&Emitted](std::vector<IoHash>&& Block) { Emitted.push_back(std::move(Block)); });
+
+ // Two single-chunk blocks, one per op, in op order.
+ REQUIRE(Emitted.size() == 2);
+ CHECK(Emitted[0].size() == 1);
+ CHECK(Emitted[0][0] == MakeTestHash(1));
+ CHECK(Emitted[1].size() == 1);
+ CHECK(Emitted[1][0] == MakeTestHash(2));
+}
+
+TEST_CASE("project.store.blockcomposer.path_d_partial_fill")
+{
+ // Path D: pending <=75% full but chunk count is the binding constraint. Greedy fill adds chunks until count capacity, then flushes.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000; // 75% threshold = 750 bytes
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 3 x 100 bytes -> Path B, pending = {3 chunks, 300 bytes} (300 <= 750)
+ // Op2: 2 x 100 bytes -> 3+2=5 > MaxChunks=4; 300+200=500 <= 1000; 300 <= 750 -> Path D
+ // D adds op2[0] to pending (4 chunks, count capacity reached), flushes -> block 1
+ // Remaining op2[1] -> Path B (pending empty) -> final flush -> block 2
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5)};
+ std::vector<uint64_t> Sizes = {100, 100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op1, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // Note: Op2's chunks are split across the two blocks, matching the trace above.
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 4);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+ CHECK(Blocks[0][2] == MakeTestHash(3));
+ CHECK(Blocks[0][3] == MakeTestHash(4));
+ CHECK(Blocks[1].size() == 1);
+ CHECK(Blocks[1][0] == MakeTestHash(5));
+}
+
+TEST_CASE("project.store.blockcomposer.cancellation")
+{
+ // IsCancelledFunc returns true on the second outer-loop iteration.
+ // Op1 (4 chunks, Path A) is fully emitted before cancellation; Op2 is never started.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+
+ int CallCount = 0;
+ remotestore_impl::BlockComposer::Configuration Config = MakeTestConfig(UsableSize, MaxChunks);
+ // First poll (CallCount becomes 1) returns false; every subsequent poll returns true.
+ Config.IsCancelledFunc = [&]() { return ++CallCount > 1; };
+ remotestore_impl::BlockComposer Composer(Config);
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5), MakeTestHash(6)};
+ std::vector<uint64_t> Sizes = {100, 100, 100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op1, Op1, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // Only Op1's standalone block was emitted before cancellation kicked in.
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 4);
+}
+
+TEST_CASE("project.store.blockcomposer.final_flush")
+{
+ // Three ops with all chunks fitting in pending (no mid-stream flush) -> single block from final flush.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ Oid Op3 = MakeTestOid(3);
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3)};
+ std::vector<uint64_t> Sizes = {60, 80, 70}; // each <= MaxChunkEmbedSize (100); sum=210 << UsableSize (1000)
+ std::vector<Oid> Keys = {Op1, Op2, Op3};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // All three chunks stay pending until Compose returns; the final flush emits one combined block in input order.
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 3);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+ CHECK(Blocks[0][2] == MakeTestHash(3));
+}
+
+TEST_CASE("project.store.blockcomposer.path_b_b_c")
+{
+ // Path B -> Path B -> Path C: two ops accumulate past 75% threshold; third op triggers Path C flush.
+ // UsableSize=200, threshold=150; two ops of 90 bytes each accumulate 180 bytes, exceeding threshold.
+ using namespace projectstore_testutils;
+ constexpr uint64_t UsableSize = 200; // 75% threshold = 150 bytes
+ constexpr uint64_t MaxChunks = 8;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ Oid Op3 = MakeTestOid(3);
+ // Op1: 90 bytes -> Path B, pending = {90 bytes, 1 chunk} (90 <= 150)
+ // Op2: 90 bytes -> Path B, pending = {180 bytes, 2 chunks} (180 > 150)
+ // Op3: 60 bytes -> does not fit (180+60=240 > 200) and 180 > 150 -> Path C flush -> block 1
+ // then Path B, pending = {60 bytes} -> final flush -> block 2
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3)};
+ std::vector<uint64_t> Sizes = {90, 90, 60};
+ std::vector<Oid> Keys = {Op1, Op2, Op3};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // Block 1 holds Op1+Op2 (the flushed pending); block 2 holds Op3 from the final flush.
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 2);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+ CHECK(Blocks[1].size() == 1);
+ CHECK(Blocks[1][0] == MakeTestHash(3));
+}
+
+TEST_CASE("project.store.blockcomposer.path_a_b_final_flush")
+{
+ // Path A -> Path B -> final flush: first op count-saturates -> standalone block, second op placed via Path B.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 4 x 100 bytes -> MaxChunksPerBlock reached -> CurrentOpFillFullBlock=true -> Path A
+ // Op2: 2 x 100 bytes -> Path B (pending empty) -> final flush
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5), MakeTestHash(6)};
+ std::vector<uint64_t> Sizes = {100, 100, 100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op1, Op1, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // Block 1 is Op1's standalone Path A block; block 2 is Op2 emitted by the final flush.
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 4);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][3] == MakeTestHash(4));
+ CHECK(Blocks[1].size() == 2);
+ CHECK(Blocks[1][0] == MakeTestHash(5));
+ CHECK(Blocks[1][1] == MakeTestHash(6));
+}
+
+TEST_CASE("project.store.blockcomposer.empty_input")
+{
+ // Zero attachments -> no blocks emitted.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ std::vector<std::vector<IoHash>> Blocks;
+ // Compose over three empty spans must never invoke the emit callback.
+ Composer.Compose({}, {}, {}, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ CHECK(Blocks.empty());
+}
+
+TEST_CASE("project.store.blockcomposer.single_attachment")
+{
+ // Single chunk -> Path B into empty pending, final flush emits it.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ std::vector<IoHash> Hashes = {MakeTestHash(1)};
+ std::vector<uint64_t> Sizes = {60};
+ std::vector<Oid> Keys = {Op1};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // Exactly one block containing exactly the one chunk.
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 1);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+}
+
+TEST_CASE("project.store.blockcomposer.path_a_size_saturation")
+{
+ // Path A by size overflow: 60+60 > UsableSize=100; first chunk emitted standalone, second via Path B.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 100; // MaxChunkEmbedSize=100; two 60-byte chunks overflow
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ // chunk0=60, chunk1=60: 60+60=120 > UsableSize=100 -> size overflow after gathering chunk0
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2)};
+ std::vector<uint64_t> Sizes = {60, 60};
+ std::vector<Oid> Keys = {Op1, Op1};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // Both blocks are single-chunk: block 1 from the size-overflow emit, block 2 from the final flush.
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 1);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[1].size() == 1);
+ CHECK(Blocks[1][0] == MakeTestHash(2));
+}
+
+TEST_CASE("project.store.blockcomposer.path_b_exact_size_fill")
+{
+ // Path B immediate flush when pending reaches UsableBlockSize exactly (vs count-fill in path_b_exact_count_fill).
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 100;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 60 bytes -> Path B, pending = {60 bytes, 1 chunk}
+ // Op2: 40 bytes -> 60+40=100 == UsableSize -> Path B, immediate size-exact flush
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2)};
+ std::vector<uint64_t> Sizes = {60, 40};
+ std::vector<Oid> Keys = {Op1, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // The size-exact fill emits one block holding both chunks; nothing is left for the final flush.
+ REQUIRE(Blocks.size() == 1);
+ CHECK(Blocks[0].size() == 2);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+}
+
+TEST_CASE("project.store.blockcomposer.path_d_size_limited_greedy")
+{
+ // Path D where greedy fill is limited by size (not count). MaxChunks=8 ensures size is binding.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 200; // 75% threshold = 150 bytes
+ constexpr uint64_t MaxChunks = 8;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 90 bytes -> Path B, pending = {90 bytes, 1 chunk} (90 <= 150)
+ // Op2: 3 x 60 bytes -> 90+180=270 > 200, 90 <= 150 -> Path D
+ // Greedy fill adds op2[0] (150 bytes); op2[1] would make 210 > 200 -> flush -> block 1
+ // Remaining op2[1..2] -> Path B (pending empty) -> final flush -> block 2
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4)};
+ std::vector<uint64_t> Sizes = {90, 60, 60, 60};
+ std::vector<Oid> Keys = {Op1, Op2, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 2);
+ CHECK(Blocks[0][0] == MakeTestHash(1));
+ CHECK(Blocks[0][1] == MakeTestHash(2));
+ CHECK(Blocks[1].size() == 2);
+ CHECK(Blocks[1][0] == MakeTestHash(3));
+ CHECK(Blocks[1][1] == MakeTestHash(4));
+}
+
+TEST_CASE("project.store.blockcomposer.path_a_pending_untouched")
+{
+ // Path A leaves pending untouched: Op1 in pending, Op2 count-saturates -> standalone block. Final flush emits Op1.
+ using namespace projectstore_testutils;
+
+ constexpr uint64_t UsableSize = 1000;
+ constexpr uint64_t MaxChunks = 4;
+ remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks));
+
+ Oid Op1 = MakeTestOid(1);
+ Oid Op2 = MakeTestOid(2);
+ // Op1: 2 x 60 bytes -> Path B, pending = {2 chunks, 120 bytes}
+ // Op2: 4 x 100 bytes -> count reaches MaxChunks=4 -> CurrentOpFillFullBlock=true -> Path A
+ // Path A emits Op2 standalone as block 1; pending (Op1's chunks) is left untouched.
+ // Final flush emits pending -> block 2.
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5), MakeTestHash(6)};
+ std::vector<uint64_t> Sizes = {60, 60, 100, 100, 100, 100};
+ std::vector<Oid> Keys = {Op1, Op1, Op2, Op2, Op2, Op2};
+
+ std::vector<std::vector<IoHash>> Blocks;
+ Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); });
+
+ // Note block order: Op2 (hashes 3..6) comes FIRST because Path A emits immediately, before the final flush of Op1.
+ REQUIRE(Blocks.size() == 2);
+ CHECK(Blocks[0].size() == 4);
+ CHECK(Blocks[0][0] == MakeTestHash(3));
+ CHECK(Blocks[0][3] == MakeTestHash(6));
+ CHECK(Blocks[1].size() == 2);
+ CHECK(Blocks[1][0] == MakeTestHash(1));
+ CHECK(Blocks[1][1] == MakeTestHash(2));
+}
+
+// ---------------------------------------------------------------------------
+// BuildContainer-direct tests
+// ---------------------------------------------------------------------------
+
+TEST_CASE("buildcontainer.public_overload_smoke")
+{
+ // Verifies the public BuildContainer overload runs successfully and calls AsyncOnBlock.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_smoke", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ std::atomic<int> BlockCallCount{0};
+ // Numeric args: max block size (64 KB), 1000 (presumably an op/chunk count cap — TODO confirm against the
+ // BuildContainer declaration), max chunk embed size (32 KB), chunk-file size limit (64 MB); naming follows
+ // the constants used in the allow_chunking_false test below.
+ CbObject Container = BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [&](CompressedBuffer&&, ChunkBlockDescription&&) { BlockCallCount.fetch_add(1); },
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false);
+
+ // A non-empty container object and at least one AsyncOnBlock callback prove the happy path ran.
+ CHECK(Container.GetSize() > 0);
+ CHECK(BlockCallCount.load() >= 1);
+}
+
+TEST_CASE("buildcontainer.build_blocks_false_on_block_chunks")
+{
+ // BuildBlocks=false: small attachments go to OnBlockChunks instead of AsyncOnBlock.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_no_blocks", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024})));
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ std::atomic<int> BlockChunksCallCount{0};
+ CbObject Container = BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) { CHECK(false); }, // AsyncOnBlock must never fire with BuildBlocks=false
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [&](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) { BlockChunksCallCount.fetch_add(1); },
+ /*EmbedLooseFiles=*/false);
+
+ CHECK(Container.GetSize() > 0);
+ CHECK(BlockChunksCallCount.load() >= 1);
+}
+
+TEST_CASE("buildcontainer.ignore_missing_binary_attachment_warn")
+{
+ // A bulk-data op references a hash that is absent from CidStore.
+ // SUBCASE warn: IgnoreMissingAttachments=true -> ReportMessage("Missing attachment ...").
+ // SUBCASE throw: IgnoreMissingAttachments=false -> std::runtime_error.
+ // NOTE: the two SUBCASEs intentionally call two different BuildContainer overloads — the warn SUBCASE
+ // uses the context-taking overload (different parameter order, trailing &Ctx); verify both signatures
+ // against the declaration if either call site changes.
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ // Fabricate a hash not in CidStore and build a package that references it as a
+ // BinaryAttachment field but carries no inline attachment data.
+ IoBuffer FakeData = CreateRandomBlob(256);
+ IoHash FakeHash = IoHash::HashBuffer(FakeData);
+
+ CbObjectWriter Object;
+ Object << "key"sv << OidAsString(Oid::NewOid());
+ Object.BeginArray("bulkdata"sv);
+ {
+ Object.BeginObject();
+ Object << "id"sv << Oid::NewOid();
+ Object << "type"sv
+ << "Standard"sv;
+ Object.AddBinaryAttachment("data"sv, FakeHash);
+ Object.EndObject();
+ }
+ Object.EndArray();
+ CbPackage Package;
+ Package.SetObject(Object.Save());
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_missing_bin", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(Package);
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ SUBCASE("warn")
+ {
+ CapturingJobContext Ctx;
+ BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/true,
+ /*AllowChunking=*/true,
+ {},
+ WorkerPool,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false,
+ &Ctx);
+ CHECK(Ctx.HasMessage("Missing attachment"));
+ }
+
+ SUBCASE("throw")
+ {
+ CHECK_THROWS(BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false));
+ }
+}
+
+TEST_CASE("buildcontainer.ignore_missing_file_attachment_warn")
+{
+ // File attachments are created on disk then deleted before BuildContainer runs.
+ // SUBCASE warn: IgnoreMissingAttachments=true -> ReportMessage("Missing attachment ...").
+ // SUBCASE throw: IgnoreMissingAttachments=false -> exception.
+ // EmbedLooseFiles=true in both SUBCASEs so file ops are actually rewritten and the missing files detected.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024});
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_missing_file", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts));
+
+ // Delete files before BuildContainer runs so RewriteOp finds them missing.
+ for (const auto& [Id, Path] : FileAtts)
+ {
+ std::filesystem::remove(Path);
+ }
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ SUBCASE("warn")
+ {
+ CapturingJobContext Ctx;
+ BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/true,
+ /*AllowChunking=*/true,
+ {},
+ WorkerPool,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/true,
+ &Ctx);
+ CHECK(Ctx.HasMessage("Missing attachment"));
+ }
+
+ SUBCASE("throw")
+ {
+ CHECK_THROWS(BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/true));
+ }
+}
+
+TEST_CASE("buildcontainer.zero_byte_file_attachment")
+{
+ // A zero-byte file on disk is a valid attachment. BuildContainer must process
+ // it without hitting ZEN_ASSERT(UploadAttachment->Size != 0) in
+ // ResolveAttachments. The empty file flows through the compress-inline path
+ // and becomes a LooseUploadAttachment with raw size 0.
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512});
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_zero_byte_file", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts));
+
+ // Truncate the file to zero bytes after the oplog entry is created.
+ // The file still exists on disk so RewriteOplog's IsFile() check passes,
+ // but MakeFromFile returns a zero-size buffer.
+ std::filesystem::resize_file(FileAtts[0].second, 0);
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CbObject Container = BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/true);
+
+ CHECK(Container.GetSize() > 0);
+
+ // The zero-byte attachment is packed into a block via the compress-inline path.
+ CbArrayView Blocks = Container["blocks"sv].AsArrayView();
+ CHECK(Blocks.Num() > 0);
+}
+
+TEST_CASE("buildcontainer.embed_loose_files_false_no_rewrite")
+{
+ // EmbedLooseFiles=false: RewriteOp is skipped for file-op entries; they pass through
+ // unchanged. Neither AsyncOnBlock nor OnLargeAttachment should fire.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_embed_false", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(
+ CreateFilesOplogPackage(Oid::NewOid(), RootDir, CreateFileAttachments(RootDir, std::initializer_list<size_t>{1024, 2048})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CbObject Container = BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) { CHECK(false); }, // AsyncOnBlock must not fire
+ [](const IoHash&, TGetAttachmentBufferFunc&&) { CHECK(false); }, // OnLargeAttachment must not fire
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false);
+
+ CHECK(Container.GetSize() > 0);
+}
+
+TEST_CASE("buildcontainer.allow_chunking_false")
+{
+ // AllowChunking=false: attachments exceeding ChunkFileSizeLimit skip chunking -> OnLargeAttachment.
+ // AllowChunking=true: same data is chunked, but chunk still exceeds MaxChunkEmbedSize -> OnLargeAttachment;
+ // exercises the AllowChunking branch in FindChunkSizes.
+ // 4 KB attachment: > MaxChunkEmbedSize (2 KB) and > ChunkFileSizeLimit (1 KB).
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ // None encoding: compressed ~ 4 KB > MaxChunkEmbedSize (2 KB) and ChunkFileSizeLimit (1 KB).
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_allow_chunk", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{4096}, OodleCompressionLevel::None)));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ // These constants name the positional numeric args of BuildContainer below.
+ constexpr size_t TestMaxBlockSize = 16u * 1024u;
+ constexpr size_t TestMaxChunkEmbedSize = 2u * 1024u;
+ constexpr size_t TestChunkFileSizeLimit = 1u * 1024u;
+
+ SUBCASE("allow_chunking_false")
+ {
+ std::atomic<int> LargeAttachmentCallCount{0};
+ BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ TestMaxBlockSize,
+ 1000,
+ TestMaxChunkEmbedSize,
+ TestChunkFileSizeLimit,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/false,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [&](const IoHash&, TGetAttachmentBufferFunc&&) { LargeAttachmentCallCount.fetch_add(1); },
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false);
+ CHECK(LargeAttachmentCallCount.load() >= 1);
+ }
+
+ SUBCASE("allow_chunking_true")
+ {
+ // Chunking branch in FindChunkSizes is taken, but the ~4 KB chunk still exceeds MaxChunkEmbedSize -> OnLargeAttachment.
+ std::atomic<int> LargeAttachmentCallCount{0};
+ BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ TestMaxBlockSize,
+ 1000,
+ TestMaxChunkEmbedSize,
+ TestChunkFileSizeLimit,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [&](const IoHash&, TGetAttachmentBufferFunc&&) { LargeAttachmentCallCount.fetch_add(1); },
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false);
+ CHECK(LargeAttachmentCallCount.load() >= 1);
+ }
+}
+
+TEST_CASE("buildcontainer.async_on_block_exception_propagates")
+{
+ // If AsyncOnBlock throws, the exception must propagate out of BuildContainer.
+ // Two multi-attachment entries make it likely at least one block is built and the throwing callback invoked.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_block_exc", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024})));
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CHECK_THROWS_AS(BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) { throw std::runtime_error("inject_block"); },
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false),
+ std::runtime_error);
+}
+
+TEST_CASE("buildcontainer.on_large_attachment_exception_propagates")
+{
+ // OnLargeAttachment exception must propagate. 64 KB with MaxChunkEmbedSize=32 KB -> OnLargeAttachment.
+ // AllowChunking=false guarantees the large attachment is routed to OnLargeAttachment rather than chunked.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_large_exc", {});
+ REQUIRE(Oplog);
+ // 64 KB with OodleCompressionLevel::None -> compressed ~ 64 KB > MaxChunkEmbedSize (32 KB).
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(),
+ CreateAttachments(std::initializer_list<size_t>{64u * 1024u}, OodleCompressionLevel::None)));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CHECK_THROWS_AS(BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ WorkerPool,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/false,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) { throw std::runtime_error("inject_large"); },
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false),
+ std::runtime_error);
+}
+
+TEST_CASE("buildcontainer.context_cancellation_aborts")
+{
+ // IsCancelled() returns true from the start; BuildContainer must not crash or throw.
+ // Uses the context-taking BuildContainer overload (note the `{}` placeholder and trailing &Ctx).
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_cancel", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024})));
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024})));
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CapturingJobContext Ctx;
+ Ctx.m_Cancel = true; // cancelled before any work starts
+
+ CHECK_NOTHROW(BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ {},
+ WorkerPool,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false,
+ &Ctx));
+}
+
+TEST_CASE("buildcontainer.context_progress_reporting")
+{
+ // BuildContainer calls ReportProgress at least once ("Scanning oplog").
+ // Uses the context-taking overload so CapturingJobContext can record progress messages.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_progress", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024})));
+
+ WorkerThreadPool WorkerPool(GetWorkerCount());
+
+ CapturingJobContext Ctx;
+ BuildContainer(
+ Log(),
+ CidStore,
+ *Project,
+ *Oplog,
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ 64u * 1024u * 1024u,
+ /*BuildBlocks=*/true,
+ /*IgnoreMissingAttachments=*/false,
+ /*AllowChunking=*/true,
+ {},
+ WorkerPool,
+ [](CompressedBuffer&&, ChunkBlockDescription&&) {},
+ [](const IoHash&, TGetAttachmentBufferFunc&&) {},
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {},
+ /*EmbedLooseFiles=*/false,
+ &Ctx);
+
+ CHECK(!Ctx.ProgressMessages.empty());
+}
+
+TEST_CASE("getblocksfromoplog.filtered")
+{
+ // GetBlocksFromOplog(ContainerObject, IncludeBlockHashes) returns only the requested blocks.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ // SetupExportStore populates ExportDir with a container that has at least one block.
+ std::shared_ptr<RemoteProjectStore> RemoteStore = SetupExportStore(CidStore, *Project, NetworkPool, WorkerPool, ExportDir.Path());
+
+ RemoteProjectStore::LoadContainerResult ContainerResult = RemoteStore->LoadContainer();
+ REQUIRE(ContainerResult.ErrorCode == 0);
+
+ std::vector<IoHash> AllBlockHashes = GetBlockHashesFromOplog(ContainerResult.ContainerObject);
+ REQUIRE(!AllBlockHashes.empty());
+
+ // Filter to the first block only.
+ std::vector<IoHash> Subset = {AllBlockHashes[0]};
+ std::vector<ThinChunkBlockDescription> Filtered = GetBlocksFromOplog(ContainerResult.ContainerObject, Subset);
+ CHECK(Filtered.size() == 1);
+ CHECK(Filtered[0].BlockHash == AllBlockHashes[0]);
+ CHECK(!Filtered[0].ChunkRawHashes.empty());
+
+ // Empty include set returns empty result (exercises the no-match branch).
+ std::vector<ThinChunkBlockDescription> Empty = GetBlocksFromOplog(ContainerResult.ContainerObject, std::span<const IoHash>{});
+ CHECK(Empty.empty());
+}
+
+// ---------------------------------------------------------------------------
+// SaveOplog-focused tests
+// ---------------------------------------------------------------------------
+
+TEST_CASE("saveoplog.cancellation")
+{
+ // IsCancelled() returns true from the start; SaveOplog must not throw.
+ using namespace projectstore_testutils;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_cancel_save", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 2048})));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ CapturingJobContext Ctx;
+ Ctx.m_Cancel = true;
+
+ CHECK_NOTHROW(RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_cancel_save",
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ /*EmbedLooseFiles=*/false,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ &Ctx,
+ /*ForceDisableBlocks=*/false));
+}
+
+// ---------------------------------------------------------------------------
+// LoadOplog-focused tests
+// ---------------------------------------------------------------------------
+
+TEST_CASE("loadoplog.missing_block_attachment_ignored")
+{
+ // Export creates block files on disk; deleting all of them and then loading with
+ // IgnoreMissingAttachments=true must succeed and report "Failed to download block attachment".
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ GcManager Gc;
+ CidStore CidStore(Gc);
+ std::unique_ptr<ProjectStore> ProjectStoreDummy;
+ Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy);
+
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_block", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(
+ CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 2048, 512})));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore;
+ RunSaveOplog(CidStore,
+ *Project,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ ExportDir.Path(),
+ "oplog_missing_block",
+ 64u * 1024u,
+ 1000,
+ 32u * 1024u,
+ /*EmbedLooseFiles=*/false,
+ /*ForceUpload=*/false,
+ /*IgnoreMissingAttachments=*/false,
+ /*OptionalContext=*/nullptr,
+ /*ForceDisableBlocks=*/false,
+ &RemoteStore);
+
+ RemoteProjectStore::GetKnownBlocksResult KnownBlocks = RemoteStore->GetKnownBlocks();
+ REQUIRE(KnownBlocks.ErrorCode == 0);
+ REQUIRE(!KnownBlocks.Blocks.empty());
+
+ for (const ChunkBlockDescription& BlockDesc : KnownBlocks.Blocks)
+ {
+ std::string HexStr = BlockDesc.BlockHash.ToHexString();
+ std::filesystem::path BlockPath = ExportDir.Path() / HexStr.substr(0, 3) / HexStr.substr(3, 2) / (HexStr.substr(5) + ".blob");
+ REQUIRE(std::filesystem::exists(BlockPath));
+ std::filesystem::remove(BlockPath);
+ }
+
+ CapturingJobContext Ctx;
+ Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_missing_block_import", {});
+ CHECK_NOTHROW(LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = CidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = true,
+ .IgnoreMissingAttachments = true,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed,
+ .OptionalJobContext = &Ctx}));
+ CHECK(Ctx.HasMessage("Failed to download block attachment"));
+}
+
+TEST_CASE("loadoplog.clean_oplog_with_populated_cache")
+{
+ // Second import with CleanOplog=true and a non-null cache exercises the OptionalCache->Flush() path.
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ std::filesystem::path EngineRootDir = TempDir.Path() / "engine";
+ std::filesystem::path ProjectRootDir = TempDir.Path() / "game";
+ std::filesystem::path ProjectFilePath = TempDir.Path() / "game" / "game.uproject";
+
+ // Export side.
+ GcManager ExportGc;
+ CidStore ExportCidStore(ExportGc);
+ CidStoreConfiguration ExportCidConfig = {.RootDirectory = TempDir.Path() / "export_cas",
+ .TinyValueThreshold = 1024,
+ .HugeValueThreshold = 4096};
+ ExportCidStore.Initialize(ExportCidConfig);
+
+ std::filesystem::path ExportBasePath = TempDir.Path() / "export_projectstore";
+ ProjectStore ExportProjectStore(ExportCidStore, ExportBasePath, ExportGc, ProjectStore::Configuration{});
+ Ref<ProjectStore::Project> ExportProject(ExportProjectStore.NewProject(ExportBasePath / "proj1"sv,
+ "proj1"sv,
+ RootDir.string(),
+ EngineRootDir.string(),
+ ProjectRootDir.string(),
+ ProjectFilePath.string()));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore =
+ SetupExportStore(ExportCidStore, *ExportProject, NetworkPool, WorkerPool, ExportDir.Path());
+
+ // Import side, starts empty.
+ GcManager ImportGc;
+ CidStore ImportCidStore(ImportGc);
+ CidStoreConfiguration ImportCidConfig = {.RootDirectory = TempDir.Path() / "import_cas",
+ .TinyValueThreshold = 1024,
+ .HugeValueThreshold = 4096};
+ ImportCidStore.Initialize(ImportCidConfig);
+
+ std::filesystem::path ImportBasePath = TempDir.Path() / "import_projectstore";
+ ProjectStore ImportProjectStore(ImportCidStore, ImportBasePath, ImportGc, ProjectStore::Configuration{});
+ Ref<ProjectStore::Project> ImportProject(ImportProjectStore.NewProject(ImportBasePath / "proj1"sv,
+ "proj1"sv,
+ RootDir.string(),
+ EngineRootDir.string(),
+ ProjectRootDir.string(),
+ ProjectFilePath.string()));
+
+ const Oid CacheBuildId = Oid::NewOid();
+ BuildStorageCache::Statistics CacheStats;
+ std::unique_ptr<BuildStorageCache> Cache = CreateInMemoryBuildStorageCache(256u, CacheStats);
+
+ {
+ Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog("oplog_clean_cache_p1", {});
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = ImportCidStore,
+ .RemoteStore = *RemoteStore,
+ .OptionalCache = Cache.get(),
+ .CacheBuildId = CacheBuildId,
+ .Oplog = *Phase1Oplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = false,
+ .IgnoreMissingAttachments = false,
+ .CleanOplog = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed,
+ .PopulateCache = true});
+ }
+
+ {
+ Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog("oplog_clean_cache_p2", {});
+ CHECK_NOTHROW(LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = ImportCidStore,
+ .RemoteStore = *RemoteStore,
+ .OptionalCache = Cache.get(),
+ .CacheBuildId = CacheBuildId,
+ .Oplog = *Phase2Oplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = true,
+ .IgnoreMissingAttachments = false,
+ .CleanOplog = true,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed,
+ .PopulateCache = false}));
+ }
+}
+
+TEST_CASE("project.store.export.block_reuse_fresh_receiver")
+{
+ // Regression test: after a second export that reuses existing blocks, a fresh import must still
+ // receive all chunks. The bug: FindReuseBlocks erases reused-block chunks from UploadAttachments,
+ // but never adds the reused blocks to the container's "blocks" section. A fresh receiver then
+ // silently misses those chunks because ParseOplogContainer never sees them.
+ using namespace projectstore_testutils;
+ using namespace std::literals;
+
+ ScopedTemporaryDirectory TempDir;
+ ScopedTemporaryDirectory ExportDir;
+
+ // -- Export side ----------------------------------------------------------
+ GcManager ExportGc;
+ CidStore ExportCidStore(ExportGc);
+ CidStoreConfiguration ExportCidConfig = {.RootDirectory = TempDir.Path() / "export_cas",
+ .TinyValueThreshold = 1024,
+ .HugeValueThreshold = 4096};
+ ExportCidStore.Initialize(ExportCidConfig);
+
+ std::filesystem::path ExportBasePath = TempDir.Path() / "export_projectstore";
+ ProjectStore ExportProjectStore(ExportCidStore, ExportBasePath, ExportGc, ProjectStore::Configuration{});
+ std::filesystem::path RootDir = TempDir.Path() / "root";
+ std::filesystem::path EngineRootDir = TempDir.Path() / "engine";
+ std::filesystem::path ProjectRootDir = TempDir.Path() / "game";
+ std::filesystem::path ProjectFilePath = TempDir.Path() / "game" / "game.uproject";
+ Ref<ProjectStore::Project> ExportProject(ExportProjectStore.NewProject(ExportBasePath / "proj1"sv,
+ "proj1"sv,
+ RootDir.string(),
+ EngineRootDir.string(),
+ ProjectRootDir.string(),
+ ProjectFilePath.string()));
+
+ // 20 KB attachments with None encoding compress to ~20 KB, below MaxChunkEmbedSize (32 KB), so they are packed into blocks.
+ Ref<ProjectStore::Oplog> Oplog = ExportProject->NewOplog("oplog_reuse_rt", {});
+ REQUIRE(Oplog);
+ Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(
+ Oid::NewOid(),
+ CreateAttachments(std::initializer_list<size_t>{20u * 1024u, 20u * 1024u}, OodleCompressionLevel::None)));
+
+ TestWorkerPools Pools;
+ WorkerThreadPool& NetworkPool = Pools.NetworkPool;
+ WorkerThreadPool& WorkerPool = Pools.WorkerPool;
+
+ constexpr size_t MaxBlockSize = 64u * 1024u;
+ constexpr size_t MaxChunksPerBlock = 1000;
+ constexpr size_t MaxChunkEmbedSize = 32u * 1024u;
+ constexpr size_t ChunkFileSizeLimit = 64u * 1024u * 1024u;
+
+ // First export: creates blocks on disk.
+ FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = MaxBlockSize,
+ .MaxChunksPerBlock = MaxChunksPerBlock,
+ .MaxChunkEmbedSize = MaxChunkEmbedSize,
+ .ChunkFileSizeLimit = ChunkFileSizeLimit},
+ /*.FolderPath =*/ExportDir.Path(),
+ /*.Name =*/std::string("oplog_reuse_rt"),
+ /*.OptionalBaseName =*/std::string(),
+ /*.ForceDisableBlocks =*/false,
+ /*.ForceEnableTempBlocks =*/false};
+
+ std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options);
+ SaveOplog(Log(),
+ ExportCidStore,
+ *RemoteStore,
+ *ExportProject,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ MaxBlockSize,
+ MaxChunksPerBlock,
+ MaxChunkEmbedSize,
+ ChunkFileSizeLimit,
+ /*EmbedLooseFiles*/ true,
+ /*ForceUpload*/ false,
+ /*IgnoreMissingAttachments*/ false,
+ /*OptionalContext*/ nullptr);
+
+ // Verify first export produced blocks.
+ RemoteProjectStore::GetKnownBlocksResult KnownAfterFirst = RemoteStore->GetKnownBlocks();
+ REQUIRE(!KnownAfterFirst.Blocks.empty());
+
+ // Second export to the SAME store: triggers block reuse via GetKnownBlocks.
+ SaveOplog(Log(),
+ ExportCidStore,
+ *RemoteStore,
+ *ExportProject,
+ *Oplog,
+ NetworkPool,
+ WorkerPool,
+ MaxBlockSize,
+ MaxChunksPerBlock,
+ MaxChunkEmbedSize,
+ ChunkFileSizeLimit,
+ /*EmbedLooseFiles*/ true,
+ /*ForceUpload*/ false,
+ /*IgnoreMissingAttachments*/ false,
+ /*OptionalContext*/ nullptr);
+
+ // Verify the container has no duplicate block entries.
+ {
+ RemoteProjectStore::LoadContainerResult ContainerResult = RemoteStore->LoadContainer();
+ REQUIRE(ContainerResult.ErrorCode == 0);
+ std::vector<IoHash> BlockHashes = GetBlockHashesFromOplog(ContainerResult.ContainerObject);
+ REQUIRE(!BlockHashes.empty());
+ std::unordered_set<IoHash, IoHash::Hasher> UniqueBlockHashes(BlockHashes.begin(), BlockHashes.end());
+ CHECK(UniqueBlockHashes.size() == BlockHashes.size());
+ }
+
+ // Collect all attachment hashes referenced by the oplog ops.
+ std::unordered_set<IoHash, IoHash::Hasher> ExpectedHashes;
+ Oplog->IterateOplogWithKey([&](int, const Oid&, CbObjectView Op) {
+ Op.IterateAttachments([&](CbFieldView FieldView) { ExpectedHashes.insert(FieldView.AsAttachment()); });
+ });
+ REQUIRE(!ExpectedHashes.empty());
+
+ // -- Import side (fresh, empty CAS) --------------------------------------
+ GcManager ImportGc;
+ CidStore ImportCidStore(ImportGc);
+ CidStoreConfiguration ImportCidConfig = {.RootDirectory = TempDir.Path() / "import_cas",
+ .TinyValueThreshold = 1024,
+ .HugeValueThreshold = 4096};
+ ImportCidStore.Initialize(ImportCidConfig);
+
+ std::filesystem::path ImportBasePath = TempDir.Path() / "import_projectstore";
+ ProjectStore ImportProjectStore(ImportCidStore, ImportBasePath, ImportGc, ProjectStore::Configuration{});
+ Ref<ProjectStore::Project> ImportProject(ImportProjectStore.NewProject(ImportBasePath / "proj1"sv,
+ "proj1"sv,
+ RootDir.string(),
+ EngineRootDir.string(),
+ ProjectRootDir.string(),
+ ProjectFilePath.string()));
+
+ Ref<ProjectStore::Oplog> ImportOplog = ImportProject->NewOplog("oplog_reuse_rt_import", {});
+ REQUIRE(ImportOplog);
+
+ LoadOplog(LoadOplogContext{.Log = Log(),
+ .ChunkStore = ImportCidStore,
+ .RemoteStore = *RemoteStore,
+ .Oplog = *ImportOplog,
+ .NetworkWorkerPool = NetworkPool,
+ .WorkerPool = WorkerPool,
+ .ForceDownload = true,
+ .IgnoreMissingAttachments = false,
+ .PartialBlockRequestMode = EPartialBlockRequestMode::All});
+
+ // Every attachment hash from the original oplog must be present in the import CAS.
+ for (const IoHash& Hash : ExpectedHashes)
+ {
+ CHECK_MESSAGE(ImportCidStore.ContainsChunk(Hash), "Missing chunk after import: ", Hash);
+ }
+}
+
+TEST_SUITE_END();
+
#endif // ZEN_WITH_TESTS
void
diff --git a/src/zenremotestore/projectstore/zenremoteprojectstore.cpp b/src/zenremotestore/projectstore/zenremoteprojectstore.cpp
index b4c1156ac..a08a07fcd 100644
--- a/src/zenremotestore/projectstore/zenremoteprojectstore.cpp
+++ b/src/zenremotestore/projectstore/zenremoteprojectstore.cpp
@@ -159,7 +159,8 @@ public:
virtual LoadAttachmentsResult LoadAttachments(const std::vector<IoHash>& RawHashes) override
{
- std::string LoadRequest = fmt::format("/{}/oplog/{}/rpc"sv, m_Project, m_Oplog);
+ LoadAttachmentsResult Result;
+ std::string LoadRequest = fmt::format("/{}/oplog/{}/rpc"sv, m_Project, m_Oplog);
CbObject Request;
{
@@ -187,7 +188,7 @@ public:
HttpClient::Response Response = m_Client.Post(LoadRequest, Request, HttpClient::Accept(ZenContentType::kCbPackage));
AddStats(Response);
- LoadAttachmentsResult Result = LoadAttachmentsResult{ConvertResult(Response)};
+ Result = LoadAttachmentsResult{ConvertResult(Response)};
if (Result.ErrorCode)
{
Result.Reason = fmt::format("Failed fetching {} oplog attachments from {}/{}/{}. Reason: '{}'",
@@ -249,25 +250,23 @@ public:
return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent)}};
}
- virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes) override
+ virtual GetBlockDescriptionsResult GetBlockDescriptions(std::span<const IoHash> BlockHashes,
+ BuildStorageCache* OptionalCache,
+ const Oid& CacheBuildId) override
{
- ZEN_UNUSED(BlockHashes);
+ ZEN_UNUSED(BlockHashes, OptionalCache, CacheBuildId);
return GetBlockDescriptionsResult{Result{.ErrorCode = int(HttpResponseCode::NotFound)}};
}
- virtual AttachmentExistsInCacheResult AttachmentExistsInCache(std::span<const IoHash> RawHashes) override
- {
- return AttachmentExistsInCacheResult{Result{.ErrorCode = 0}, std::vector<bool>(RawHashes.size(), false)};
- }
-
- virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash, const AttachmentRange& Range) override
+ virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override
{
+ LoadAttachmentResult Result;
std::string LoadRequest = fmt::format("/{}/oplog/{}/{}"sv, m_Project, m_Oplog, RawHash);
HttpClient::Response Response =
m_Client.Download(LoadRequest, m_TempFilePath, HttpClient::Accept(ZenContentType::kCompressedBinary));
AddStats(Response);
- LoadAttachmentResult Result = LoadAttachmentResult{ConvertResult(Response)};
+ Result = LoadAttachmentResult{ConvertResult(Response)};
if (Result.ErrorCode)
{
Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}/{}. Reason: '{}'",
@@ -277,20 +276,38 @@ public:
RawHash,
Result.Reason);
}
- if (!Result.ErrorCode && Range)
+ Result.Bytes = Response.ResponsePayload;
+ Result.Bytes.MakeOwned();
+ return Result;
+ }
+
+ virtual LoadAttachmentRangesResult LoadAttachmentRanges(const IoHash& RawHash,
+ std::span<const std::pair<uint64_t, uint64_t>> Ranges) override
+ {
+ ZEN_ASSERT(!Ranges.empty());
+ LoadAttachmentRangesResult Result;
+ std::string LoadRequest = fmt::format("/{}/oplog/{}/{}"sv, m_Project, m_Oplog, RawHash);
+ HttpClient::Response Response =
+ m_Client.Download(LoadRequest, m_TempFilePath, HttpClient::Accept(ZenContentType::kCompressedBinary));
+ AddStats(Response);
+
+ Result = LoadAttachmentRangesResult{ConvertResult(Response)};
+ if (Result.ErrorCode)
{
- Result.Bytes = IoBuffer(Response.ResponsePayload, Range.Offset, Range.Bytes);
+ Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}/{}. Reason: '{}'",
+ m_ProjectStoreUrl,
+ m_Project,
+ m_Oplog,
+ RawHash,
+ Result.Reason);
}
else
{
- Result.Bytes = Response.ResponsePayload;
+ Result.Ranges = std::vector<std::pair<uint64_t, uint64_t>>(Ranges.begin(), Ranges.end());
}
- Result.Bytes.MakeOwned();
return Result;
}
- virtual void Flush() override {}
-
private:
void AddStats(const HttpClient::Response& Result)
{
@@ -312,7 +329,7 @@ private:
{
if (Response.Error)
{
- return {.ErrorCode = Response.Error.value().ErrorCode,
+ return {.ErrorCode = static_cast<int32_t>(Response.Error.value().ErrorCode),
.ElapsedSeconds = Response.ElapsedSeconds,
.Reason = Response.ErrorMessage(""),
.Text = Response.ToText()};
diff --git a/src/zenremotestore/zenremotestore.cpp b/src/zenremotestore/zenremotestore.cpp
index a0bb17260..74d0efb9e 100644
--- a/src/zenremotestore/zenremotestore.cpp
+++ b/src/zenremotestore/zenremotestore.cpp
@@ -4,28 +4,31 @@
#include <zenremotestore/builds/buildmanifest.h>
#include <zenremotestore/builds/buildsavedstate.h>
-#include <zenremotestore/builds/buildstorageoperations.h>
+#include <zenremotestore/builds/buildstorageutil.h>
+#include <zenremotestore/builds/jupiterbuildstorage.h>
#include <zenremotestore/chunking/chunkedcontent.h>
#include <zenremotestore/chunking/chunkedfile.h>
#include <zenremotestore/chunking/chunkingcache.h>
-#include <zenremotestore/filesystemutils.h>
#include <zenremotestore/projectstore/remoteprojectstore.h>
#if ZEN_WITH_TESTS
namespace zen {
+void buildoperations_tests_forcelink();
+
void
zenremotestore_forcelinktests()
{
buildmanifest_forcelink();
buildsavedstate_forcelink();
- buildstorageoperations_forcelink();
+ jupiterbuildstorage_forcelink();
+ buildstorageutil_forcelink();
+ buildoperations_tests_forcelink();
chunkblock_forcelink();
chunkedcontent_forcelink();
chunkedfile_forcelink();
chunkingcache_forcelink();
- filesystemutils_forcelink();
remoteprojectstore_forcelink();
}