diff options
Diffstat (limited to 'src/zenremotestore/builds/buildinspect.cpp')
| -rw-r--r-- | src/zenremotestore/builds/buildinspect.cpp | 463 |
1 files changed, 463 insertions, 0 deletions
diff --git a/src/zenremotestore/builds/buildinspect.cpp b/src/zenremotestore/builds/buildinspect.cpp new file mode 100644 index 000000000..1af9e20af --- /dev/null +++ b/src/zenremotestore/builds/buildinspect.cpp @@ -0,0 +1,463 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenremotestore/builds/buildinspect.h> + +#include <zencore/compactbinarybuilder.h> +#include <zencore/fmtutils.h> +#include <zencore/scopeguard.h> +#include <zencore/timer.h> +#include <zencore/trace.h> +#include <zenremotestore/builds/buildcontent.h> +#include <zenremotestore/builds/buildmanifest.h> +#include <zenremotestore/builds/buildstoragecache.h> +#include <zenremotestore/builds/buildupdatefolder.h> +#include <zenremotestore/builds/builduploadfolder.h> +#include <zenremotestore/chunking/chunkingcache.h> +#include <zenremotestore/chunking/chunkingcontroller.h> +#include <zenremotestore/transferthreadworkers.h> +#include <zenutil/filesystemutils.h> +#include <zenutil/filteredrate.h> +#include <zenutil/progress.h> +#include <zenutil/wildcard.h> + +#include <numeric> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +ChunkedFolderContent +ScanAndChunkFolder(ProgressBase& Progress, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + bool IsQuiet, + TransferThreadWorkers& Workers, + GetFolderContentStatistics& GetFolderContentStats, + ChunkingStatistics& ChunkingStats, + const std::filesystem::path& Path, + std::function<bool(const std::string_view& RelativePath)>&& IsAcceptedFolder, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& IsAcceptedFile, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache) +{ + Stopwatch Timer; + + ZEN_TRACE_CPU("ScanAndChunkFolder"); + + FolderContent Content = GetFolderContent( + GetFolderContentStats, + Path, + std::move(IsAcceptedFolder), + std::move(IsAcceptedFile), + Workers.GetIOWorkerPool(), + Progress.GetProgressUpdateDelayMS(), + [](bool, std::ptrdiff_t) {}, + AbortFlag); + if (AbortFlag) + { + return {}; + } + + BuildState LocalContent = GetLocalContent(Progress, + AbortFlag, + PauseFlag, + IsQuiet, + Workers, + GetFolderContentStats, + ChunkingStats, + Path, + ZenStateFilePath(Path / ZenFolderName), + ChunkController, + ChunkCache) + .State; + + std::vector<std::filesystem::path> UntrackedPaths = GetNewPaths(LocalContent.ChunkedContent.Paths, Content.Paths); + + BuildState UntrackedLocalContent = GetLocalStateFromPaths(Progress, + AbortFlag, + PauseFlag, + Workers, + GetFolderContentStats, + ChunkingStats, + Path, + ChunkController, + ChunkCache, + UntrackedPaths) + .State; + + ChunkedFolderContent Result = + MergeChunkedFolderContents(LocalContent.ChunkedContent, std::vector<ChunkedFolderContent>{UntrackedLocalContent.ChunkedContent}); + + const uint64_t TotalRawSize = std::accumulate(Result.RawSizes.begin(), Result.RawSizes.end(), std::uint64_t(0)); + const uint64_t ChunkedRawSize = + std::accumulate(Result.ChunkedContent.ChunkRawSizes.begin(), Result.ChunkedContent.ChunkRawSizes.end(), std::uint64_t(0)); + + if (!IsQuiet) + { + ZEN_CONSOLE("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec", + Result.Paths.size(), + NiceBytes(TotalRawSize), + Result.ChunkedContent.ChunkHashes.size(), + NiceBytes(ChunkedRawSize), + Path, + NiceTimeSpanMs(Timer.GetElapsedTimeMs()), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); + } + return Result; +}; + +void +ListBuild(bool IsQuiet, + StorageInstance& Storage, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + std::span<const std::string> IncludeWildcards, + std::span<const std::string> ExcludeWildcards, + CbObjectWriter* OptionalStructuredOutput) +{ + std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + + CbObject BuildObject = GetBuild(*Storage.BuildStorage, BuildId, IsQuiet); + + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->AddObjectId("buildId"sv, BuildId); + OptionalStructuredOutput->AddObject("build"sv, BuildObject); + } + + std::vector<std::pair<Oid, std::string>> AllBuildParts = + ResolveBuildPartNames(BuildObject, BuildId, BuildPartIds, BuildPartNames, PreferredMultipartChunkSize); + + if (!AllBuildParts.empty()) + { + Stopwatch GetBuildPartTimer; + + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->BeginArray("parts"sv); + } + + for (size_t BuildPartIndex = 0; BuildPartIndex < AllBuildParts.size(); BuildPartIndex++) + { + const Oid BuildPartId = AllBuildParts[BuildPartIndex].first; + const std::string_view BuildPartName = AllBuildParts[BuildPartIndex].second; + CbObject BuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, BuildPartId); + + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->BeginObject(); + OptionalStructuredOutput->AddObjectId("id"sv, BuildPartId); + OptionalStructuredOutput->AddString("partName"sv, BuildPartName); + } + { + if (OptionalStructuredOutput != nullptr) + { + } + else if (!IsQuiet) + { + ZEN_CONSOLE("{}Part: {} ('{}'):\n", + BuildPartIndex > 0 ? "\n" : "", + BuildPartId, + BuildPartName, + NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()), + NiceBytes(BuildPartManifest.GetSize())); + } + + std::vector<std::filesystem::path> Paths; + std::vector<IoHash> RawHashes; + std::vector<uint64_t> RawSizes; + std::vector<uint32_t> Attributes; + + SourcePlatform Platform; + std::vector<IoHash> SequenceRawHashes; + std::vector<uint32_t> ChunkCounts; + std::vector<uint32_t> AbsoluteChunkOrders; + std::vector<IoHash> LooseChunkHashes; + std::vector<uint64_t> LooseChunkRawSizes; + std::vector<IoHash> BlockRawHashes; + + ReadBuildContentFromCompactBinary(BuildPartManifest, + Platform, + Paths, + RawHashes, + RawSizes, + Attributes, + SequenceRawHashes, + ChunkCounts, + AbsoluteChunkOrders, + LooseChunkHashes, + LooseChunkRawSizes, + BlockRawHashes); + + std::vector<size_t> Order(Paths.size()); + std::iota(Order.begin(), Order.end(), 0); + + std::sort(Order.begin(), Order.end(), [&](size_t Lhs, size_t Rhs) { + const std::filesystem::path& LhsPath = Paths[Lhs]; + const std::filesystem::path& RhsPath = Paths[Rhs]; + return LhsPath < RhsPath; + }); + + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->BeginArray("files"sv); + } + { + for (size_t Index : Order) + { + const std::filesystem::path& Path = Paths[Index]; + if (IncludePath(IncludeWildcards, ExcludeWildcards, ToLower(Path.generic_string()), /*CaseSensitive*/ true)) + { + const IoHash& RawHash = RawHashes[Index]; + const uint64_t RawSize = RawSizes[Index]; + const uint32_t Attribute = Attributes[Index]; + + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->BeginObject(); + { + OptionalStructuredOutput->AddString("path"sv, fmt::format("{}", Path)); + OptionalStructuredOutput->AddInteger("rawSize"sv, RawSize); + OptionalStructuredOutput->AddHash("rawHash"sv, RawHash); + switch (Platform) + { + case SourcePlatform::Windows: + OptionalStructuredOutput->AddInteger("attributes"sv, Attribute); + break; + case SourcePlatform::MacOS: + case SourcePlatform::Linux: + OptionalStructuredOutput->AddString("chmod"sv, fmt::format("{:#04o}", Attribute)); + break; + default: + throw std::runtime_error(fmt::format("Unsupported platform: {}", (int)Platform)); + } + } + OptionalStructuredOutput->EndObject(); + } + else + { + ZEN_CONSOLE("{}\t{}\t{}", Path, RawSize, RawHash); + } + } + } + } + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->EndArray(); // "files" + } + } + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->EndObject(); + } + } + if (OptionalStructuredOutput != nullptr) + { + OptionalStructuredOutput->EndArray(); // parts + } + } +} + +void +DiffFolders(ProgressBase& Progress, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + bool IsQuiet, + TransferThreadWorkers& Workers, + const std::filesystem::path& BasePath, + const std::filesystem::path& ComparePath, + ChunkingController& ChunkController, + ChunkingCache& ChunkCache, + const std::vector<std::string>& ExcludeFolders, + const std::vector<std::string>& ExcludeExtensions) +{ + ZEN_TRACE_CPU("DiffFolders"); + + Progress.SetLogOperationName("Diff Folders"); + + enum TaskSteps : uint32_t + { + CheckBase, + CheckCompare, + Diff, + Cleanup, + StepCount + }; + + auto EndProgress = MakeGuard([&]() { Progress.SetLogOperationProgress(TaskSteps::StepCount, TaskSteps::StepCount); }); + + ChunkedFolderContent BaseFolderContent; + ChunkedFolderContent CompareFolderContent; + + { + auto IsAcceptedFolder = [ExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + auto IsAcceptedFile = [ExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> bool { + for (const std::string& ExcludeExtension : ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; + }; + + Progress.SetLogOperationProgress(TaskSteps::CheckBase, TaskSteps::StepCount); + + GetFolderContentStatistics BaseGetFolderContentStats; + ChunkingStatistics BaseChunkingStats; + BaseFolderContent = ScanAndChunkFolder(Progress, + AbortFlag, + PauseFlag, + IsQuiet, + Workers, + BaseGetFolderContentStats, + BaseChunkingStats, + BasePath, + IsAcceptedFolder, + IsAcceptedFile, + ChunkController, + ChunkCache); + if (AbortFlag) + { + return; + } + + Progress.SetLogOperationProgress(TaskSteps::CheckCompare, TaskSteps::StepCount); + + GetFolderContentStatistics CompareGetFolderContentStats; + ChunkingStatistics CompareChunkingStats; + CompareFolderContent = ScanAndChunkFolder(Progress, + AbortFlag, + PauseFlag, + IsQuiet, + Workers, + CompareGetFolderContentStats, + CompareChunkingStats, + ComparePath, + IsAcceptedFolder, + IsAcceptedFile, + ChunkController, + ChunkCache); + + if (AbortFlag) + { + return; + } + } + + Progress.SetLogOperationProgress(TaskSteps::Diff, TaskSteps::StepCount); + + std::vector<IoHash> AddedHashes; + std::vector<IoHash> RemovedHashes; + uint64_t RemovedSize = 0; + uint64_t AddedSize = 0; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseRawHashLookup; + for (size_t PathIndex = 0; PathIndex < BaseFolderContent.RawHashes.size(); PathIndex++) + { + const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex]; + BaseRawHashLookup.insert_or_assign(RawHash, PathIndex); + } + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CompareRawHashLookup; + for (size_t PathIndex = 0; PathIndex < CompareFolderContent.RawHashes.size(); PathIndex++) + { + const IoHash& RawHash = CompareFolderContent.RawHashes[PathIndex]; + if (!BaseRawHashLookup.contains(RawHash)) + { + AddedHashes.push_back(RawHash); + AddedSize += CompareFolderContent.RawSizes[PathIndex]; + } + CompareRawHashLookup.insert_or_assign(RawHash, PathIndex); + } + for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++) + { + const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex]; + if (!CompareRawHashLookup.contains(RawHash)) + { + RemovedHashes.push_back(RawHash); + RemovedSize += BaseFolderContent.RawSizes[PathIndex]; + } + } + + uint64_t BaseTotalRawSize = 0; + for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++) + { + BaseTotalRawSize += BaseFolderContent.RawSizes[PathIndex]; + } + + double KeptPercent = BaseTotalRawSize > 0 ? (100.0 * (BaseTotalRawSize - RemovedSize)) / BaseTotalRawSize : 0; + + ZEN_CONSOLE("File diff : {} ({}) removed, {} ({}) added, {} ({} {:.1f}%) kept", + RemovedHashes.size(), + NiceBytes(RemovedSize), + AddedHashes.size(), + NiceBytes(AddedSize), + BaseFolderContent.Paths.size() - RemovedHashes.size(), + NiceBytes(BaseTotalRawSize - RemovedSize), + KeptPercent); + + uint64_t CompareTotalRawSize = 0; + + uint64_t FoundChunkCount = 0; + uint64_t FoundChunkSize = 0; + uint64_t NewChunkCount = 0; + uint64_t NewChunkSize = 0; + const ChunkedContentLookup BaseFolderLookup = BuildChunkedContentLookup(BaseFolderContent); + for (uint32_t ChunkIndex = 0; ChunkIndex < CompareFolderContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) + { + const IoHash& ChunkHash = CompareFolderContent.ChunkedContent.ChunkHashes[ChunkIndex]; + if (BaseFolderLookup.ChunkHashToChunkIndex.contains(ChunkHash)) + { + FoundChunkCount++; + FoundChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + else + { + NewChunkCount++; + NewChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + CompareTotalRawSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + + double FoundPercent = CompareTotalRawSize > 0 ? (100.0 * FoundChunkSize) / CompareTotalRawSize : 0; + double NewPercent = CompareTotalRawSize > 0 ? (100.0 * NewChunkSize) / CompareTotalRawSize : 0; + + ZEN_CONSOLE("Chunk diff: {} ({} {:.1f}%) out of {} ({}) chunks in {} ({}) base chunks. Added {} ({} {:.1f}%) chunks.", + FoundChunkCount, + NiceBytes(FoundChunkSize), + FoundPercent, + CompareFolderContent.ChunkedContent.ChunkHashes.size(), + NiceBytes(CompareTotalRawSize), + BaseFolderContent.ChunkedContent.ChunkHashes.size(), + NiceBytes(BaseTotalRawSize), + NewChunkCount, + NiceBytes(NewChunkSize), + NewPercent); + + Progress.SetLogOperationProgress(TaskSteps::Cleanup, TaskSteps::StepCount); +} + +} // namespace zen |