diff options
| author | Dan Engelbrecht <[email protected]> | 2025-03-14 09:50:00 +0100 |
|---|---|---|
| committer | Dan Engelbrecht <[email protected]> | 2025-03-14 09:50:00 +0100 |
| commit | 55c67aec301cfc99178ab54c6366cbc88f35d46a (patch) | |
| tree | 84b4c73220f7dd041763b6d1919eedc8d0b90844 /src | |
| parent | Merge remote-tracking branch 'origin/de/zen-service-command' into de/zen-serv... (diff) | |
| parent | fix quoted command lines arguments (#306) (diff) | |
| download | zen-55c67aec301cfc99178ab54c6366cbc88f35d46a.tar.xz zen-55c67aec301cfc99178ab54c6366cbc88f35d46a.zip | |
Merge remote-tracking branch 'origin/main' into de/zen-service-command
Diffstat (limited to 'src')
101 files changed, 13954 insertions, 1120 deletions
diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp index 995ed4136..835e01151 100644 --- a/src/zen/cmds/admin_cmd.cpp +++ b/src/zen/cmds/admin_cmd.cpp @@ -714,26 +714,29 @@ CopyStateCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw OptionParseException("data path must be given"); } - if (!std::filesystem::is_directory(m_DataPath)) + std::filesystem::path DataPath = StringToPath(m_DataPath); + std::filesystem::path TargetPath = StringToPath(m_TargetPath); + + if (!std::filesystem::is_directory(DataPath)) { throw OptionParseException("data path must exist"); } - if (m_TargetPath.empty()) + if (TargetPath.empty()) { throw OptionParseException("target path must be given"); } - std::filesystem::path RootManifestPath = m_DataPath / "root_manifest"; - std::filesystem::path TargetRootManifestPath = m_TargetPath / "root_manifest"; + std::filesystem::path RootManifestPath = DataPath / "root_manifest"; + std::filesystem::path TargetRootManifestPath = TargetPath / "root_manifest"; if (!TryCopy(RootManifestPath, TargetRootManifestPath)) { throw OptionParseException("data path is invalid, missing root_manifest"); } - std::filesystem::path CachePath = m_DataPath / "cache"; - std::filesystem::path TargetCachePath = m_TargetPath / "cache"; + std::filesystem::path CachePath = DataPath / "cache"; + std::filesystem::path TargetCachePath = TargetPath / "cache"; // Copy cache state DirectoryContent CacheDirectoryContent; @@ -778,8 +781,8 @@ CopyStateCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } } - std::filesystem::path CasPath = m_DataPath / "cas"; - std::filesystem::path TargetCasPath = m_TargetPath / "cas"; + std::filesystem::path CasPath = DataPath / "cas"; + std::filesystem::path TargetCasPath = TargetPath / "cas"; { std::filesystem::path UCasRootPath = CasPath / ".ucas_root"; diff --git a/src/zen/cmds/admin_cmd.h b/src/zen/cmds/admin_cmd.h index c593b2cac..8b6d3e258 100644 --- a/src/zen/cmds/admin_cmd.h +++ 
b/src/zen/cmds/admin_cmd.h @@ -155,10 +155,10 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{"copy-state", "Copy zen server disk state"}; - std::filesystem::path m_DataPath; - std::filesystem::path m_TargetPath; - bool m_SkipLogs = false; + cxxopts::Options m_Options{"copy-state", "Copy zen server disk state"}; + std::string m_DataPath; + std::string m_TargetPath; + bool m_SkipLogs = false; }; } // namespace zen diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp new file mode 100644 index 000000000..f0ee4904e --- /dev/null +++ b/src/zen/cmds/builds_cmd.cpp @@ -0,0 +1,7973 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "builds_cmd.h" + +#include <zencore/basicfile.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryfile.h> +#include <zencore/compactbinaryfmt.h> +#include <zencore/compress.h> +#include <zencore/except.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/scopeguard.h> +#include <zencore/string.h> +#include <zencore/trace.h> +#include <zencore/uid.h> +#include <zenhttp/formatters.h> +#include <zenhttp/httpclient.h> +#include <zenhttp/httpclientauth.h> +#include <zenhttp/httpcommon.h> +#include <zenutil/chunkblock.h> +#include <zenutil/chunkedcontent.h> +#include <zenutil/chunkedfile.h> +#include <zenutil/chunkingcontroller.h> +#include <zenutil/filebuildstorage.h> +#include <zenutil/jupiter/jupiterbuildstorage.h> +#include <zenutil/jupiter/jupitersession.h> +#include <zenutil/parallellwork.h> +#include <zenutil/workerpools.h> +#include <zenutil/zenserverprocess.h> + +#include <signal.h> +#include <memory> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +#include <tsl/robin_set.h> +#include <json11.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +#if ZEN_PLATFORM_WINDOWS +# include <zencore/windows.h> +#else +# include <fcntl.h> +# include <sys/file.h> +# 
include <sys/stat.h> +# include <unistd.h> +#endif + +#define EXTRA_VERIFY 0 + +namespace zen { +namespace { + static std::atomic<bool> AbortFlag = false; + static void SignalCallbackHandler(int SigNum) + { + if (SigNum == SIGINT) + { + AbortFlag = true; + } +#if ZEN_PLATFORM_WINDOWS + if (SigNum == SIGBREAK) + { + AbortFlag = true; + } +#endif // ZEN_PLATFORM_WINDOWS + } + + using namespace std::literals; + + static const size_t DefaultMaxBlockSize = 64u * 1024u * 1024u; + static const size_t DefaultMaxChunkEmbedSize = 3u * 512u * 1024u; + + struct ChunksBlockParameters + { + size_t MaxBlockSize = DefaultMaxBlockSize; + size_t MaxChunkEmbedSize = DefaultMaxChunkEmbedSize; + }; + + const ChunksBlockParameters DefaultChunksBlockParams{.MaxBlockSize = 32u * 1024u * 1024u, + .MaxChunkEmbedSize = DefaultChunkedParams.MaxSize}; + + const uint64_t DefaultPreferredMultipartChunkSize = 32u * 1024u * 1024u; + + const double DefaultLatency = 0; // .0010; + const double DefaultDelayPerKBSec = 0; // 0.00005; + + const std::string ZenFolderName = ".zen"; + const std::string ZenStateFilePath = fmt::format("{}/current_state.cbo", ZenFolderName); + const std::string ZenStateFileJsonPath = fmt::format("{}/current_state.json", ZenFolderName); + const std::string ZenTempFolderName = fmt::format("{}/tmp", ZenFolderName); + + const std::string ZenTempCacheFolderName = + fmt::format("{}/cache", ZenTempFolderName); // Decompressed and verified data - chunks & sequences + const std::string ZenTempBlockFolderName = fmt::format("{}/blocks", ZenTempFolderName); // Temp storage for whole and partial blocks + const std::string ZenTempChunkFolderName = + fmt::format("{}/chunks", ZenTempFolderName); // Temp storage for decompressed and validated chunks + + const std::string ZenTempDownloadFolderName = + fmt::format("{}/download", ZenTempFolderName); // Temp storage for unverified downloaded blobs + + const std::string ZenTempStorageFolderName = + fmt::format("{}/storage", ZenTempFolderName); //
Temp storage folder for BuildStorage implementations + + const std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt"; + + const std::string UnsyncFolderName = ".unsync"; + + const std::string UGSFolderName = ".ugs"; + const std::string LegacyZenTempFolderName = ".zen-tmp"; + + const std::vector<std::string_view> DefaultExcludeFolders({UnsyncFolderName, ZenFolderName, UGSFolderName, LegacyZenTempFolderName}); + const std::vector<std::string_view> DefaultExcludeExtensions({}); + + static bool IsVerbose = false; + static bool UsePlainProgress = false; + +#define ZEN_CONSOLE_VERBOSE(fmtstr, ...) \ + if (IsVerbose) \ + { \ + ZEN_CONSOLE_LOG(zen::logging::level::Info, fmtstr, ##__VA_ARGS__); \ + } + + const std::string DefaultAccessTokenEnvVariableName( +#if ZEN_PLATFORM_WINDOWS + "UE-CloudDataCacheAccessToken"sv +#endif +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + "UE_CloudDataCacheAccessToken"sv +#endif + + ); + + uint32_t SetNativeFileAttributes(const std::filesystem::path FilePath, SourcePlatform SourcePlatform, uint32_t Attributes) + { +#if ZEN_PLATFORM_WINDOWS + if (SourcePlatform == SourcePlatform::Windows) + { + SetFileAttributes(FilePath, Attributes); + return Attributes; + } + else + { + uint32_t CurrentAttributes = GetFileAttributes(FilePath); + uint32_t NewAttributes = MakeFileAttributeReadOnly(CurrentAttributes, IsFileModeReadOnly(Attributes)); + if (CurrentAttributes != NewAttributes) + { + SetFileAttributes(FilePath, NewAttributes); + } + return NewAttributes; + } +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + if (SourcePlatform != SourcePlatform::Windows) + { + SetFileMode(FilePath, Attributes); + return Attributes; + } + else + { + uint32_t CurrentMode = GetFileMode(FilePath); + uint32_t NewMode = MakeFileModeReadOnly(CurrentMode, IsFileAttributeReadOnly(Attributes)); + if (CurrentMode != NewMode) + { + SetFileMode(FilePath, NewMode); + } + return NewMode; + } +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + 
}; + + uint32_t GetNativeFileAttributes(const std::filesystem::path FilePath) + { +#if ZEN_PLATFORM_WINDOWS + return GetFileAttributes(FilePath); +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + return GetFileMode(FilePath); +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + } + + template<typename T> + std::string FormatArray(std::span<const T> Items, std::string_view Prefix) + { + ExtendableStringBuilder<512> SB; + for (const T& Item : Items) + { + SB.Append(fmt::format("{}{}", Prefix, Item)); + } + return SB.ToString(); + } + + // Removes the files GetDirectoryContent reports for Path, then cleans and removes each reported directory that is not + // Path / <an entry of ExcludeDirectories>. Every removal is retried once after a short sleep; returns false if any retry also fails. + bool CleanDirectory(const std::filesystem::path& Path, std::span<const std::string_view> ExcludeDirectories) + { + ZEN_TRACE_CPU("CleanDirectory"); + + bool CleanWipe = true; + + DirectoryContent LocalDirectoryContent; + GetDirectoryContent(Path, DirectoryContentFlags::IncludeDirs | DirectoryContentFlags::IncludeFiles, LocalDirectoryContent); + for (const std::filesystem::path& LocalFilePath : LocalDirectoryContent.Files) + { + try + { + std::filesystem::remove(LocalFilePath); + } + catch (const std::exception&) + { + // DeleteOnClose files may be a bit slow in getting cleaned up, so pause and retry one time + Sleep(200); + try + { + std::filesystem::remove(LocalFilePath); + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed removing file {}.
Reason: {}", LocalFilePath, Ex.what()); + CleanWipe = false; + } + } + } + + for (const std::filesystem::path& LocalDirPath : LocalDirectoryContent.Directories) + { + bool Leave = false; + for (const std::string_view ExcludeDirectory : ExcludeDirectories) + { + if (LocalDirPath == (Path / ExcludeDirectory)) + { + Leave = true; + break; + } + } + if (!Leave) + { + try + { + zen::CleanDirectory(LocalDirPath); + std::filesystem::remove(LocalDirPath); + } + catch (const std::exception&) + { + Sleep(200); + try + { + zen::CleanDirectory(LocalDirPath); + std::filesystem::remove(LocalDirPath); + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed removing directory {}. Reason: {}", LocalDirPath, Ex.what()); + CleanWipe = false; + } + } + } + } + return CleanWipe; + } + + std::string ReadAccessTokenFromFile(const std::filesystem::path& Path) + { + if (!std::filesystem::is_regular_file(Path)) + { + throw std::runtime_error(fmt::format("the file '{}' does not exist", Path)); + } + IoBuffer Body = IoBufferBuilder::MakeFromFile(Path); + std::string JsonText(reinterpret_cast<const char*>(Body.GetData()), Body.GetSize()); + std::string JsonError; + json11::Json TokenInfo = json11::Json::parse(JsonText, JsonError); + if (!JsonError.empty()) + { + throw std::runtime_error(fmt::format("failed parsing json file '{}'. 
Reason: '{}'", Path, JsonError)); + } + const std::string AuthToken = TokenInfo["Token"].string_value(); + if (AuthToken.empty()) + { + throw std::runtime_error(fmt::format("the json file '{}' does not contain a value for \"Token\"", Path)); + } + return AuthToken; + } + + bool IsBufferDiskBased(const IoBuffer& Buffer) + { + IoBufferFileReference FileRef; + if (Buffer.GetFileReference(FileRef)) + { + return true; + } + return false; + } + + bool IsBufferDiskBased(const CompositeBuffer& Buffer) + { + // If this is a file based buffer or a compressed buffer with a memory-based header, we don't need to rewrite to disk to save memory + std::span<const SharedBuffer> Segments = Buffer.GetSegments(); + ZEN_ASSERT(Buffer.GetSegments().size() > 0); + return IsBufferDiskBased(Segments.back().AsIoBuffer()); + } + + IoBuffer WriteToTempFile(CompositeBuffer&& Buffer, + const std::filesystem::path& TempFolderPath, + const IoHash& Hash, + const std::string& Suffix = {}) + { + std::filesystem::path TempFilePath = (TempFolderPath / (Hash.ToHexString() + Suffix)).make_preferred(); + return WriteToTempFile(std::move(Buffer), TempFilePath); + } + + // Reports a per-second rate derived from the running total passed to Update(), re-sampled at most once every two seconds. + // Start()/Stop() latch the first start and first stop time; (uint64_t)-1 marks "not set yet". + class FilteredRate + { + public: + FilteredRate() {} + + void Start() + { + if (StartTimeUS == (uint64_t)-1) + { + uint64_t Expected = (uint64_t)-1; + // Strong CAS: there is no retry loop here, so a spuriously failing weak CAS would leave the rate permanently unstarted + if (StartTimeUS.compare_exchange_strong(Expected, Timer.GetElapsedTimeUs())) + { + LastTimeUS = StartTimeUS.load(); + } + } + } + void Stop() + { + if (EndTimeUS == (uint64_t)-1) + { + uint64_t Expected = (uint64_t)-1; + // Strong CAS for the same reason as in Start() - a spurious failure would leave the end time unset + EndTimeUS.compare_exchange_strong(Expected, Timer.GetElapsedTimeUs()); + } + } + + void Update(uint64_t Count) + { + if (LastTimeUS == (uint64_t)-1) + { + return; + } + uint64_t TimeUS = Timer.GetElapsedTimeUs(); + uint64_t TimeDeltaUS = TimeUS - LastTimeUS; + if (TimeDeltaUS >= 2000000) + { + uint64_t Delta = Count - LastCount; + uint64_t PerSecond = (Delta * 1000000) / TimeDeltaUS; + + // NOTE(review): LastPerSecond is overwritten before the blend below reads it, so FilteredPerSecond always ends up + // equal to PerSecond - confirm whether smoothing against the previous sample was intended + LastPerSecond = PerSecond; + + LastCount = Count; + + FilteredPerSecond = (PerSecond + 
(LastPerSecond * 7)) / 8; + + LastTimeUS = TimeUS; + } + } + + uint64_t GetCurrent() const // NOTE(review): returns the last sampled rate (0 before Start()); a stopped-state "total count / total time" average is not implemented + { + if (LastTimeUS == (uint64_t)-1) + { + return 0; + } + return FilteredPerSecond; + } + + uint64_t GetElapsedTimeUS() const + { + if (StartTimeUS == (uint64_t)-1) + { + return 0; + } + if (EndTimeUS == (uint64_t)-1) + { + return 0; + } + uint64_t TimeDeltaUS = EndTimeUS - StartTimeUS; + return TimeDeltaUS; + } + + bool IsActive() const { return (StartTimeUS != (uint64_t)-1) && (EndTimeUS == (uint64_t)-1); } + + private: + Stopwatch Timer; + std::atomic<uint64_t> StartTimeUS = (uint64_t)-1; + std::atomic<uint64_t> EndTimeUS = (uint64_t)-1; + std::atomic<uint64_t> LastTimeUS = (uint64_t)-1; + uint64_t LastCount = 0; + uint64_t LastPerSecond = 0; + uint64_t FilteredPerSecond = 0; + }; + + uint64_t GetBytesPerSecond(uint64_t ElapsedWallTimeUS, uint64_t Count) + { + if (ElapsedWallTimeUS == 0) + { + return 0; + } + return Count * 1000000 / ElapsedWallTimeUS; + } + + std::filesystem::path GetTempChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash) + { + return (CacheFolderPath / (RawHash.ToHexString() + ".tmp")).make_preferred(); + } + + std::filesystem::path GetFinalChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash) + { + return (CacheFolderPath / RawHash.ToHexString()).make_preferred(); + } + + ChunkedFolderContent ScanAndChunkFolder( + GetFolderContentStatistics& GetFolderContentStats, + ChunkingStatistics& ChunkingStats, + const std::filesystem::path& Path, + std::function<bool(const std::string_view& RelativePath)>&& IsAcceptedFolder, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& IsAcceptedFile, + ChunkingController& ChunkController) + { + ZEN_TRACE_CPU("ScanAndChunkFolder"); + + FolderContent Content = GetFolderContent( + GetFolderContentStats, + Path, + std::move(IsAcceptedFolder), +
std::move(IsAcceptedFile), + GetMediumWorkerPool(EWorkloadType::Burst), + UsePlainProgress ? 5000 : 200, + [](bool, std::ptrdiff_t) {}, + AbortFlag); + if (AbortFlag) + { + return {}; + } + + ProgressBar ProgressBar(UsePlainProgress); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + ChunkedFolderContent FolderContent = ChunkFolderContent( + ChunkingStats, + GetMediumWorkerPool(EWorkloadType::Burst), + Path, + Content, + ChunkController, + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + GetFolderContentStats.AcceptedFileCount.load(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(GetFolderContentStats.FoundFileByteCount), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = GetFolderContentStats.AcceptedFileByteCount, + .RemainingCount = GetFolderContentStats.AcceptedFileByteCount - ChunkingStats.BytesHashed.load()}, + false); + }, + AbortFlag); + if (AbortFlag) + { + return {}; + } + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + + ZEN_CONSOLE("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. 
Average hash rate {}B/sec", + ChunkingStats.FilesProcessed.load(), + NiceBytes(ChunkingStats.BytesHashed.load()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load()), + Path, + NiceTimeSpanMs((GetFolderContentStats.ElapsedWallTimeUS + ChunkingStats.ElapsedWallTimeUS) / 1000), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); + return FolderContent; + }; + + struct DiskStatistics + { + std::atomic<uint64_t> OpenReadCount = 0; + std::atomic<uint64_t> OpenWriteCount = 0; + std::atomic<uint64_t> ReadCount = 0; + std::atomic<uint64_t> ReadByteCount = 0; + std::atomic<uint64_t> WriteCount = 0; + std::atomic<uint64_t> WriteByteCount = 0; + std::atomic<uint64_t> CurrentOpenFileCount = 0; + }; + + struct FindBlocksStatistics + { + uint64_t FindBlockTimeMS = 0; + uint64_t PotentialChunkCount = 0; + uint64_t PotentialChunkByteCount = 0; + uint64_t FoundBlockCount = 0; + uint64_t FoundBlockChunkCount = 0; + uint64_t FoundBlockByteCount = 0; + uint64_t AcceptedBlockCount = 0; + uint64_t AcceptedChunkCount = 0; + uint64_t AcceptedByteCount = 0; + uint64_t RejectedBlockCount = 0; + uint64_t RejectedChunkCount = 0; + uint64_t RejectedByteCount = 0; + uint64_t AcceptedReduntantChunkCount = 0; + uint64_t AcceptedReduntantByteCount = 0; + uint64_t NewBlocksCount = 0; + uint64_t NewBlocksChunkCount = 0; + uint64_t NewBlocksChunkByteCount = 0; + }; + + struct UploadStatistics + { + std::atomic<uint64_t> BlockCount = 0; + std::atomic<uint64_t> BlocksBytes = 0; + std::atomic<uint64_t> ChunkCount = 0; + std::atomic<uint64_t> ChunksBytes = 0; + std::atomic<uint64_t> ReadFromDiskBytes = 0; + std::atomic<uint64_t> MultipartAttachmentCount = 0; + uint64_t ElapsedWallTimeUS = 0; + + UploadStatistics& operator+=(const UploadStatistics& Rhs) + { + BlockCount += Rhs.BlockCount; + BlocksBytes += Rhs.BlocksBytes; + ChunkCount += Rhs.ChunkCount; + ChunksBytes += Rhs.ChunksBytes; + ReadFromDiskBytes += 
Rhs.ReadFromDiskBytes; + MultipartAttachmentCount += Rhs.MultipartAttachmentCount; + ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS; + return *this; + } + }; + + struct LooseChunksStatistics + { + uint64_t ChunkCount = 0; + uint64_t ChunkByteCount = 0; + std::atomic<uint64_t> CompressedChunkCount = 0; + std::atomic<uint64_t> CompressedChunkBytes = 0; + uint64_t CompressChunksElapsedWallTimeUS = 0; + + LooseChunksStatistics& operator+=(const LooseChunksStatistics& Rhs) + { + ChunkCount += Rhs.ChunkCount; + ChunkByteCount += Rhs.ChunkByteCount; + CompressedChunkCount += Rhs.CompressedChunkCount; + CompressedChunkBytes += Rhs.CompressedChunkBytes; + CompressChunksElapsedWallTimeUS += Rhs.CompressChunksElapsedWallTimeUS; + return *this; + } + }; + + struct GenerateBlocksStatistics + { + std::atomic<uint64_t> GeneratedBlockByteCount = 0; + std::atomic<uint64_t> GeneratedBlockCount = 0; + uint64_t GenerateBlocksElapsedWallTimeUS = 0; + + GenerateBlocksStatistics& operator+=(const GenerateBlocksStatistics& Rhs) + { + GeneratedBlockByteCount += Rhs.GeneratedBlockByteCount; + GeneratedBlockCount += Rhs.GeneratedBlockCount; + GenerateBlocksElapsedWallTimeUS += Rhs.GenerateBlocksElapsedWallTimeUS; + return *this; + } + }; + + std::vector<uint32_t> CalculateAbsoluteChunkOrders(const std::span<const IoHash> LocalChunkHashes, + const std::span<const uint32_t> LocalChunkOrder, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToLocalChunkIndex, + const std::span<const uint32_t>& LooseChunkIndexes, + const std::span<const ChunkBlockDescription>& BlockDescriptions) + { + ZEN_TRACE_CPU("CalculateAbsoluteChunkOrders"); + +#if EXTRA_VERIFY + std::vector<IoHash> TmpAbsoluteChunkHashes; + TmpAbsoluteChunkHashes.reserve(LocalChunkHashes.size()); +#endif // EXTRA_VERIFY + std::vector<uint32_t> LocalChunkIndexToAbsoluteChunkIndex; + LocalChunkIndexToAbsoluteChunkIndex.resize(LocalChunkHashes.size(), (uint32_t)-1); + std::uint32_t AbsoluteChunkCount = 0; + for (uint32_t 
ChunkIndex : LooseChunkIndexes) + { + LocalChunkIndexToAbsoluteChunkIndex[ChunkIndex] = AbsoluteChunkCount; +#if EXTRA_VERIFY + TmpAbsoluteChunkHashes.push_back(LocalChunkHashes[ChunkIndex]); +#endif // EXTRA_VERIFY + AbsoluteChunkCount++; + } + for (const ChunkBlockDescription& Block : BlockDescriptions) + { + for (const IoHash& ChunkHash : Block.ChunkRawHashes) + { + if (auto It = ChunkHashToLocalChunkIndex.find(ChunkHash); It != ChunkHashToLocalChunkIndex.end()) + { + const uint32_t LocalChunkIndex = It->second; + ZEN_ASSERT_SLOW(LocalChunkHashes[LocalChunkIndex] == ChunkHash); + LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex] = AbsoluteChunkCount; + } +#if EXTRA_VERIFY + TmpAbsoluteChunkHashes.push_back(ChunkHash); +#endif // EXTRA_VERIFY + AbsoluteChunkCount++; + } + } + std::vector<uint32_t> AbsoluteChunkOrder; + AbsoluteChunkOrder.reserve(LocalChunkHashes.size()); + for (const uint32_t LocalChunkIndex : LocalChunkOrder) + { + const uint32_t AbsoluteChunkIndex = LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex]; +#if EXTRA_VERIFY + ZEN_ASSERT(LocalChunkHashes[LocalChunkIndex] == TmpAbsoluteChunkHashes[AbsoluteChunkIndex]); +#endif // EXTRA_VERIFY + AbsoluteChunkOrder.push_back(AbsoluteChunkIndex); + } +#if EXTRA_VERIFY + { + uint32_t OrderIndex = 0; + while (OrderIndex < LocalChunkOrder.size()) + { + const uint32_t LocalChunkIndex = LocalChunkOrder[OrderIndex]; + const IoHash& LocalChunkHash = LocalChunkHashes[LocalChunkIndex]; + const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrder[OrderIndex]; + const IoHash& AbsoluteChunkHash = TmpAbsoluteChunkHashes[AbsoluteChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + OrderIndex++; + } + } +#endif // EXTRA_VERIFY + return AbsoluteChunkOrder; + } + + void CalculateLocalChunkOrders(const std::span<const uint32_t>& AbsoluteChunkOrders, + const std::span<const IoHash> LooseChunkHashes, + const std::span<const uint64_t> LooseChunkRawSizes, + const std::span<const ChunkBlockDescription>& 
BlockDescriptions, + std::vector<IoHash>& OutLocalChunkHashes, + std::vector<uint64_t>& OutLocalChunkRawSizes, + std::vector<uint32_t>& OutLocalChunkOrders) + { + ZEN_TRACE_CPU("CalculateLocalChunkOrders"); + + std::vector<IoHash> AbsoluteChunkHashes; + std::vector<uint64_t> AbsoluteChunkRawSizes; + AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), LooseChunkHashes.begin(), LooseChunkHashes.end()); + AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), LooseChunkRawSizes.begin(), LooseChunkRawSizes.end()); + for (const ChunkBlockDescription& Block : BlockDescriptions) + { + AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), Block.ChunkRawHashes.begin(), Block.ChunkRawHashes.end()); + AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), Block.ChunkRawLengths.begin(), Block.ChunkRawLengths.end()); + } + OutLocalChunkHashes.reserve(AbsoluteChunkHashes.size()); + OutLocalChunkRawSizes.reserve(AbsoluteChunkRawSizes.size()); + OutLocalChunkOrders.reserve(AbsoluteChunkOrders.size()); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(AbsoluteChunkHashes.size()); + + for (uint32_t AbsoluteChunkOrderIndex = 0; AbsoluteChunkOrderIndex < AbsoluteChunkOrders.size(); AbsoluteChunkOrderIndex++) + { + const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[AbsoluteChunkOrderIndex]; + const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex]; + const uint64_t AbsoluteChunkRawSize = AbsoluteChunkRawSizes[AbsoluteChunkIndex]; + + if (auto It = ChunkHashToChunkIndex.find(AbsoluteChunkHash); It != ChunkHashToChunkIndex.end()) + { + const uint32_t LocalChunkIndex = It->second; + OutLocalChunkOrders.push_back(LocalChunkIndex); + } + else + { + uint32_t LocalChunkIndex = gsl::narrow<uint32_t>(OutLocalChunkHashes.size()); + OutLocalChunkHashes.push_back(AbsoluteChunkHash); + OutLocalChunkRawSizes.push_back(AbsoluteChunkRawSize); + OutLocalChunkOrders.push_back(LocalChunkIndex); + 
ChunkHashToChunkIndex.insert_or_assign(AbsoluteChunkHash, LocalChunkIndex); + } +#if EXTRA_VERIFY + const uint32_t LocalChunkIndex = OutLocalChunkOrders[AbsoluteChunkOrderIndex]; + const IoHash& LocalChunkHash = OutLocalChunkHashes[LocalChunkIndex]; + const uint64_t& LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + ZEN_ASSERT(LocalChunkRawSize == AbsoluteChunkRawSize); +#endif // EXTRA_VERIFY + } +#if EXTRA_VERIFY + for (uint32_t OrderIndex = 0; OrderIndex < OutLocalChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = OutLocalChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = OutLocalChunkHashes[LocalChunkIndex]; + uint64_t LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = AbsoluteChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex]; + uint64_t VerifyChunkRawSize = AbsoluteChunkRawSizes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize); + } +#endif // EXTRA_VERIFY + } + + void WriteBuildContentToCompactBinary(CbObjectWriter& PartManifestWriter, + const SourcePlatform Platform, + std::span<const std::filesystem::path> Paths, + std::span<const IoHash> RawHashes, + std::span<const uint64_t> RawSizes, + std::span<const uint32_t> Attributes, + std::span<const IoHash> SequenceRawHashes, + std::span<const uint32_t> ChunkCounts, + std::span<const IoHash> LocalChunkHashes, + std::span<const uint64_t> LocalChunkRawSizes, + std::vector<uint32_t> AbsoluteChunkOrders, + const std::span<const uint32_t> LooseLocalChunkIndexes, + const std::span<IoHash> BlockHashes) + { + ZEN_ASSERT(Platform != SourcePlatform::_Count); + PartManifestWriter.AddString("platform"sv, ToString(Platform)); + + uint64_t TotalSize = 0; + for (const uint64_t Size : RawSizes) + { + TotalSize += Size; + } + PartManifestWriter.AddInteger("totalSize", TotalSize); + + 
PartManifestWriter.BeginObject("files"sv); + { + compactbinary_helpers::WriteArray(Paths, "paths"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(RawHashes, "rawhashes"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(RawSizes, "rawsizes"sv, PartManifestWriter); + if (Platform == SourcePlatform::Windows) + { + compactbinary_helpers::WriteArray(Attributes, "attributes"sv, PartManifestWriter); + } + if (Platform == SourcePlatform::Linux || Platform == SourcePlatform::MacOS) + { + compactbinary_helpers::WriteArray(Attributes, "mode"sv, PartManifestWriter); + } + } + PartManifestWriter.EndObject(); // files + + PartManifestWriter.BeginObject("chunkedContent"); + { + compactbinary_helpers::WriteArray(SequenceRawHashes, "sequenceRawHashes"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(ChunkCounts, "chunkcounts"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(AbsoluteChunkOrders, "chunkorders"sv, PartManifestWriter); + } + PartManifestWriter.EndObject(); // chunkedContent + + size_t LooseChunkCount = LooseLocalChunkIndexes.size(); + if (LooseChunkCount > 0) + { + PartManifestWriter.BeginObject("chunkAttachments"); + { + PartManifestWriter.BeginArray("rawHashes"sv); + for (uint32_t ChunkIndex : LooseLocalChunkIndexes) + { + PartManifestWriter.AddBinaryAttachment(LocalChunkHashes[ChunkIndex]); + } + PartManifestWriter.EndArray(); // rawHashes + + PartManifestWriter.BeginArray("chunkRawSizes"sv); + for (uint32_t ChunkIndex : LooseLocalChunkIndexes) + { + PartManifestWriter.AddInteger(LocalChunkRawSizes[ChunkIndex]); + } + PartManifestWriter.EndArray(); // chunkSizes + } + PartManifestWriter.EndObject(); // + } + + if (BlockHashes.size() > 0) + { + PartManifestWriter.BeginObject("blockAttachments"); + { + compactbinary_helpers::WriteBinaryAttachmentArray(BlockHashes, "rawHashes"sv, PartManifestWriter); + } + PartManifestWriter.EndObject(); // blocks + } + } + + void ReadBuildContentFromCompactBinary(CbObjectView 
BuildPartManifest, + SourcePlatform& OutPlatform, + std::vector<std::filesystem::path>& OutPaths, + std::vector<IoHash>& OutRawHashes, + std::vector<uint64_t>& OutRawSizes, + std::vector<uint32_t>& OutAttributes, + std::vector<IoHash>& OutSequenceRawHashes, + std::vector<uint32_t>& OutChunkCounts, + std::vector<uint32_t>& OutAbsoluteChunkOrders, + std::vector<IoHash>& OutLooseChunkHashes, + std::vector<uint64_t>& OutLooseChunkRawSizes, + std::vector<IoHash>& OutBlockRawHashes) + { + OutPlatform = FromString(BuildPartManifest["platform"sv].AsString(), SourcePlatform::_Count); + + CbObjectView FilesObject = BuildPartManifest["files"sv].AsObjectView(); + + compactbinary_helpers::ReadArray("paths"sv, FilesObject, OutPaths); + compactbinary_helpers::ReadArray("rawhashes"sv, FilesObject, OutRawHashes); + compactbinary_helpers::ReadArray("rawsizes"sv, FilesObject, OutRawSizes); + + uint64_t PathCount = OutPaths.size(); + if (OutRawHashes.size() != PathCount) + { + throw std::runtime_error(fmt::format("Number of raw hashes entries does not match number of paths")); + } + if (OutRawSizes.size() != PathCount) + { + throw std::runtime_error(fmt::format("Number of raw sizes entries does not match number of paths")); + } + + // "mode" is the per-file array the writer emits for Linux/MacOS sources; it may be absent + std::vector<uint32_t> ModeArray; + compactbinary_helpers::ReadArray("mode"sv, FilesObject, ModeArray); + if (ModeArray.size() != PathCount && ModeArray.size() != 0) + { + throw std::runtime_error(fmt::format("Number of attribute entries does not match number of paths")); + } + + // "attributes" is the per-file array the writer emits for Windows sources; it has to land in AttributeArray (not ModeArray) + // so that the size check and the Windows fallback branch below actually see it + std::vector<uint32_t> AttributeArray; + compactbinary_helpers::ReadArray("attributes"sv, FilesObject, AttributeArray); + if (AttributeArray.size() != PathCount && AttributeArray.size() != 0) + { + throw std::runtime_error(fmt::format("Number of attribute entries does not match number of paths")); + } + + if (ModeArray.size() > 0) + { + if (OutPlatform == SourcePlatform::_Count) + { + OutPlatform = SourcePlatform::Linux; // Best guess - under dev format + } + OutAttributes =
std::move(ModeArray); + } + else if (AttributeArray.size() > 0) + { + if (OutPlatform == SourcePlatform::_Count) + { + OutPlatform = SourcePlatform::Windows; + } + OutAttributes = std::move(AttributeArray); + } + else + { + if (OutPlatform == SourcePlatform::_Count) + { + OutPlatform = GetSourceCurrentPlatform(); + } + } + + if (CbObjectView ChunkContentView = BuildPartManifest["chunkedContent"sv].AsObjectView(); ChunkContentView) + { + compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkContentView, OutSequenceRawHashes); + compactbinary_helpers::ReadArray("chunkcounts"sv, ChunkContentView, OutChunkCounts); + if (OutChunkCounts.size() != OutSequenceRawHashes.size()) + { + throw std::runtime_error(fmt::format("Number of chunk count entries does not match number of paths")); + } + compactbinary_helpers::ReadArray("chunkorders"sv, ChunkContentView, OutAbsoluteChunkOrders); + } + else if (FilesObject["chunkcounts"sv]) + { + // Legacy zen style + + std::vector<uint32_t> LegacyChunkCounts; + compactbinary_helpers::ReadArray("chunkcounts"sv, FilesObject, LegacyChunkCounts); + if (LegacyChunkCounts.size() != PathCount) + { + throw std::runtime_error(fmt::format("Number of chunk count entries does not match number of paths")); + } + std::vector<uint32_t> LegacyAbsoluteChunkOrders; + compactbinary_helpers::ReadArray("chunkorders"sv, FilesObject, LegacyAbsoluteChunkOrders); + + CbArrayView ChunkOrdersArray = BuildPartManifest["chunkorders"sv].AsArrayView(); + const uint64_t ChunkOrdersCount = ChunkOrdersArray.Num(); + + tsl::robin_set<IoHash, IoHash::Hasher> FoundRawHashes; + FoundRawHashes.reserve(PathCount); + + OutChunkCounts.reserve(PathCount); + OutAbsoluteChunkOrders.reserve(ChunkOrdersCount); + + uint32_t OrderIndexOffset = 0; + for (uint32_t PathIndex = 0; PathIndex < OutPaths.size(); PathIndex++) + { + const IoHash& PathRawHash = OutRawHashes[PathIndex]; + uint32_t LegacyChunkCount = LegacyChunkCounts[PathIndex]; + + if 
(FoundRawHashes.insert(PathRawHash).second) + { + OutSequenceRawHashes.push_back(PathRawHash); + OutChunkCounts.push_back(LegacyChunkCount); + std::span<uint32_t> AbsoluteChunkOrder = + std::span<uint32_t>(LegacyAbsoluteChunkOrders).subspan(OrderIndexOffset, LegacyChunkCount); + OutAbsoluteChunkOrders.insert(OutAbsoluteChunkOrders.end(), AbsoluteChunkOrder.begin(), AbsoluteChunkOrder.end()); + } + OrderIndexOffset += LegacyChunkCounts[PathIndex]; + } + } + else + { + // Legacy C# style + + tsl::robin_set<IoHash, IoHash::Hasher> FoundRawHashes; + FoundRawHashes.reserve(PathCount); + uint32_t OrderIndexOffset = 0; + for (uint32_t PathIndex = 0; PathIndex < OutPaths.size(); PathIndex++) + { + if (OutRawSizes[PathIndex] > 0) + { + const IoHash& PathRawHash = OutRawHashes[PathIndex]; + if (FoundRawHashes.insert(PathRawHash).second) + { + OutSequenceRawHashes.push_back(PathRawHash); + OutChunkCounts.push_back(1); + OutAbsoluteChunkOrders.push_back(OrderIndexOffset); + OutLooseChunkHashes.push_back(PathRawHash); + OutLooseChunkRawSizes.push_back(OutRawSizes[PathIndex]); + OrderIndexOffset += 1; + } + } + } + } + + CbObjectView ChunkAttachmentsView = BuildPartManifest["chunkAttachments"sv].AsObjectView(); + { + compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, ChunkAttachmentsView, OutLooseChunkHashes); + compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkAttachmentsView, OutLooseChunkRawSizes); + if (OutLooseChunkHashes.size() != OutLooseChunkRawSizes.size()) + { + throw std::runtime_error( + fmt::format("Number of attachment chunk hashes does not match number of attachemnt chunk raw sizes")); + } + } + + CbObjectView BlocksView = BuildPartManifest["blockAttachments"sv].AsObjectView(); + { + compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, BlocksView, OutBlockRawHashes); + } + } + + bool ReadStateObject(CbObjectView StateView, + Oid& OutBuildId, + std::vector<Oid>& BuildPartsIds, + std::vector<std::string>& BuildPartsNames, + 
std::vector<ChunkedFolderContent>& OutPartContents, + FolderContent& OutLocalFolderState) + { + try + { + CbObjectView BuildView = StateView["builds"sv].AsArrayView().CreateViewIterator().AsObjectView(); + OutBuildId = BuildView["buildId"sv].AsObjectId(); + for (CbFieldView PartView : BuildView["parts"sv].AsArrayView()) + { + CbObjectView PartObjectView = PartView.AsObjectView(); + BuildPartsIds.push_back(PartObjectView["partId"sv].AsObjectId()); + BuildPartsNames.push_back(std::string(PartObjectView["partName"sv].AsString())); + OutPartContents.push_back(LoadChunkedFolderContentToCompactBinary(PartObjectView["content"sv].AsObjectView())); + } + OutLocalFolderState = LoadFolderContentToCompactBinary(StateView["localFolderState"sv].AsObjectView()); + return true; + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE("Unable to read local state: ", Ex.what()); + return false; + } + } + + CbObject CreateStateObject(const Oid& BuildId, + std::vector<std::pair<Oid, std::string>> AllBuildParts, + std::span<const ChunkedFolderContent> PartContents, + const FolderContent& LocalFolderState) + { + CbObjectWriter CurrentStateWriter; + CurrentStateWriter.BeginArray("builds"sv); + { + CurrentStateWriter.BeginObject(); + { + CurrentStateWriter.AddObjectId("buildId"sv, BuildId); + CurrentStateWriter.BeginArray("parts"sv); + for (size_t PartIndex = 0; PartIndex < AllBuildParts.size(); PartIndex++) + { + const Oid BuildPartId = AllBuildParts[PartIndex].first; + CurrentStateWriter.BeginObject(); + { + CurrentStateWriter.AddObjectId("partId"sv, BuildPartId); + CurrentStateWriter.AddString("partName"sv, AllBuildParts[PartIndex].second); + CurrentStateWriter.BeginObject("content"); + { + SaveChunkedFolderContentToCompactBinary(PartContents[PartIndex], CurrentStateWriter); + } + CurrentStateWriter.EndObject(); + } + CurrentStateWriter.EndObject(); + } + CurrentStateWriter.EndArray(); // parts + } + CurrentStateWriter.EndObject(); + } + CurrentStateWriter.EndArray(); // builds + + 
CurrentStateWriter.BeginObject("localFolderState"sv); + { + SaveFolderContentToCompactBinary(LocalFolderState, CurrentStateWriter); + } + CurrentStateWriter.EndObject(); // localFolderState + + return CurrentStateWriter.Save(); + } + + class BufferedOpenFile + { + public: + BufferedOpenFile(const std::filesystem::path Path) : Source(Path, BasicFile::Mode::kRead), SourceSize(Source.FileSize()) {} + BufferedOpenFile() = delete; + BufferedOpenFile(const BufferedOpenFile&) = delete; + BufferedOpenFile(BufferedOpenFile&&) = delete; + BufferedOpenFile& operator=(BufferedOpenFile&&) = delete; + BufferedOpenFile& operator=(const BufferedOpenFile&) = delete; + + const uint64_t BlockSize = 256u * 1024u; + CompositeBuffer GetRange(uint64_t Offset, uint64_t Size) + { + ZEN_TRACE_CPU("BufferedOpenFile::GetRange"); + + ZEN_ASSERT((CacheBlockIndex == (uint64_t)-1) || Cache); + auto _ = MakeGuard([&]() { ZEN_ASSERT((CacheBlockIndex == (uint64_t)-1) || Cache); }); + + ZEN_ASSERT((Offset + Size) <= SourceSize); + const uint64_t BlockIndexStart = Offset / BlockSize; + const uint64_t BlockIndexEnd = (Offset + Size - 1) / BlockSize; + + std::vector<SharedBuffer> BufferRanges; + BufferRanges.reserve(BlockIndexEnd - BlockIndexStart + 1); + + uint64_t ReadOffset = Offset; + for (uint64_t BlockIndex = BlockIndexStart; BlockIndex <= BlockIndexEnd; BlockIndex++) + { + const uint64_t BlockStartOffset = BlockIndex * BlockSize; + if (CacheBlockIndex != BlockIndex) + { + uint64_t CacheSize = Min(BlockSize, SourceSize - BlockStartOffset); + ZEN_ASSERT(CacheSize > 0); + Cache = IoBuffer(CacheSize); + Source.Read(Cache.GetMutableView().GetData(), CacheSize, BlockStartOffset); + CacheBlockIndex = BlockIndex; + } + + const uint64_t BytesRead = ReadOffset - Offset; + ZEN_ASSERT(BlockStartOffset <= ReadOffset); + const uint64_t OffsetIntoBlock = ReadOffset - BlockStartOffset; + ZEN_ASSERT(OffsetIntoBlock < Cache.GetSize()); + const uint64_t BlockBytes = Min(Cache.GetSize() - OffsetIntoBlock, Size - 
BytesRead); + BufferRanges.emplace_back(SharedBuffer(IoBuffer(Cache, OffsetIntoBlock, BlockBytes))); + ReadOffset += BlockBytes; + } + CompositeBuffer Result(std::move(BufferRanges)); + ZEN_ASSERT(Result.GetSize() == Size); + return Result; + } + + private: + BasicFile Source; + const uint64_t SourceSize; + uint64_t CacheBlockIndex = (uint64_t)-1; + IoBuffer Cache; + }; + + class ReadFileCache + { + public: + // A buffered file reader that provides CompositeBuffer where the buffers are owned and the memory never overwritten + ReadFileCache(DiskStatistics& DiskStats, + const std::filesystem::path& Path, + const ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + size_t MaxOpenFileCount) + : m_Path(Path) + , m_LocalContent(LocalContent) + , m_LocalLookup(LocalLookup) + , m_DiskStats(DiskStats) + { + m_OpenFiles.reserve(MaxOpenFileCount); + } + ~ReadFileCache() + { + m_DiskStats.CurrentOpenFileCount -= m_OpenFiles.size(); + m_OpenFiles.clear(); + } + + CompositeBuffer GetRange(uint32_t SequenceIndex, uint64_t Offset, uint64_t Size) + { + ZEN_TRACE_CPU("ReadFileCache::GetRange"); + + auto CacheIt = std::find_if(m_OpenFiles.begin(), m_OpenFiles.end(), [SequenceIndex](const auto& Lhs) { + return Lhs.first == SequenceIndex; + }); + if (CacheIt != m_OpenFiles.end()) + { + if (CacheIt != m_OpenFiles.begin()) + { + auto CachedFile(std::move(CacheIt->second)); + m_OpenFiles.erase(CacheIt); + m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(SequenceIndex, std::move(CachedFile))); + } + CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); + m_DiskStats.ReadByteCount += Result.GetSize(); + return Result; + } + const uint32_t LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred(); + if (Size == m_LocalContent.RawSizes[LocalPathIndex]) + { + IoBuffer Result = 
IoBufferBuilder::MakeFromFile(LocalFilePath); + m_DiskStats.OpenReadCount++; + m_DiskStats.ReadByteCount += Result.GetSize(); + return CompositeBuffer(SharedBuffer(Result)); + } + if (m_OpenFiles.size() == m_OpenFiles.capacity()) + { + m_OpenFiles.pop_back(); + m_DiskStats.CurrentOpenFileCount--; + } + m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(SequenceIndex, std::make_unique<BufferedOpenFile>(LocalFilePath))); + CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); + m_DiskStats.ReadByteCount += Result.GetSize(); + m_DiskStats.OpenReadCount++; + m_DiskStats.CurrentOpenFileCount++; + return Result; + } + + private: + const std::filesystem::path m_Path; + const ChunkedFolderContent& m_LocalContent; + const ChunkedContentLookup& m_LocalLookup; + std::vector<std::pair<uint32_t, std::unique_ptr<BufferedOpenFile>>> m_OpenFiles; + DiskStatistics& m_DiskStats; + }; + + CompositeBuffer ValidateBlob(IoBuffer&& Payload, const IoHash& BlobHash, uint64_t& OutCompressedSize, uint64_t& OutDecompressedSize) + { + ZEN_TRACE_CPU("ValidateBlob"); + + if (Payload.GetContentType() != ZenContentType::kCompressedBinary) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) has unexpected content type '{}'", + BlobHash, + Payload.GetSize(), + ToString(Payload.GetContentType()))); + } + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) compressed header is invalid", BlobHash, Payload.GetSize())); + } + if (RawHash != BlobHash) + { + throw std::runtime_error( + fmt::format("Blob {} ({} bytes) compressed header has a mismatching raw hash {}", BlobHash, Payload.GetSize(), RawHash)); + } + + IoHashStream Hash; + bool CouldDecompress = Compressed.DecompressToStream(0, RawSize, [&Hash](uint64_t, const CompositeBuffer& RangeBuffer) { + if (!AbortFlag) + { + for (const 
SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + } + return true; + } + return false; + }); + + if (AbortFlag) + { + return CompositeBuffer{}; + } + + if (!CouldDecompress) + { + throw std::runtime_error( + fmt::format("Blob {} ({} bytes) failed to decompress - header information mismatch", BlobHash, Payload.GetSize())); + } + IoHash ValidateRawHash = Hash.GetHash(); + if (ValidateRawHash != BlobHash) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) decompressed hash {} does not match header information", + BlobHash, + Payload.GetSize(), + ValidateRawHash)); + } + OodleCompressor Compressor; + OodleCompressionLevel CompressionLevel; + uint64_t BlockSize; + if (!Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize)) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to get compression details", BlobHash, Payload.GetSize())); + } + OutCompressedSize = Payload.GetSize(); + OutDecompressedSize = RawSize; + if (CompressionLevel == OodleCompressionLevel::None) + { + // Only decompress to composite if we need it for block verification + CompositeBuffer DecompressedComposite = Compressed.DecompressToComposite(); + if (!DecompressedComposite) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to decompress to composite", BlobHash, Payload.GetSize())); + } + return DecompressedComposite; + } + return CompositeBuffer{}; + } + + CompositeBuffer ValidateBlob(BuildStorage& Storage, + const Oid& BuildId, + const IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize) + { + ZEN_TRACE_CPU("ValidateBlob"); + IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlobHash); + if (!Payload) + { + throw std::runtime_error(fmt::format("Blob {} could not be found", BlobHash)); + } + return ValidateBlob(std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); + } + + ChunkBlockDescription ValidateChunkBlock(IoBuffer&& Payload, + const 
IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize) + { + CompositeBuffer BlockBuffer = ValidateBlob(std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Chunk block blob {} is not compressed using 'None' compression level", BlobHash)); + } + return GetChunkBlockDescription(BlockBuffer.Flatten(), BlobHash); + } + + CompositeBuffer FetchChunk(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const IoHash& ChunkHash, + ReadFileCache& OpenFileCache) + { + ZEN_TRACE_CPU("FetchChunk"); + auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(It != Lookup.ChunkHashToChunkIndex.end()); + uint32_t ChunkIndex = It->second; + std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex); + ZEN_ASSERT(!ChunkLocations.empty()); + CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, + ChunkLocations[0].Offset, + Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == ChunkHash); + return Chunk; + }; + + CompressedBuffer GenerateBlock(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const std::vector<uint32_t>& ChunksInBlock, + ChunkBlockDescription& OutBlockDescription, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("GenerateBlock"); + ReadFileCache OpenFileCache(DiskStats, Path, Content, Lookup, 4); + + std::vector<std::pair<IoHash, FetchChunkFunc>> BlockContent; + BlockContent.reserve(ChunksInBlock.size()); + for (uint32_t ChunkIndex : ChunksInBlock) + { + BlockContent.emplace_back(std::make_pair( + Content.ChunkedContent.ChunkHashes[ChunkIndex], + [&Content, &Lookup, &OpenFileCache, ChunkIndex](const IoHash& ChunkHash) -> std::pair<uint64_t, CompressedBuffer> { + CompositeBuffer Chunk = FetchChunk(Content, Lookup, ChunkHash, 
OpenFileCache); + if (!Chunk) + { + ZEN_ASSERT(false); + } + uint64_t RawSize = Chunk.GetSize(); + return {RawSize, CompressedBuffer::Compress(Chunk, OodleCompressor::Mermaid, OodleCompressionLevel::None)}; + })); + } + + return GenerateChunkBlock(std::move(BlockContent), OutBlockDescription); + }; + + CompressedBuffer RebuildBlock(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + CompositeBuffer&& HeaderBuffer, + const std::vector<uint32_t>& ChunksInBlock, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("RebuildBlock"); + ReadFileCache OpenFileCache(DiskStats, Path, Content, Lookup, 4); + + std::vector<SharedBuffer> ResultBuffers; + ResultBuffers.reserve(HeaderBuffer.GetSegments().size() + ChunksInBlock.size()); + ResultBuffers.insert(ResultBuffers.end(), HeaderBuffer.GetSegments().begin(), HeaderBuffer.GetSegments().end()); + for (uint32_t ChunkIndex : ChunksInBlock) + { + std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex); + ZEN_ASSERT(!ChunkLocations.empty()); + CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, + ChunkLocations[0].Offset, + Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == Content.ChunkedContent.ChunkHashes[ChunkIndex]); + CompositeBuffer CompressedChunk = + CompressedBuffer::Compress(std::move(Chunk), OodleCompressor::Mermaid, OodleCompressionLevel::None).GetCompressed(); + ResultBuffers.insert(ResultBuffers.end(), CompressedChunk.GetSegments().begin(), CompressedChunk.GetSegments().end()); + } + return CompressedBuffer::FromCompressedNoValidate(CompositeBuffer(std::move(ResultBuffers))); + }; + + void DownloadLargeBlob(BuildStorage& Storage, + const std::filesystem::path& DownloadFolder, + const Oid& BuildId, + const IoHash& ChunkHash, + const std::uint64_t PreferredMultipartChunkSize, + ParallellWork& Work, + WorkerThreadPool& 
NetworkPool, + std::atomic<uint64_t>& BytesDownloaded, + std::atomic<uint64_t>& MultipartAttachmentCount, + std::function<void(IoBuffer&& Payload)>&& OnDownloadComplete) + { + ZEN_TRACE_CPU("DownloadLargeBlob"); + + struct WorkloadData + { + TemporaryFile TempFile; + }; + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + + std::error_code Ec; + Workload->TempFile.CreateTemporary(DownloadFolder, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed opening temporary file '{}': {} ({})", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); + } + std::vector<std::function<void()>> WorkItems = Storage.GetLargeBuildBlob( + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + [Workload, &BytesDownloaded, OnDownloadComplete = std::move(OnDownloadComplete)](uint64_t Offset, + const IoBuffer& Chunk, + uint64_t BytesRemaining) { + BytesDownloaded += Chunk.GetSize(); + + if (!AbortFlag.load()) + { + ZEN_TRACE_CPU("DownloadLargeBlob_Save"); + Workload->TempFile.Write(Chunk.GetView(), Offset); + if (Chunk.GetSize() == BytesRemaining) + { + uint64_t PayloadSize = Workload->TempFile.FileSize(); + void* FileHandle = Workload->TempFile.Detach(); + ZEN_ASSERT(FileHandle != nullptr); + IoBuffer Payload(IoBuffer::File, FileHandle, 0, PayloadSize, true); + Payload.SetDeleteOnClose(true); + OnDownloadComplete(std::move(Payload)); + } + } + }); + if (!WorkItems.empty()) + { + MultipartAttachmentCount++; + } + for (auto& WorkItem : WorkItems) + { + Work.ScheduleWork( + NetworkPool, // GetSyncWorkerPool(),// + [WorkItem = std::move(WorkItem)](std::atomic<bool>&) { + ZEN_TRACE_CPU("DownloadLargeBlob_Work"); + if (!AbortFlag) + { + WorkItem(); + } + }, + Work.DefaultErrorFunction()); + } + } + + void ValidateBuildPart(BuildStorage& Storage, const Oid& BuildId, Oid BuildPartId, const std::string_view BuildPartName) + { + Stopwatch Timer; + auto _ = MakeGuard([&]() { + ZEN_CONSOLE("Validated build part {}/{} ('{}') in {}", + BuildId, + 
BuildPartId, + BuildPartName, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + CbObject Build = Storage.GetBuild(BuildId); + if (!BuildPartName.empty()) + { + BuildPartId = Build["parts"sv].AsObjectView()[BuildPartName].AsObjectId(); + if (BuildPartId == Oid::Zero) + { + throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", BuildId, BuildPartName)); + } + } + uint64_t PreferredMultipartChunkSize = DefaultPreferredMultipartChunkSize; + if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + PreferredMultipartChunkSize = ChunkSize; + } + CbObject BuildPart = Storage.GetBuildPart(BuildId, BuildPartId); + ZEN_CONSOLE("Validating build part {}/{} ({})", BuildId, BuildPartId, NiceBytes(BuildPart.GetSize())); + std::vector<IoHash> ChunkAttachments; + for (CbFieldView LooseFileView : BuildPart["chunkAttachments"sv].AsObjectView()["rawHashes"sv]) + { + ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment()); + } + std::vector<IoHash> BlockAttachments; + for (CbFieldView BlocksView : BuildPart["blockAttachments"sv].AsObjectView()["rawHashes"sv]) + { + BlockAttachments.push_back(BlocksView.AsBinaryAttachment()); + } + + std::vector<ChunkBlockDescription> VerifyBlockDescriptions = Storage.GetBlockMetadata(BuildId, BlockAttachments); + if (VerifyBlockDescriptions.size() != BlockAttachments.size()) + { + throw std::runtime_error(fmt::format("Uploaded blocks metadata could not all be found, {} blocks metadata is missing", + BlockAttachments.size() - VerifyBlockDescriptions.size())); + } + + WorkerThreadPool& NetworkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + WorkerThreadPool& ReadPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + WorkerThreadPool& VerifyPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + ParallellWork Work(AbortFlag); + + const std::filesystem::path TempFolder = ".zen-tmp"; + + CreateDirectories(TempFolder); + auto __ 
= MakeGuard([&TempFolder]() { + if (CleanDirectory(TempFolder, {})) + { + std::filesystem::remove(TempFolder); + } + }); + + ProgressBar ProgressBar(UsePlainProgress); + + uint64_t AttachmentsToVerifyCount = ChunkAttachments.size() + BlockAttachments.size(); + std::atomic<uint64_t> DownloadedAttachmentCount = 0; + std::atomic<uint64_t> VerifiedAttachmentCount = 0; + std::atomic<uint64_t> DownloadedByteCount = 0; + std::atomic<uint64_t> VerifiedByteCount = 0; + FilteredRate FilteredDownloadedBytesPerSecond; + FilteredRate FilteredVerifiedBytesPerSecond; + + std::atomic<uint64_t> MultipartAttachmentCount = 0; + + for (const IoHash& ChunkAttachment : ChunkAttachments) + { + Work.ScheduleWork( + ReadPool, + [&, ChunkAttachment](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_GetChunk"); + + FilteredDownloadedBytesPerSecond.Start(); + DownloadLargeBlob(Storage, + TempFolder, + BuildId, + ChunkAttachment, + PreferredMultipartChunkSize, + Work, + NetworkPool, + DownloadedByteCount, + MultipartAttachmentCount, + [&, ChunkHash = ChunkAttachment](IoBuffer&& Payload) { + Payload.SetContentType(ZenContentType::kCompressedBinary); + if (!AbortFlag) + { + Work.ScheduleWork( + VerifyPool, + [&, Payload = std::move(Payload), ChunkHash](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_Validate"); + + FilteredVerifiedBytesPerSecond.Start(); + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateBlob(std::move(Payload), ChunkHash, CompressedSize, DecompressedSize); + ZEN_CONSOLE_VERBOSE("Chunk attachment {} ({} -> {}) is valid", + ChunkHash, + NiceBytes(CompressedSize), + NiceBytes(DecompressedSize)); + VerifiedAttachmentCount++; + VerifiedByteCount += DecompressedSize; + if (VerifiedAttachmentCount.load() == AttachmentsToVerifyCount) + { + FilteredVerifiedBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + }); + } + }, + Work.DefaultErrorFunction()); + } + + for (const IoHash& 
BlockAttachment : BlockAttachments) + { + Work.ScheduleWork( + NetworkPool, + [&, BlockAttachment](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_GetBlock"); + + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlockAttachment); + DownloadedAttachmentCount++; + DownloadedByteCount += Payload.GetSize(); + if (DownloadedAttachmentCount.load() == AttachmentsToVerifyCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + if (!Payload) + { + throw std::runtime_error(fmt::format("Block attachment {} could not be found", BlockAttachment)); + } + if (!AbortFlag) + { + Work.ScheduleWork( + VerifyPool, + [&, Payload = std::move(Payload), BlockAttachment](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_ValidateBlock"); + + FilteredVerifiedBytesPerSecond.Start(); + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateChunkBlock(std::move(Payload), BlockAttachment, CompressedSize, DecompressedSize); + ZEN_CONSOLE_VERBOSE("Chunk block {} ({} -> {}) is valid", + BlockAttachment, + NiceBytes(CompressedSize), + NiceBytes(DecompressedSize)); + VerifiedAttachmentCount++; + VerifiedByteCount += DecompressedSize; + if (VerifiedAttachmentCount.load() == AttachmentsToVerifyCount) + { + FilteredVerifiedBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + } + }, + Work.DefaultErrorFunction()); + } + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + + FilteredDownloadedBytesPerSecond.Update(DownloadedByteCount); + FilteredVerifiedBytesPerSecond.Update(VerifiedByteCount); + + std::string Details = fmt::format("Downloaded {}/{} ({}, {}bits/s). 
Verified {}/{} ({}, {}B/s)", + DownloadedAttachmentCount.load(), + AttachmentsToVerifyCount, + NiceBytes(DownloadedByteCount.load()), + NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8), + VerifiedAttachmentCount.load(), + AttachmentsToVerifyCount, + NiceBytes(VerifiedByteCount.load()), + NiceNum(FilteredVerifiedBytesPerSecond.GetCurrent())); + + ProgressBar.UpdateState( + {.Task = "Validating blobs ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2), + .RemainingCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2 - + (DownloadedAttachmentCount.load() + VerifiedAttachmentCount.load()))}, + false); + }); + + ProgressBar.Finish(); + } + + void ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + uint64_t MaxBlockSize, + std::vector<uint32_t>& ChunkIndexes, + std::vector<std::vector<uint32_t>>& OutBlocks) + { + ZEN_TRACE_CPU("ArrangeChunksIntoBlocks"); + std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&Content, &Lookup](uint32_t Lhs, uint32_t Rhs) { + const ChunkedContentLookup::ChunkSequenceLocation& LhsLocation = GetChunkSequenceLocations(Lookup, Lhs)[0]; + const ChunkedContentLookup::ChunkSequenceLocation& RhsLocation = GetChunkSequenceLocations(Lookup, Rhs)[0]; + if (LhsLocation.SequenceIndex < RhsLocation.SequenceIndex) + { + return true; + } + else if (LhsLocation.SequenceIndex > RhsLocation.SequenceIndex) + { + return false; + } + return LhsLocation.Offset < RhsLocation.Offset; + }); + + uint64_t MaxBlockSizeLowThreshold = MaxBlockSize - (MaxBlockSize / 16); + + uint64_t BlockSize = 0; + + uint32_t ChunkIndexStart = 0; + for (uint32_t ChunkIndexOffset = 0; ChunkIndexOffset < ChunkIndexes.size();) + { + const uint32_t ChunkIndex = ChunkIndexes[ChunkIndexOffset]; + const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + + if ((BlockSize + ChunkSize) > MaxBlockSize) + { + // Within the span of MaxBlockSizeLowThreshold and 
MaxBlockSize, see if there is a break + // between source paths for chunks. Break the block at the last such break if any. + ZEN_ASSERT(ChunkIndexOffset > ChunkIndexStart); + + const uint32_t ChunkSequenceIndex = + Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[ChunkIndex]].SequenceIndex; + + uint64_t ScanBlockSize = BlockSize; + + uint32_t ScanChunkIndexOffset = ChunkIndexOffset - 1; + while (ScanChunkIndexOffset > (ChunkIndexStart + 2)) + { + const uint32_t TestChunkIndex = ChunkIndexes[ScanChunkIndexOffset]; + const uint64_t TestChunkSize = Content.ChunkedContent.ChunkRawSizes[TestChunkIndex]; + if ((ScanBlockSize - TestChunkSize) < MaxBlockSizeLowThreshold) + { + break; + } + + const uint32_t TestSequenceIndex = + Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[TestChunkIndex]].SequenceIndex; + if (ChunkSequenceIndex != TestSequenceIndex) + { + ChunkIndexOffset = ScanChunkIndexOffset + 1; + break; + } + + ScanBlockSize -= TestChunkSize; + ScanChunkIndexOffset--; + } + + std::vector<uint32_t> ChunksInBlock; + ChunksInBlock.reserve(ChunkIndexOffset - ChunkIndexStart); + for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexOffset; AddIndexOffset++) + { + const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset]; + ChunksInBlock.push_back(AddChunkIndex); + } + OutBlocks.emplace_back(std::move(ChunksInBlock)); + BlockSize = 0; + ChunkIndexStart = ChunkIndexOffset; + } + else + { + ChunkIndexOffset++; + BlockSize += ChunkSize; + } + } + if (ChunkIndexStart < ChunkIndexes.size()) + { + std::vector<uint32_t> ChunksInBlock; + ChunksInBlock.reserve(ChunkIndexes.size() - ChunkIndexStart); + for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexes.size(); AddIndexOffset++) + { + const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset]; + ChunksInBlock.push_back(AddChunkIndex); + } + OutBlocks.emplace_back(std::move(ChunksInBlock)); + } + } + + CompositeBuffer CompressChunk(const 
std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex, + const std::filesystem::path& TempFolderPath) + { + ZEN_TRACE_CPU("CompressChunk"); + ZEN_ASSERT(!TempFolderPath.empty()); + const IoHash& ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; + const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + + const ChunkedContentLookup::ChunkSequenceLocation& Source = GetChunkSequenceLocations(Lookup, ChunkIndex)[0]; + const std::uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[Source.SequenceIndex]; + IoBuffer RawSource = IoBufferBuilder::MakeFromFile((Path / Content.Paths[PathIndex]).make_preferred(), Source.Offset, ChunkSize); + if (!RawSource) + { + throw std::runtime_error(fmt::format("Failed fetching chunk {}", ChunkHash)); + } + if (RawSource.GetSize() != ChunkSize) + { + throw std::runtime_error(fmt::format("Fetched chunk {} has invalid size", ChunkHash)); + } + ZEN_ASSERT_SLOW(IoHash::HashBuffer(RawSource) == ChunkHash); + { + std::filesystem::path TempFilePath = (TempFolderPath / ChunkHash.ToHexString()).make_preferred(); + + BasicFile CompressedFile; + std::error_code Ec; + CompressedFile.Open(TempFilePath, BasicFile::Mode::kTruncate, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed creating temporary file for compressing blob {}. 
Reason: {}", ChunkHash, Ec.message())); + } + + bool CouldCompress = CompressedBuffer::CompressToStream( + CompositeBuffer(SharedBuffer(RawSource)), + [&](uint64_t Offset, const CompositeBuffer& RangeBuffer) { CompressedFile.Write(RangeBuffer, Offset); }); + if (CouldCompress) + { + uint64_t CompressedSize = CompressedFile.FileSize(); + void* FileHandle = CompressedFile.Detach(); + IoBuffer TempPayload = IoBuffer(IoBuffer::File, + FileHandle, + 0, + CompressedSize, + /*IsWholeFile*/ true); + ZEN_ASSERT(TempPayload); + TempPayload.SetDeleteOnClose(true); + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(TempPayload), RawHash, RawSize); + ZEN_ASSERT(Compressed); + ZEN_ASSERT(RawHash == ChunkHash); + ZEN_ASSERT(RawSize == ChunkSize); + return Compressed.GetCompressed(); + } + CompressedFile.Close(); + std::filesystem::remove(TempFilePath, Ec); + ZEN_UNUSED(Ec); + } + + // Try regular compress - decompress may fail if compressed data is larger than non-compressed + CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(RawSource))); + if (!CompressedBlob) + { + throw std::runtime_error(fmt::format("Failed to compress large blob {}", ChunkHash)); + } + if (!IsBufferDiskBased(CompressedBlob.GetCompressed())) + { + IoBuffer TempPayload = WriteToTempFile(std::move(CompressedBlob).GetCompressed(), TempFolderPath, ChunkHash); + CompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)); + } + return std::move(CompressedBlob).GetCompressed(); + } + + struct GeneratedBlocks + { + std::vector<ChunkBlockDescription> BlockDescriptions; + std::vector<uint64_t> BlockSizes; + std::vector<CompositeBuffer> BlockHeaders; + std::vector<CbObject> BlockMetaDatas; + std::vector<bool> MetaDataHasBeenUploaded; + tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockHashToBlockIndex; + }; + + void GenerateBuildBlocks(const std::filesystem::path& Path, + const 
ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + BuildStorage& Storage, + const Oid& BuildId, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& OutBlocks, + DiskStatistics& DiskStats, + UploadStatistics& UploadStats, + GenerateBlocksStatistics& GenerateBlocksStats) + { + ZEN_TRACE_CPU("GenerateBuildBlocks"); + const std::size_t NewBlockCount = NewBlockChunks.size(); + if (NewBlockCount > 0) + { + ProgressBar ProgressBar(UsePlainProgress); + + OutBlocks.BlockDescriptions.resize(NewBlockCount); + OutBlocks.BlockSizes.resize(NewBlockCount); + OutBlocks.BlockMetaDatas.resize(NewBlockCount); + OutBlocks.BlockHeaders.resize(NewBlockCount); + OutBlocks.MetaDataHasBeenUploaded.resize(NewBlockCount, false); + OutBlocks.BlockHashToBlockIndex.reserve(NewBlockCount); + + RwLock Lock; + + WorkerThreadPool& GenerateBlobsPool = + GetMediumWorkerPool(EWorkloadType::Burst); // GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool();// + WorkerThreadPool& UploadBlocksPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool();// + + FilteredRate FilteredGeneratedBytesPerSecond; + FilteredRate FilteredUploadedBytesPerSecond; + + ParallellWork Work(AbortFlag); + + std::atomic<uint64_t> QueuedPendingBlocksForUpload = 0; + + for (size_t BlockIndex = 0; BlockIndex < NewBlockCount; BlockIndex++) + { + if (Work.IsAborted()) + { + break; + } + const std::vector<uint32_t>& ChunksInBlock = NewBlockChunks[BlockIndex]; + Work.ScheduleWork( + GenerateBlobsPool, + [&, BlockIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("GenerateBuildBlocks_Generate"); + + FilteredGeneratedBytesPerSecond.Start(); + // TODO: Convert ScheduleWork body to function + + CompressedBuffer CompressedBlock = + GenerateBlock(Path, Content, Lookup, ChunksInBlock, OutBlocks.BlockDescriptions[BlockIndex], DiskStats); + ZEN_CONSOLE_VERBOSE("Generated block {} ({}) containing {} chunks", + 
OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(CompressedBlock.GetCompressedSize()), + OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + + OutBlocks.BlockSizes[BlockIndex] = CompressedBlock.GetCompressedSize(); + { + CbObjectWriter Writer; + Writer.AddString("createdBy", "zen"); + OutBlocks.BlockMetaDatas[BlockIndex] = Writer.Save(); + } + GenerateBlocksStats.GeneratedBlockByteCount += OutBlocks.BlockSizes[BlockIndex]; + GenerateBlocksStats.GeneratedBlockCount++; + + Lock.WithExclusiveLock([&]() { + OutBlocks.BlockHashToBlockIndex.insert_or_assign(OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + BlockIndex); + }); + + { + std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + + if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) + { + FilteredGeneratedBytesPerSecond.Stop(); + } + + if (QueuedPendingBlocksForUpload.load() > 16) + { + std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + else + { + if (!AbortFlag) + { + QueuedPendingBlocksForUpload++; + + Work.ScheduleWork( + UploadBlocksPool, + [&, BlockIndex, Payload = std::move(CompressedBlock)](std::atomic<bool>&) mutable { + auto _ = MakeGuard([&QueuedPendingBlocksForUpload] { QueuedPendingBlocksForUpload--; }); + if (!AbortFlag) + { + if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) + { + ZEN_TRACE_CPU("GenerateBuildBlocks_Save"); + + FilteredUploadedBytesPerSecond.Stop(); + std::span<const SharedBuffer> Segments = Payload.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + else + { + ZEN_TRACE_CPU("GenerateBuildBlocks_Upload"); + + 
FilteredUploadedBytesPerSecond.Start(); + // TODO: Convert ScheduleWork body to function + + const CbObject BlockMetaData = + BuildChunkBlockDescription(OutBlocks.BlockDescriptions[BlockIndex], + OutBlocks.BlockMetaDatas[BlockIndex]); + + const IoHash& BlockHash = OutBlocks.BlockDescriptions[BlockIndex].BlockHash; + const uint64_t CompressedBlockSize = Payload.GetCompressedSize(); + + Storage.PutBuildBlob(BuildId, + BlockHash, + ZenContentType::kCompressedBinary, + std::move(Payload).GetCompressed()); + UploadStats.BlocksBytes += CompressedBlockSize; + ZEN_CONSOLE_VERBOSE("Uploaded block {} ({}) containing {} chunks", + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(CompressedBlockSize), + OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + + Storage.PutBlockMetadata(BuildId, + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + BlockMetaData); + ZEN_CONSOLE_VERBOSE("Uploaded block {} metadata ({})", + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(BlockMetaData.GetSize())); + + OutBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + UploadStats.BlockCount++; + if (UploadStats.BlockCount == NewBlockCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + } + } + }, + Work.DefaultErrorFunction()); + } + } + } + }, + Work.DefaultErrorFunction()); + } + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + + FilteredGeneratedBytesPerSecond.Update(GenerateBlocksStats.GeneratedBlockByteCount.load()); + FilteredUploadedBytesPerSecond.Update(UploadStats.BlocksBytes.load()); + + std::string Details = fmt::format("Generated {}/{} ({}, {}B/s). 
Uploaded {}/{} ({}, {}bits/s)", + GenerateBlocksStats.GeneratedBlockCount.load(), + NewBlockCount, + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceNum(FilteredGeneratedBytesPerSecond.GetCurrent()), + UploadStats.BlockCount.load(), + NewBlockCount, + NiceBytes(UploadStats.BlocksBytes.load()), + NiceNum(FilteredUploadedBytesPerSecond.GetCurrent() * 8)); + + ProgressBar.UpdateState( + {.Task = "Generating blocks", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(NewBlockCount), + .RemainingCount = gsl::narrow<uint64_t>(NewBlockCount - GenerateBlocksStats.GeneratedBlockCount.load())}, + false); + }); + + ZEN_ASSERT(AbortFlag || QueuedPendingBlocksForUpload.load() == 0); + + ProgressBar.Finish(); + + GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTimeUS(); + UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); + } + } + + void UploadPartBlobs(BuildStorage& Storage, + const Oid& BuildId, + const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::span<IoHash> RawHashes, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& NewBlocks, + std::span<const uint32_t> LooseChunkIndexes, + const std::uint64_t LargeAttachmentSize, + DiskStatistics& DiskStats, + UploadStatistics& UploadStats, + GenerateBlocksStatistics& GenerateBlocksStats, + LooseChunksStatistics& LooseChunksStats) + { + ZEN_TRACE_CPU("UploadPartBlobs"); + { + ProgressBar ProgressBar(UsePlainProgress); + + WorkerThreadPool& ReadChunkPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + WorkerThreadPool& UploadChunkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + + FilteredRate FilteredGenerateBlockBytesPerSecond; + FilteredRate FilteredCompressedBytesPerSecond; + FilteredRate FilteredUploadedBytesPerSecond; + + ParallellWork Work(AbortFlag); + + std::atomic<size_t> 
UploadedBlockSize = 0; + std::atomic<size_t> UploadedBlockCount = 0; + std::atomic<size_t> UploadedRawChunkSize = 0; + std::atomic<size_t> UploadedCompressedChunkSize = 0; + std::atomic<uint32_t> UploadedChunkCount = 0; + + tsl::robin_map<uint32_t, uint32_t> ChunkIndexToLooseChunkOrderIndex; + ChunkIndexToLooseChunkOrderIndex.reserve(LooseChunkIndexes.size()); + for (uint32_t OrderIndex = 0; OrderIndex < LooseChunkIndexes.size(); OrderIndex++) + { + ChunkIndexToLooseChunkOrderIndex.insert_or_assign(LooseChunkIndexes[OrderIndex], OrderIndex); + } + + std::vector<size_t> BlockIndexes; + std::vector<uint32_t> LooseChunkOrderIndexes; + + uint64_t TotalLooseChunksSize = 0; + uint64_t TotalBlocksSize = 0; + for (const IoHash& RawHash : RawHashes) + { + if (auto It = NewBlocks.BlockHashToBlockIndex.find(RawHash); It != NewBlocks.BlockHashToBlockIndex.end()) + { + BlockIndexes.push_back(It->second); + TotalBlocksSize += NewBlocks.BlockSizes[It->second]; + } + if (auto ChunkIndexIt = Lookup.ChunkHashToChunkIndex.find(RawHash); ChunkIndexIt != Lookup.ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = ChunkIndexIt->second; + if (auto LooseOrderIndexIt = ChunkIndexToLooseChunkOrderIndex.find(ChunkIndex); + LooseOrderIndexIt != ChunkIndexToLooseChunkOrderIndex.end()) + { + LooseChunkOrderIndexes.push_back(LooseOrderIndexIt->second); + TotalLooseChunksSize += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + } + } + uint64_t TotalRawSize = TotalLooseChunksSize + TotalBlocksSize; + + const size_t UploadBlockCount = BlockIndexes.size(); + const uint32_t UploadChunkCount = gsl::narrow<uint32_t>(LooseChunkOrderIndexes.size()); + + auto AsyncUploadBlock = [&](const size_t BlockIndex, + const IoHash BlockHash, + CompositeBuffer&& Payload, + std::atomic<uint64_t>& QueuedPendingInMemoryBlocksForUpload) { + bool IsInMemoryBlock = true; + if (QueuedPendingInMemoryBlocksForUpload.load() > 16) + { + ZEN_TRACE_CPU("AsyncUploadBlock_WriteTempBlock"); + Payload = 
CompositeBuffer(WriteToTempFile(std::move(Payload), Path / ZenTempBlockFolderName, BlockHash)); + IsInMemoryBlock = false; + } + else + { + QueuedPendingInMemoryBlocksForUpload++; + } + + Work.ScheduleWork( + UploadChunkPool, + [&, IsInMemoryBlock, BlockIndex, BlockHash, Payload = std::move(Payload)](std::atomic<bool>&) mutable { + auto _ = MakeGuard([IsInMemoryBlock, &QueuedPendingInMemoryBlocksForUpload] { + if (IsInMemoryBlock) + { + QueuedPendingInMemoryBlocksForUpload--; + } + }); + if (!AbortFlag) + { + ZEN_TRACE_CPU("AsyncUploadBlock"); + + const uint64_t PayloadSize = Payload.GetSize(); + + FilteredUploadedBytesPerSecond.Start(); + const CbObject BlockMetaData = + BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); + + Storage.PutBuildBlob(BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); + ZEN_CONSOLE_VERBOSE("Uploaded block {} ({}) containing {} chunks", + NewBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(PayloadSize), + NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + UploadedBlockSize += PayloadSize; + UploadStats.BlocksBytes += PayloadSize; + + Storage.PutBlockMetadata(BuildId, BlockHash, BlockMetaData); + ZEN_CONSOLE_VERBOSE("Uploaded block {} metadata ({})", + NewBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(BlockMetaData.GetSize())); + + NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + + UploadStats.BlockCount++; + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + + UploadedBlockCount++; + if (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + }; + + auto AsyncUploadLooseChunk = [&](const IoHash& RawHash, uint64_t RawSize, CompositeBuffer&& Payload) { + Work.ScheduleWork( + UploadChunkPool, + [&, RawHash, RawSize, Payload = CompositeBuffer(std::move(Payload))](std::atomic<bool>&) mutable { + if 
(!AbortFlag) + { + ZEN_TRACE_CPU("AsyncUploadLooseChunk"); + + const uint64_t PayloadSize = Payload.GetSize(); + ; + if (PayloadSize >= LargeAttachmentSize) + { + ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart"); + UploadStats.MultipartAttachmentCount++; + std::vector<std::function<void()>> MultipartWork = Storage.PutLargeBuildBlob( + BuildId, + RawHash, + ZenContentType::kCompressedBinary, + PayloadSize, + [Payload = std::move(Payload), &FilteredUploadedBytesPerSecond](uint64_t Offset, + uint64_t Size) mutable -> IoBuffer { + FilteredUploadedBytesPerSecond.Start(); + + IoBuffer PartPayload = Payload.Mid(Offset, Size).Flatten().AsIoBuffer(); + PartPayload.SetContentType(ZenContentType::kBinary); + return PartPayload; + }, + [&, RawSize](uint64_t SentBytes, bool IsComplete) { + UploadStats.ChunksBytes += SentBytes; + UploadedCompressedChunkSize += SentBytes; + if (IsComplete) + { + UploadStats.ChunkCount++; + UploadedChunkCount++; + if (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + UploadedRawChunkSize += RawSize; + } + }); + for (auto& WorkPart : MultipartWork) + { + Work.ScheduleWork( + UploadChunkPool, + [Work = std::move(WorkPart)](std::atomic<bool>&) { + ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart_Work"); + if (!AbortFlag) + { + Work(); + } + }, + Work.DefaultErrorFunction()); + } + ZEN_CONSOLE_VERBOSE("Uploaded multipart chunk {} ({})", RawHash, NiceBytes(PayloadSize)); + } + else + { + ZEN_TRACE_CPU("AsyncUploadLooseChunk_Singlepart"); + Storage.PutBuildBlob(BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); + ZEN_CONSOLE_VERBOSE("Uploaded chunk {} ({})", RawHash, NiceBytes(PayloadSize)); + UploadStats.ChunksBytes += Payload.GetSize(); + UploadStats.ChunkCount++; + UploadedCompressedChunkSize += Payload.GetSize(); + UploadedRawChunkSize += RawSize; + UploadedChunkCount++; + if (UploadedChunkCount == UploadChunkCount) + { + 
FilteredUploadedBytesPerSecond.Stop(); + } + } + } + }, + Work.DefaultErrorFunction()); + }; + + std::vector<size_t> GenerateBlockIndexes; + + std::atomic<uint64_t> GeneratedBlockCount = 0; + std::atomic<uint64_t> GeneratedBlockByteCount = 0; + + std::vector<uint32_t> CompressLooseChunkOrderIndexes; + + std::atomic<uint64_t> QueuedPendingInMemoryBlocksForUpload = 0; + + // Start upload of any pre-compressed loose chunks + for (const uint32_t LooseChunkOrderIndex : LooseChunkOrderIndexes) + { + CompressLooseChunkOrderIndexes.push_back(LooseChunkOrderIndex); + } + + // Start generation of any non-prebuilt blocks and schedule upload + for (const size_t BlockIndex : BlockIndexes) + { + const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; + if (!AbortFlag) + { + Work.ScheduleWork( + ReadChunkPool, // GetSyncWorkerPool() + [&, BlockIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UploadPartBlobs_GenerateBlock"); + + FilteredGenerateBlockBytesPerSecond.Start(); + + CompositeBuffer Payload; + if (NewBlocks.BlockHeaders[BlockIndex]) + { + Payload = RebuildBlock(Path, + Content, + Lookup, + std::move(NewBlocks.BlockHeaders[BlockIndex]), + NewBlockChunks[BlockIndex], + DiskStats) + .GetCompressed(); + } + else + { + ChunkBlockDescription BlockDescription; + CompressedBuffer CompressedBlock = + GenerateBlock(Path, Content, Lookup, NewBlockChunks[BlockIndex], BlockDescription, DiskStats); + if (!CompressedBlock) + { + throw std::runtime_error(fmt::format("Failed generating block {}", BlockHash)); + } + ZEN_ASSERT(BlockDescription.BlockHash == BlockHash); + Payload = std::move(CompressedBlock).GetCompressed(); + } + + GenerateBlocksStats.GeneratedBlockByteCount += NewBlocks.BlockSizes[BlockIndex]; + GenerateBlocksStats.GeneratedBlockCount++; + GeneratedBlockByteCount += NewBlocks.BlockSizes[BlockIndex]; + GeneratedBlockCount++; + if (GeneratedBlockCount == GenerateBlockIndexes.size()) + { + FilteredGenerateBlockBytesPerSecond.Stop(); + 
} + if (!AbortFlag) + { + AsyncUploadBlock(BlockIndex, BlockHash, std::move(Payload), QueuedPendingInMemoryBlocksForUpload); + } + ZEN_CONSOLE_VERBOSE("Regenerated block {} ({}) containing {} chunks", + NewBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(NewBlocks.BlockSizes[BlockIndex]), + NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + } + }, + Work.DefaultErrorFunction()); + } + } + + std::atomic<uint64_t> CompressedLooseChunkCount = 0; + std::atomic<uint64_t> CompressedLooseChunkByteCount = 0; + std::atomic<uint64_t> RawLooseChunkByteCount = 0; + + // Start compression of any non-precompressed loose chunks and schedule upload + for (const uint32_t CompressLooseChunkOrderIndex : CompressLooseChunkOrderIndexes) + { + const uint32_t ChunkIndex = LooseChunkIndexes[CompressLooseChunkOrderIndex]; + Work.ScheduleWork( + ReadChunkPool, // GetSyncWorkerPool(),// ReadChunkPool, + [&, ChunkIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UploadPartBlobs_CompressChunk"); + + FilteredCompressedBytesPerSecond.Start(); + CompositeBuffer Payload = CompressChunk(Path, Content, Lookup, ChunkIndex, Path / ZenTempChunkFolderName); + ZEN_CONSOLE_VERBOSE("Compressed chunk {} ({} -> {})", + Content.ChunkedContent.ChunkHashes[ChunkIndex], + NiceBytes(Content.ChunkedContent.ChunkRawSizes[ChunkIndex]), + NiceBytes(Payload.GetSize())); + const uint64_t ChunkRawSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + UploadStats.ReadFromDiskBytes += ChunkRawSize; + LooseChunksStats.CompressedChunkBytes += Payload.GetSize(); + LooseChunksStats.CompressedChunkCount++; + CompressedLooseChunkByteCount += Payload.GetSize(); + CompressedLooseChunkCount++; + RawLooseChunkByteCount += ChunkRawSize; + if (CompressedLooseChunkCount == CompressLooseChunkOrderIndexes.size()) + { + FilteredCompressedBytesPerSecond.Stop(); + } + if (!AbortFlag) + { + AsyncUploadLooseChunk(Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkRawSize, std::move(Payload)); 
+ } + } + }, + Work.DefaultErrorFunction()); + } + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + FilteredCompressedBytesPerSecond.Update(CompressedLooseChunkByteCount.load()); + FilteredGenerateBlockBytesPerSecond.Update(GeneratedBlockByteCount.load()); + FilteredUploadedBytesPerSecond.Update(UploadedCompressedChunkSize.load() + UploadedBlockSize.load()); + uint64_t UploadedRawSize = UploadedRawChunkSize.load() + UploadedBlockSize.load(); + uint64_t UploadedCompressedSize = UploadedCompressedChunkSize.load() + UploadedBlockSize.load(); + + std::string Details = fmt::format( + "Compressed {}/{} ({}/{}) chunks. " + "Uploaded {}/{} ({}/{}) blobs " + "({} {}bits/s)", + CompressedLooseChunkCount.load(), + CompressLooseChunkOrderIndexes.size(), + NiceBytes(RawLooseChunkByteCount), + NiceBytes(TotalLooseChunksSize), + + UploadedBlockCount.load() + UploadedChunkCount.load(), + UploadBlockCount + UploadChunkCount, + NiceBytes(UploadedRawSize), + NiceBytes(TotalRawSize), + + NiceBytes(UploadedCompressedSize), + NiceNum(FilteredUploadedBytesPerSecond.GetCurrent())); + + ProgressBar.UpdateState({.Task = "Uploading blobs ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(TotalRawSize), + .RemainingCount = gsl::narrow<uint64_t>(TotalRawSize - UploadedRawSize)}, + false); + }); + + ZEN_ASSERT(AbortFlag || QueuedPendingInMemoryBlocksForUpload.load() == 0); + + ProgressBar.Finish(); + UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); + GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGenerateBlockBytesPerSecond.GetElapsedTimeUS(); + LooseChunksStats.CompressChunksElapsedWallTimeUS = FilteredCompressedBytesPerSecond.GetElapsedTimeUS(); + } + } + + std::vector<size_t> FindReuseBlocks(const std::vector<ChunkBlockDescription>& KnownBlocks, + std::span<const IoHash> ChunkHashes, + std::span<const uint32_t> ChunkIndexes, + uint8_t 
MinPercentLimit, + std::vector<uint32_t>& OutUnusedChunkIndexes, + FindBlocksStatistics& FindBlocksStats) + { + ZEN_TRACE_CPU("FindReuseBlocks"); + + // Find all blocks with a usage level higher than MinPercentLimit + // Pick out the blocks with usage higher or equal to MinPercentLimit + // Sort them with highest size usage - most usage first + // Make a list of all chunks and mark them as not found + // For each block, recalculate the block has usage percent based on the chunks marked as not found + // If the block still reaches MinPercentLimit, keep it and remove the matching chunks from the not found list + // Repeat for following all remaining block that initially matched MinPercentLimit + + std::vector<size_t> FilteredReuseBlockIndexes; + + uint32_t ChunkCount = gsl::narrow<uint32_t>(ChunkHashes.size()); + std::vector<bool> ChunkFound(ChunkCount, false); + + if (ChunkCount > 0) + { + if (!KnownBlocks.empty()) + { + Stopwatch ReuseTimer; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(ChunkIndexes.size()); + for (uint32_t ChunkIndex : ChunkIndexes) + { + ChunkHashToChunkIndex.insert_or_assign(ChunkHashes[ChunkIndex], ChunkIndex); + } + + std::vector<size_t> BlockSizes(KnownBlocks.size(), 0); + std::vector<size_t> BlockUseSize(KnownBlocks.size(), 0); + + std::vector<size_t> ReuseBlockIndexes; + + for (size_t KnownBlockIndex = 0; KnownBlockIndex < KnownBlocks.size(); KnownBlockIndex++) + { + const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + if (KnownBlock.BlockHash != IoHash::Zero && + KnownBlock.ChunkRawHashes.size() == KnownBlock.ChunkCompressedLengths.size()) + { + size_t BlockAttachmentCount = KnownBlock.ChunkRawHashes.size(); + if (BlockAttachmentCount == 0) + { + continue; + } + size_t ReuseSize = 0; + size_t BlockSize = 0; + size_t FoundAttachmentCount = 0; + size_t BlockChunkCount = KnownBlock.ChunkRawHashes.size(); + for (size_t BlockChunkIndex = 0; BlockChunkIndex < 
BlockChunkCount; BlockChunkIndex++) + { + const IoHash& BlockChunkHash = KnownBlock.ChunkRawHashes[BlockChunkIndex]; + const uint32_t BlockChunkSize = KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; + BlockSize += BlockChunkSize; + if (ChunkHashToChunkIndex.contains(BlockChunkHash)) + { + ReuseSize += BlockChunkSize; + FoundAttachmentCount++; + } + } + + size_t ReusePercent = (ReuseSize * 100) / BlockSize; + + if (ReusePercent >= MinPercentLimit) + { + ZEN_CONSOLE_VERBOSE("Reusing block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundAttachmentCount, + ReusePercent); + ReuseBlockIndexes.push_back(KnownBlockIndex); + + BlockSizes[KnownBlockIndex] = BlockSize; + BlockUseSize[KnownBlockIndex] = ReuseSize; + } + else if (FoundAttachmentCount > 0) + { + ZEN_CONSOLE_VERBOSE("Skipping block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundAttachmentCount, + ReusePercent); + FindBlocksStats.RejectedBlockCount++; + FindBlocksStats.RejectedChunkCount += FoundAttachmentCount; + FindBlocksStats.RejectedByteCount += ReuseSize; + } + } + } + + if (!ReuseBlockIndexes.empty()) + { + std::sort(ReuseBlockIndexes.begin(), ReuseBlockIndexes.end(), [&](size_t Lhs, size_t Rhs) { + return BlockUseSize[Lhs] > BlockUseSize[Rhs]; + }); + + for (size_t KnownBlockIndex : ReuseBlockIndexes) + { + std::vector<uint32_t> FoundChunkIndexes; + size_t BlockSize = 0; + size_t AdjustedReuseSize = 0; + const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for (size_t BlockChunkIndex = 0; BlockChunkIndex < KnownBlock.ChunkRawHashes.size(); BlockChunkIndex++) + { + const IoHash& BlockChunkHash = KnownBlock.ChunkRawHashes[BlockChunkIndex]; + const uint32_t BlockChunkSize = KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; + BlockSize += BlockChunkSize; + if (auto It = ChunkHashToChunkIndex.find(BlockChunkHash); It != ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = It->second; + if (!ChunkFound[ChunkIndex]) + { 
+ FoundChunkIndexes.push_back(ChunkIndex); + AdjustedReuseSize += KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; + } + } + } + + size_t ReusePercent = (AdjustedReuseSize * 100) / BlockSize; + + if (ReusePercent >= MinPercentLimit) + { + ZEN_CONSOLE_VERBOSE("Reusing block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundChunkIndexes.size(), + ReusePercent); + FilteredReuseBlockIndexes.push_back(KnownBlockIndex); + + for (uint32_t ChunkIndex : FoundChunkIndexes) + { + ChunkFound[ChunkIndex] = true; + } + FindBlocksStats.AcceptedChunkCount += FoundChunkIndexes.size(); + FindBlocksStats.AcceptedByteCount += AdjustedReuseSize; + FindBlocksStats.AcceptedReduntantChunkCount += KnownBlock.ChunkRawHashes.size() - FoundChunkIndexes.size(); + FindBlocksStats.AcceptedReduntantByteCount += BlockSize - AdjustedReuseSize; + } + else + { + ZEN_CONSOLE_VERBOSE("Skipping block {}. filtered usage level: {}%", KnownBlock.BlockHash, ReusePercent); + FindBlocksStats.RejectedBlockCount++; + FindBlocksStats.RejectedChunkCount += FoundChunkIndexes.size(); + FindBlocksStats.RejectedByteCount += AdjustedReuseSize; + } + } + } + } + OutUnusedChunkIndexes.reserve(ChunkIndexes.size() - FindBlocksStats.AcceptedChunkCount); + for (uint32_t ChunkIndex : ChunkIndexes) + { + if (!ChunkFound[ChunkIndex]) + { + OutUnusedChunkIndexes.push_back(ChunkIndex); + } + } + } + return FilteredReuseBlockIndexes; + }; + + void UploadFolder(BuildStorage& Storage, + const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& Path, + const std::filesystem::path& ManifestPath, + const uint8_t BlockReuseMinPercentLimit, + bool AllowMultiparts, + const CbObject& MetaData, + bool CreateBuild, + bool IgnoreExistingBlocks, + bool PostUploadVerify) + { + Stopwatch ProcessTimer; + + const std::filesystem::path ZenTempFolder = Path / ZenTempFolderName; + CreateDirectories(ZenTempFolder); + CleanDirectory(ZenTempFolder, {}); + auto _ = 
MakeGuard([&]() { + if (CleanDirectory(ZenTempFolder, {})) + { + std::filesystem::remove(ZenTempFolder); + } + }); + CreateDirectories(Path / ZenTempBlockFolderName); + CreateDirectories(Path / ZenTempChunkFolderName); + + CbObject ChunkerParameters; + + struct PrepareBuildResult + { + std::vector<ChunkBlockDescription> KnownBlocks; + uint64_t PreferredMultipartChunkSize = DefaultPreferredMultipartChunkSize; + uint64_t PayloadSize = 0; + uint64_t PrepareBuildTimeMs = 0; + uint64_t FindBlocksTimeMs = 0; + uint64_t ElapsedTimeMs = 0; + }; + + FindBlocksStatistics FindBlocksStats; + + std::future<PrepareBuildResult> PrepBuildResultFuture = + GetSmallWorkerPool(EWorkloadType::Burst) + .EnqueueTask(std::packaged_task<PrepareBuildResult()>{ + [&Storage, BuildId, &MetaData, CreateBuild, AllowMultiparts, IgnoreExistingBlocks, &FindBlocksStats] { + ZEN_TRACE_CPU("PrepareBuild"); + + PrepareBuildResult Result; + Stopwatch Timer; + if (CreateBuild) + { + ZEN_TRACE_CPU("CreateBuild"); + + Stopwatch PutBuildTimer; + CbObject PutBuildResult = Storage.PutBuild(BuildId, MetaData); + Result.PrepareBuildTimeMs = PutBuildTimer.GetElapsedTimeMs(); + Result.PreferredMultipartChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(Result.PreferredMultipartChunkSize); + Result.PayloadSize = MetaData.GetSize(); + } + else + { + ZEN_TRACE_CPU("PutBuild"); + Stopwatch GetBuildTimer; + CbObject Build = Storage.GetBuild(BuildId); + Result.PrepareBuildTimeMs = GetBuildTimer.GetElapsedTimeMs(); + Result.PayloadSize = Build.GetSize(); + if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + Result.PreferredMultipartChunkSize = ChunkSize; + } + else if (AllowMultiparts) + { + ZEN_WARN("PreferredMultipartChunkSize is unknown. 
Defaulting to '{}'", + NiceBytes(Result.PreferredMultipartChunkSize)); + } + } + + if (!IgnoreExistingBlocks) + { + ZEN_TRACE_CPU("FindBlocks"); + Stopwatch KnownBlocksTimer; + Result.KnownBlocks = Storage.FindBlocks(BuildId); + FindBlocksStats.FindBlockTimeMS = KnownBlocksTimer.GetElapsedTimeMs(); + FindBlocksStats.FoundBlockCount = Result.KnownBlocks.size(); + Result.FindBlocksTimeMs = KnownBlocksTimer.GetElapsedTimeMs(); + } + Result.ElapsedTimeMs = Timer.GetElapsedTimeMs(); + return Result; + }}); + + ChunkedFolderContent LocalContent; + + GetFolderContentStatistics LocalFolderScanStats; + ChunkingStatistics ChunkingStats; + { + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + auto IsAcceptedFile = [ExcludeExtensions = + DefaultExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> bool { + for (const std::string_view& ExcludeExtension : ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; + }; + + auto ParseManifest = [](const std::filesystem::path& Path, + const std::filesystem::path& ManifestPath) -> std::vector<std::filesystem::path> { + std::vector<std::filesystem::path> AssetPaths; + std::filesystem::path AbsoluteManifestPath = ManifestPath.is_absolute() ? 
ManifestPath : Path / ManifestPath; + IoBuffer ManifestContent = ReadFile(AbsoluteManifestPath).Flatten(); + std::string_view ManifestString((const char*)ManifestContent.GetView().GetData(), ManifestContent.GetSize()); + std::string_view::size_type Offset = 0; + while (Offset < ManifestContent.GetSize()) + { + size_t PathBreakOffset = ManifestString.find_first_of("\t\r\n", Offset); + if (PathBreakOffset == std::string_view::npos) + { + PathBreakOffset = ManifestContent.GetSize(); + } + std::string_view AssetPath = ManifestString.substr(Offset, PathBreakOffset - Offset); + if (!AssetPath.empty()) + { + AssetPaths.emplace_back(std::filesystem::path(AssetPath)); + } + Offset = PathBreakOffset; + size_t EolOffset = ManifestString.find_first_of("\r\n", Offset); + if (EolOffset == std::string_view::npos) + { + break; + } + Offset = EolOffset; + size_t LineBreakOffset = ManifestString.find_first_not_of("\t\r\n", Offset); + if (LineBreakOffset == std::string_view::npos) + { + break; + } + Offset = LineBreakOffset; + } + return AssetPaths; + }; + + Stopwatch ScanTimer; + FolderContent Content; + if (ManifestPath.empty()) + { + std::filesystem::path ExcludeManifestPath = Path / ZenExcludeManifestName; + tsl::robin_set<std::string> ExcludeAssetPaths; + if (std::filesystem::is_regular_file(ExcludeManifestPath)) + { + std::vector<std::filesystem::path> AssetPaths = ParseManifest(Path, ExcludeManifestPath); + ExcludeAssetPaths.reserve(AssetPaths.size()); + for (const std::filesystem::path& AssetPath : AssetPaths) + { + ExcludeAssetPaths.insert(AssetPath.generic_string()); + } + } + Content = GetFolderContent( + LocalFolderScanStats, + Path, + std::move(IsAcceptedFolder), + [&IsAcceptedFile, + &ExcludeAssetPaths](const std::string_view& RelativePath, uint64_t Size, uint32_t Attributes) -> bool { + if (RelativePath == ZenExcludeManifestName) + { + return false; + } + if (!IsAcceptedFile(RelativePath, Size, Attributes)) + { + return false; + } + if 
(ExcludeAssetPaths.contains(std::filesystem::path(RelativePath).generic_string())) + { + return false; + } + return true; + }, + GetMediumWorkerPool(EWorkloadType::Burst), + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { + ZEN_DEBUG("Found {} files in '{}'...", LocalFolderScanStats.AcceptedFileCount.load(), Path); + }, + AbortFlag); + } + else + { + Stopwatch ManifestParseTimer; + std::vector<std::filesystem::path> AssetPaths = ParseManifest(Path, ManifestPath); + for (const std::filesystem::path& AssetPath : AssetPaths) + { + Content.Paths.push_back(AssetPath); + Content.RawSizes.push_back(std::filesystem::file_size(Path / AssetPath)); +#if ZEN_PLATFORM_WINDOWS + Content.Attributes.push_back(GetFileAttributes(Path / AssetPath)); +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + Content.Attributes.push_back(GetFileMode(Path / AssetPath)); +#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); + LocalFolderScanStats.AcceptedFileCount++; + } + if (ManifestPath.is_relative()) + { + Content.Paths.push_back(ManifestPath); + Content.RawSizes.push_back(std::filesystem::file_size(ManifestPath)); +#if ZEN_PLATFORM_WINDOWS + Content.Attributes.push_back(GetFileAttributes(ManifestPath)); +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + Content.Attributes.push_back(GetFileMode(ManifestPath)); +#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + + LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); + LocalFolderScanStats.AcceptedFileCount++; + } + LocalFolderScanStats.FoundFileByteCount.store(LocalFolderScanStats.AcceptedFileByteCount); + LocalFolderScanStats.FoundFileCount.store(LocalFolderScanStats.AcceptedFileCount); + LocalFolderScanStats.ElapsedWallTimeUS = ManifestParseTimer.GetElapsedTimeUs(); + } + + std::unique_ptr<ChunkingController> ChunkController = CreateBasicChunkingController(); + { + CbObjectWriter 
ChunkParametersWriter; + ChunkParametersWriter.AddString("name"sv, ChunkController->GetName()); + ChunkParametersWriter.AddObject("parameters"sv, ChunkController->GetParameters()); + ChunkerParameters = ChunkParametersWriter.Save(); + } + + std::uint64_t TotalRawSize = std::accumulate(Content.RawSizes.begin(), Content.RawSizes.end(), std::uint64_t(0)); + + { + ProgressBar ProgressBar(UsePlainProgress); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + LocalContent = ChunkFolderContent( + ChunkingStats, + GetMediumWorkerPool(EWorkloadType::Burst), + Path, + Content, + *ChunkController, + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + Content.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(TotalRawSize), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = TotalRawSize, + .RemainingCount = TotalRawSize - ChunkingStats.BytesHashed.load()}, + false); + }, + AbortFlag); + if (AbortFlag) + { + return; + } + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + } + + ZEN_CONSOLE("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. 
Average hash rate {}B/sec", + LocalContent.Paths.size(), + NiceBytes(TotalRawSize), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load()), + Path, + NiceTimeSpanMs(ScanTimer.GetElapsedTimeMs()), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); + } + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); + + GenerateBlocksStatistics GenerateBlocksStats; + LooseChunksStatistics LooseChunksStats; + + std::vector<size_t> ReuseBlockIndexes; + std::vector<uint32_t> NewBlockChunkIndexes; + + PrepareBuildResult PrepBuildResult = PrepBuildResultFuture.get(); + + ZEN_CONSOLE("Build prepare took {}. {} took {}, payload size {}{}", + NiceTimeSpanMs(PrepBuildResult.ElapsedTimeMs), + CreateBuild ? "PutBuild" : "GetBuild", + NiceTimeSpanMs(PrepBuildResult.PrepareBuildTimeMs), + NiceBytes(PrepBuildResult.PayloadSize), + IgnoreExistingBlocks ? "" + : fmt::format(". Found {} blocks in {}", + PrepBuildResult.KnownBlocks.size(), + NiceTimeSpanMs(PrepBuildResult.FindBlocksTimeMs))); + + const std::uint64_t LargeAttachmentSize = AllowMultiparts ? 
PrepBuildResult.PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + + Stopwatch BlockArrangeTimer; + + std::vector<std::uint32_t> LooseChunkIndexes; + { + bool EnableBlocks = true; + std::vector<std::uint32_t> BlockChunkIndexes; + for (uint32_t ChunkIndex = 0; ChunkIndex < LocalContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) + { + const uint64_t ChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + if (!EnableBlocks || ChunkRawSize == 0 || ChunkRawSize > DefaultChunksBlockParams.MaxChunkEmbedSize) + { + LooseChunkIndexes.push_back(ChunkIndex); + LooseChunksStats.ChunkByteCount += ChunkRawSize; + } + else + { + BlockChunkIndexes.push_back(ChunkIndex); + FindBlocksStats.PotentialChunkByteCount += ChunkRawSize; + } + } + FindBlocksStats.PotentialChunkCount = BlockChunkIndexes.size(); + LooseChunksStats.ChunkCount = LooseChunkIndexes.size(); + + if (IgnoreExistingBlocks) + { + ZEN_CONSOLE("Ignoring any existing blocks in store"); + NewBlockChunkIndexes = std::move(BlockChunkIndexes); + } + else + { + ReuseBlockIndexes = FindReuseBlocks(PrepBuildResult.KnownBlocks, + LocalContent.ChunkedContent.ChunkHashes, + BlockChunkIndexes, + BlockReuseMinPercentLimit, + NewBlockChunkIndexes, + FindBlocksStats); + FindBlocksStats.AcceptedBlockCount = ReuseBlockIndexes.size(); + + for (const ChunkBlockDescription& Description : PrepBuildResult.KnownBlocks) + { + for (uint32_t ChunkRawLength : Description.ChunkRawLengths) + { + FindBlocksStats.FoundBlockByteCount += ChunkRawLength; + } + FindBlocksStats.FoundBlockChunkCount += Description.ChunkRawHashes.size(); + } + } + } + + std::vector<std::vector<uint32_t>> NewBlockChunks; + ArrangeChunksIntoBlocks(LocalContent, LocalLookup, DefaultChunksBlockParams.MaxBlockSize, NewBlockChunkIndexes, NewBlockChunks); + + FindBlocksStats.NewBlocksCount = NewBlockChunks.size(); + for (uint32_t ChunkIndex : NewBlockChunkIndexes) + { + FindBlocksStats.NewBlocksChunkByteCount += 
LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + FindBlocksStats.NewBlocksChunkCount = NewBlockChunkIndexes.size(); + + const double AcceptedByteCountPercent = FindBlocksStats.PotentialChunkByteCount > 0 + ? (100.0 * FindBlocksStats.AcceptedByteCount / FindBlocksStats.PotentialChunkByteCount) + : 0.0; + + const double AcceptedReduntantByteCountPercent = + FindBlocksStats.AcceptedByteCount > 0 ? (100.0 * FindBlocksStats.AcceptedReduntantByteCount) / + (FindBlocksStats.AcceptedByteCount + FindBlocksStats.AcceptedReduntantByteCount) + : 0.0; + ZEN_CONSOLE( + "Found {} chunks in {} ({}) blocks eligeble for reuse in {}\n" + " Reusing {} ({}) matching chunks in {} blocks ({:.1f}%)\n" + " Accepting {} ({}) redundant chunks ({:.1f}%)\n" + " Rejected {} ({}) chunks in {} blocks\n" + " Arranged {} ({}) chunks in {} new blocks\n" + " Keeping {} ({}) chunks as loose chunks\n" + " Discovery completed in {}", + FindBlocksStats.FoundBlockChunkCount, + FindBlocksStats.FoundBlockCount, + NiceBytes(FindBlocksStats.FoundBlockByteCount), + NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS), + + FindBlocksStats.AcceptedChunkCount, + NiceBytes(FindBlocksStats.AcceptedByteCount), + FindBlocksStats.AcceptedBlockCount, + AcceptedByteCountPercent, + + FindBlocksStats.AcceptedReduntantChunkCount, + NiceBytes(FindBlocksStats.AcceptedReduntantByteCount), + AcceptedReduntantByteCountPercent, + + FindBlocksStats.RejectedChunkCount, + NiceBytes(FindBlocksStats.RejectedByteCount), + FindBlocksStats.RejectedBlockCount, + + FindBlocksStats.NewBlocksChunkCount, + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount), + FindBlocksStats.NewBlocksCount, + + LooseChunksStats.ChunkCount, + NiceBytes(LooseChunksStats.ChunkByteCount), + + NiceTimeSpanMs(BlockArrangeTimer.GetElapsedTimeMs())); + + DiskStatistics DiskStats; + UploadStatistics UploadStats; + GeneratedBlocks NewBlocks; + + if (!NewBlockChunks.empty()) + { + Stopwatch GenerateBuildBlocksTimer; + auto __ = MakeGuard([&]() { + uint64_t 
BlockGenerateTimeUs = GenerateBuildBlocksTimer.GetElapsedTimeUs(); + ZEN_CONSOLE("Generated {} ({}) and uploaded {} ({}) blocks in {}. Generate speed: {}B/sec. Transfer speed {}bits/sec.", + GenerateBlocksStats.GeneratedBlockCount.load(), + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount), + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes.load()), + NiceTimeSpanMs(BlockGenerateTimeUs / 1000), + NiceNum(GetBytesPerSecond(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, + GenerateBlocksStats.GeneratedBlockByteCount)), + NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.BlocksBytes * 8))); + }); + GenerateBuildBlocks(Path, + LocalContent, + LocalLookup, + Storage, + BuildId, + NewBlockChunks, + NewBlocks, + DiskStats, + UploadStats, + GenerateBlocksStats); + } + + CbObject PartManifest; + { + CbObjectWriter PartManifestWriter; + Stopwatch ManifestGenerationTimer; + auto __ = MakeGuard([&]() { + ZEN_CONSOLE("Generated build part manifest in {} ({})", + NiceTimeSpanMs(ManifestGenerationTimer.GetElapsedTimeMs()), + NiceBytes(PartManifestWriter.GetSaveSize())); + }); + PartManifestWriter.AddObject("chunker"sv, ChunkerParameters); + + std::vector<IoHash> AllChunkBlockHashes; + std::vector<ChunkBlockDescription> AllChunkBlockDescriptions; + AllChunkBlockHashes.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); + AllChunkBlockDescriptions.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); + for (size_t ReuseBlockIndex : ReuseBlockIndexes) + { + AllChunkBlockDescriptions.push_back(PrepBuildResult.KnownBlocks[ReuseBlockIndex]); + AllChunkBlockHashes.push_back(PrepBuildResult.KnownBlocks[ReuseBlockIndex].BlockHash); + } + AllChunkBlockDescriptions.insert(AllChunkBlockDescriptions.end(), + NewBlocks.BlockDescriptions.begin(), + NewBlocks.BlockDescriptions.end()); + for (const ChunkBlockDescription& BlockDescription : NewBlocks.BlockDescriptions) + { + 
AllChunkBlockHashes.push_back(BlockDescription.BlockHash); + } +#if EXTRA_VERIFY + tsl::robin_map<IoHash, size_t, IoHash::Hasher> ChunkHashToAbsoluteChunkIndex; + std::vector<IoHash> AbsoluteChunkHashes; + AbsoluteChunkHashes.reserve(LocalContent.ChunkedContent.ChunkHashes.size()); + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + ChunkHashToAbsoluteChunkIndex.insert({LocalContent.ChunkedContent.ChunkHashes[ChunkIndex], AbsoluteChunkHashes.size()}); + AbsoluteChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + } + for (const ChunkBlockDescription& Block : AllChunkBlockDescriptions) + { + for (const IoHash& ChunkHash : Block.ChunkHashes) + { + ChunkHashToAbsoluteChunkIndex.insert({ChunkHash, AbsoluteChunkHashes.size()}); + AbsoluteChunkHashes.push_back(ChunkHash); + } + } + for (const IoHash& ChunkHash : LocalContent.ChunkedContent.ChunkHashes) + { + ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(ChunkHash)] == ChunkHash); + ZEN_ASSERT(LocalContent.ChunkedContent.ChunkHashes[LocalLookup.ChunkHashToChunkIndex.at(ChunkHash)] == ChunkHash); + } + for (const uint32_t ChunkIndex : LocalContent.ChunkedContent.ChunkOrders) + { + ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex])] == + LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + ZEN_ASSERT(LocalLookup.ChunkHashToChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]) == ChunkIndex); + } +#endif // EXTRA_VERIFY + std::vector<uint32_t> AbsoluteChunkOrders = CalculateAbsoluteChunkOrders(LocalContent.ChunkedContent.ChunkHashes, + LocalContent.ChunkedContent.ChunkOrders, + LocalLookup.ChunkHashToChunkIndex, + LooseChunkIndexes, + AllChunkBlockDescriptions); + +#if EXTRA_VERIFY + for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); ChunkOrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndex]; + 
uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[ChunkOrderIndex]; + const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + } +#endif // EXTRA_VERIFY + + WriteBuildContentToCompactBinary(PartManifestWriter, + LocalContent.Platform, + LocalContent.Paths, + LocalContent.RawHashes, + LocalContent.RawSizes, + LocalContent.Attributes, + LocalContent.ChunkedContent.SequenceRawHashes, + LocalContent.ChunkedContent.ChunkCounts, + LocalContent.ChunkedContent.ChunkHashes, + LocalContent.ChunkedContent.ChunkRawSizes, + AbsoluteChunkOrders, + LooseChunkIndexes, + AllChunkBlockHashes); + +#if EXTRA_VERIFY + { + ChunkedFolderContent VerifyFolderContent; + + std::vector<uint32_t> OutAbsoluteChunkOrders; + std::vector<IoHash> OutLooseChunkHashes; + std::vector<uint64_t> OutLooseChunkRawSizes; + std::vector<IoHash> OutBlockRawHashes; + ReadBuildContentFromCompactBinary(PartManifestWriter.Save(), + VerifyFolderContent.Platform, + VerifyFolderContent.Paths, + VerifyFolderContent.RawHashes, + VerifyFolderContent.RawSizes, + VerifyFolderContent.Attributes, + VerifyFolderContent.ChunkedContent.SequenceRawHashes, + VerifyFolderContent.ChunkedContent.ChunkCounts, + OutAbsoluteChunkOrders, + OutLooseChunkHashes, + OutLooseChunkRawSizes, + OutBlockRawHashes); + ZEN_ASSERT(OutBlockRawHashes == AllChunkBlockHashes); + + for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = OutAbsoluteChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + } + + CalculateLocalChunkOrders(OutAbsoluteChunkOrders, + 
OutLooseChunkHashes, + OutLooseChunkRawSizes, + AllChunkBlockDescriptions, + VerifyFolderContent.ChunkedContent.ChunkHashes, + VerifyFolderContent.ChunkedContent.ChunkRawSizes, + VerifyFolderContent.ChunkedContent.ChunkOrders); + + ZEN_ASSERT(LocalContent.Paths == VerifyFolderContent.Paths); + ZEN_ASSERT(LocalContent.RawHashes == VerifyFolderContent.RawHashes); + ZEN_ASSERT(LocalContent.RawSizes == VerifyFolderContent.RawSizes); + ZEN_ASSERT(LocalContent.Attributes == VerifyFolderContent.Attributes); + ZEN_ASSERT(LocalContent.ChunkedContent.SequenceRawHashes == VerifyFolderContent.ChunkedContent.SequenceRawHashes); + ZEN_ASSERT(LocalContent.ChunkedContent.ChunkCounts == VerifyFolderContent.ChunkedContent.ChunkCounts); + + for (uint32_t OrderIndex = 0; OrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = VerifyFolderContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = VerifyFolderContent.ChunkedContent.ChunkHashes[VerifyChunkIndex]; + uint64_t VerifyChunkRawSize = VerifyFolderContent.ChunkedContent.ChunkRawSizes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize); + } + } +#endif // EXTRA_VERIFY + PartManifest = PartManifestWriter.Save(); + } + + Stopwatch PutBuildPartResultTimer; + std::pair<IoHash, std::vector<IoHash>> PutBuildPartResult = Storage.PutBuildPart(BuildId, BuildPartId, BuildPartName, PartManifest); + ZEN_CONSOLE("PutBuildPart took {}, payload size {}. 
{} attachments are needed.", + NiceTimeSpanMs(PutBuildPartResultTimer.GetElapsedTimeMs()), + NiceBytes(PartManifest.GetSize()), + PutBuildPartResult.second.size()); + IoHash PartHash = PutBuildPartResult.first; + + auto UploadAttachments = [&](std::span<IoHash> RawHashes) { + if (!AbortFlag) + { + ZEN_CONSOLE_VERBOSE("Uploading attachments: {}", FormatArray<IoHash>(RawHashes, "\n "sv)); + + UploadStatistics TempUploadStats; + GenerateBlocksStatistics TempGenerateBlocksStats; + LooseChunksStatistics TempLooseChunksStats; + + Stopwatch TempUploadTimer; + auto __ = MakeGuard([&]() { + uint64_t TempChunkUploadTimeUs = TempUploadTimer.GetElapsedTimeUs(); + ZEN_CONSOLE( + "Generated {} ({} {}B/s) and uploaded {} ({}) blocks. " + "Compressed {} ({} {}B/s) and uploaded {} ({}) chunks. " + "Transferred {} ({}bits/s) in {}", + TempGenerateBlocksStats.GeneratedBlockCount.load(), + NiceBytes(TempGenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceNum(GetBytesPerSecond(TempGenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, + TempGenerateBlocksStats.GeneratedBlockByteCount)), + TempUploadStats.BlockCount.load(), + NiceBytes(TempUploadStats.BlocksBytes), + + TempLooseChunksStats.CompressedChunkCount.load(), + NiceBytes(TempLooseChunksStats.CompressedChunkBytes.load()), + NiceNum(GetBytesPerSecond(TempLooseChunksStats.CompressChunksElapsedWallTimeUS, + TempLooseChunksStats.CompressedChunkBytes)), + TempUploadStats.ChunkCount.load(), + NiceBytes(TempUploadStats.ChunksBytes), + + NiceBytes(TempUploadStats.BlocksBytes + TempUploadStats.ChunksBytes), + NiceNum(GetBytesPerSecond(TempUploadStats.ElapsedWallTimeUS, TempUploadStats.ChunksBytes * 8)), + NiceTimeSpanMs(TempChunkUploadTimeUs / 1000)); + }); + UploadPartBlobs(Storage, + BuildId, + Path, + LocalContent, + LocalLookup, + RawHashes, + NewBlockChunks, + NewBlocks, + LooseChunkIndexes, + LargeAttachmentSize, + DiskStats, + TempUploadStats, + TempGenerateBlocksStats, + TempLooseChunksStats); + UploadStats += 
TempUploadStats; + LooseChunksStats += TempLooseChunksStats; + GenerateBlocksStats += TempGenerateBlocksStats; + } + }; + if (IgnoreExistingBlocks) + { + ZEN_CONSOLE_VERBOSE("PutBuildPart uploading all attachments, needs are: {}", + FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); + + std::vector<IoHash> ForceUploadChunkHashes; + ForceUploadChunkHashes.reserve(LooseChunkIndexes.size()); + + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + ForceUploadChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + } + + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockHeaders.size(); BlockIndex++) + { + if (NewBlocks.BlockHeaders[BlockIndex]) + { + // Block was not uploaded during generation + ForceUploadChunkHashes.push_back(NewBlocks.BlockDescriptions[BlockIndex].BlockHash); + } + } + UploadAttachments(ForceUploadChunkHashes); + } + else if (!PutBuildPartResult.second.empty()) + { + ZEN_CONSOLE_VERBOSE("PutBuildPart needs attachments: {}", FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); + UploadAttachments(PutBuildPartResult.second); + } + + while (!AbortFlag) + { + Stopwatch FinalizeBuildPartTimer; + std::vector<IoHash> Needs = Storage.FinalizeBuildPart(BuildId, BuildPartId, PartHash); + ZEN_CONSOLE("FinalizeBuildPart took {}. 
{} attachments are missing.", + NiceTimeSpanMs(FinalizeBuildPartTimer.GetElapsedTimeMs()), + Needs.size()); + if (Needs.empty()) + { + break; + } + ZEN_CONSOLE_VERBOSE("FinalizeBuildPart needs attachments: {}", FormatArray<IoHash>(Needs, "\n "sv)); + UploadAttachments(Needs); + } + + if (CreateBuild && !AbortFlag) + { + Stopwatch FinalizeBuildTimer; + Storage.FinalizeBuild(BuildId); + ZEN_CONSOLE("FinalizeBuild took {}", NiceTimeSpanMs(FinalizeBuildTimer.GetElapsedTimeMs())); + } + + if (!NewBlocks.BlockDescriptions.empty() && !AbortFlag) + { + uint64_t UploadBlockMetadataCount = 0; + std::vector<IoHash> BlockHashes; + BlockHashes.reserve(NewBlocks.BlockDescriptions.size()); + Stopwatch UploadBlockMetadataTimer; + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockDescriptions.size(); BlockIndex++) + { + const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; + if (!NewBlocks.MetaDataHasBeenUploaded[BlockIndex]) + { + const CbObject BlockMetaData = + BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); + Storage.PutBlockMetadata(BuildId, BlockHash, BlockMetaData); + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + UploadBlockMetadataCount++; + } + BlockHashes.push_back(BlockHash); + } + if (UploadBlockMetadataCount > 0) + { + uint64_t ElapsedUS = UploadBlockMetadataTimer.GetElapsedTimeUs(); + UploadStats.ElapsedWallTimeUS += ElapsedUS; + ZEN_CONSOLE("Uploaded metadata for {} blocks in {}", UploadBlockMetadataCount, NiceTimeSpanMs(ElapsedUS / 1000)); + } + } + + if (PostUploadVerify && !AbortFlag) + { + ValidateBuildPart(Storage, BuildId, BuildPartId, BuildPartName); + } + + const double DeltaByteCountPercent = + ChunkingStats.BytesHashed > 0 + ? 
(100.0 * (FindBlocksStats.NewBlocksChunkByteCount + LooseChunksStats.CompressedChunkBytes)) / (ChunkingStats.BytesHashed) + : 0.0; + + const std::string LargeAttachmentStats = + (LargeAttachmentSize != (uint64_t)-1) ? fmt::format(" ({} as multipart)", UploadStats.MultipartAttachmentCount.load()) : ""; + + ZEN_CONSOLE_VERBOSE( + "Folder scanning stats:" + "\n FoundFileCount: {}" + "\n FoundFileByteCount: {}" + "\n AcceptedFileCount: {}" + "\n AcceptedFileByteCount: {}" + "\n ElapsedWallTimeUS: {}", + LocalFolderScanStats.FoundFileCount.load(), + NiceBytes(LocalFolderScanStats.FoundFileByteCount.load()), + LocalFolderScanStats.AcceptedFileCount.load(), + NiceBytes(LocalFolderScanStats.AcceptedFileByteCount.load()), + NiceLatencyNs(LocalFolderScanStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Chunking stats:" + "\n FilesProcessed: {}" + "\n FilesChunked: {}" + "\n BytesHashed: {}" + "\n UniqueChunksFound: {}" + "\n UniqueSequencesFound: {}" + "\n UniqueBytesFound: {}" + "\n ElapsedWallTimeUS: {}", + ChunkingStats.FilesProcessed.load(), + ChunkingStats.FilesChunked.load(), + NiceBytes(ChunkingStats.BytesHashed.load()), + ChunkingStats.UniqueChunksFound.load(), + ChunkingStats.UniqueSequencesFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load()), + NiceLatencyNs(ChunkingStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Find block stats:" + "\n FindBlockTimeMS: {}" + "\n PotentialChunkCount: {}" + "\n PotentialChunkByteCount: {}" + "\n FoundBlockCount: {}" + "\n FoundBlockChunkCount: {}" + "\n FoundBlockByteCount: {}" + "\n AcceptedBlockCount: {}" + "\n AcceptedChunkCount: {}" + "\n AcceptedByteCount: {}" + "\n RejectedBlockCount: {}" + "\n RejectedChunkCount: {}" + "\n RejectedByteCount: {}" + "\n AcceptedReduntantChunkCount: {}" + "\n AcceptedReduntantByteCount: {}" + "\n NewBlocksCount: {}" + "\n NewBlocksChunkCount: {}" + "\n NewBlocksChunkByteCount: {}", + NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS), + 
FindBlocksStats.PotentialChunkCount, + NiceBytes(FindBlocksStats.PotentialChunkByteCount), + FindBlocksStats.FoundBlockCount, + FindBlocksStats.FoundBlockChunkCount, + NiceBytes(FindBlocksStats.FoundBlockByteCount), + FindBlocksStats.AcceptedBlockCount, + FindBlocksStats.AcceptedChunkCount, + NiceBytes(FindBlocksStats.AcceptedByteCount), + FindBlocksStats.RejectedBlockCount, + FindBlocksStats.RejectedChunkCount, + NiceBytes(FindBlocksStats.RejectedByteCount), + FindBlocksStats.AcceptedReduntantChunkCount, + NiceBytes(FindBlocksStats.AcceptedReduntantByteCount), + FindBlocksStats.NewBlocksCount, + FindBlocksStats.NewBlocksChunkCount, + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount)); + + ZEN_CONSOLE_VERBOSE( + "Generate blocks stats:" + "\n GeneratedBlockByteCount: {}" + "\n GeneratedBlockCount: {}" + "\n GenerateBlocksElapsedWallTimeUS: {}", + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), + GenerateBlocksStats.GeneratedBlockCount.load(), + NiceLatencyNs(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Generate blocks stats:" + "\n ChunkCount: {}" + "\n ChunkByteCount: {}" + "\n CompressedChunkCount: {}" + "\n CompressChunksElapsedWallTimeUS: {}", + LooseChunksStats.ChunkCount, + NiceBytes(LooseChunksStats.ChunkByteCount), + LooseChunksStats.CompressedChunkCount.load(), + NiceBytes(LooseChunksStats.CompressedChunkBytes.load()), + NiceLatencyNs(LooseChunksStats.CompressChunksElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Disk stats:" + "\n OpenReadCount: {}" + "\n OpenWriteCount: {}" + "\n ReadCount: {}" + "\n ReadByteCount: {}" + "\n WriteCount: {}" + "\n WriteByteCount: {}" + "\n CurrentOpenFileCount: {}", + DiskStats.OpenReadCount.load(), + DiskStats.OpenWriteCount.load(), + DiskStats.ReadCount.load(), + NiceBytes(DiskStats.ReadByteCount.load()), + DiskStats.WriteCount.load(), + NiceBytes(DiskStats.WriteByteCount.load()), + DiskStats.CurrentOpenFileCount.load()); + + ZEN_CONSOLE_VERBOSE( + 
"Upload stats:" + "\n BlockCount: {}" + "\n BlocksBytes: {}" + "\n ChunkCount: {}" + "\n ChunksBytes: {}" + "\n ReadFromDiskBytes: {}" + "\n MultipartAttachmentCount: {}" + "\n ElapsedWallTimeUS: {}", + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes.load()), + UploadStats.ChunkCount.load(), + NiceBytes(UploadStats.ChunksBytes.load()), + NiceBytes(UploadStats.ReadFromDiskBytes.load()), + UploadStats.MultipartAttachmentCount.load(), + NiceLatencyNs(UploadStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE( + "Uploaded {}\n" + " Delta: {}/{} ({:.1f}%)\n" + " Blocks: {} ({})\n" + " Chunks: {} ({}){}\n" + " Rate: {}bits/sec", + NiceBytes(UploadStats.BlocksBytes + UploadStats.ChunksBytes), + + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount + LooseChunksStats.CompressedChunkBytes), + NiceBytes(ChunkingStats.BytesHashed), + DeltaByteCountPercent, + + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes), + UploadStats.ChunkCount.load(), + NiceBytes(UploadStats.ChunksBytes), + LargeAttachmentStats, + + NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, (UploadStats.ChunksBytes + UploadStats.BlocksBytes * 8)))); + + ZEN_CONSOLE("Uploaded ({}) build {} part {} ({}) in {}", + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount + LooseChunksStats.CompressedChunkBytes), + BuildId, + BuildPartName, + BuildPartId, + NiceTimeSpanMs(ProcessTimer.GetElapsedTimeMs())); + } + + void VerifyFolder(const ChunkedFolderContent& Content, const std::filesystem::path& Path, bool VerifyFileHash) + { + ZEN_TRACE_CPU("VerifyFolder"); + + ProgressBar ProgressBar(UsePlainProgress); + std::atomic<uint64_t> FilesVerified(0); + std::atomic<uint64_t> FilesFailed(0); + std::atomic<uint64_t> ReadBytes(0); + + WorkerThreadPool& VerifyPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + + ParallellWork Work(AbortFlag); + + const uint32_t PathCount = gsl::narrow<uint32_t>(Content.Paths.size()); + + RwLock ErrorLock; + std::vector<std::string> 
Errors; + + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + const ChunkedContentLookup Lookup = BuildChunkedContentLookup(Content); + + for (uint32_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (Work.IsAborted()) + { + break; + } + + Work.ScheduleWork( + VerifyPool, + [&, PathIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("VerifyFile_work"); + + // TODO: Convert ScheduleWork body to function + + const std::filesystem::path TargetPath = (Path / Content.Paths[PathIndex]).make_preferred(); + if (IsAcceptedFolder(TargetPath.parent_path().generic_string())) + { + const uint64_t ExpectedSize = Content.RawSizes[PathIndex]; + if (!std::filesystem::exists(TargetPath)) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("File {} with expected size {} does not exist", TargetPath, ExpectedSize)); + }); + FilesFailed++; + } + else + { + std::error_code Ec; + uint64_t SizeOnDisk = gsl::narrow<uint64_t>(std::filesystem::file_size(TargetPath, Ec)); + if (Ec) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back( + fmt::format("Failed to get size of file {}: {} ({})", TargetPath, Ec.message(), Ec.value())); + }); + FilesFailed++; + } + else if (SizeOnDisk < ExpectedSize) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("Size of file {} is smaller than expected. Expected: {}, Found: {}", + TargetPath, + ExpectedSize, + SizeOnDisk)); + }); + FilesFailed++; + } + else if (SizeOnDisk > ExpectedSize) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("Size of file {} is bigger than expected. 
Expected: {}, Found: {}", + TargetPath, + ExpectedSize, + SizeOnDisk)); + }); + FilesFailed++; + } + else if (SizeOnDisk > 0 && VerifyFileHash) + { + const IoHash& ExpectedRawHash = Content.RawHashes[PathIndex]; + IoBuffer Buffer = IoBufferBuilder::MakeFromFile(TargetPath); + IoHash RawHash = IoHash::HashBuffer(Buffer); + if (RawHash != ExpectedRawHash) + { + uint64_t FileOffset = 0; + const uint32_t SequenceIndex = Lookup.RawHashToSequenceIndex.at(ExpectedRawHash); + const uint32_t OrderOffset = Lookup.SequenceIndexChunkOrderOffset[SequenceIndex]; + for (uint32_t OrderIndex = OrderOffset; + OrderIndex < OrderOffset + Content.ChunkedContent.ChunkCounts[SequenceIndex]; + OrderIndex++) + { + uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; + uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + IoHash ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; + IoBuffer FileChunk = IoBuffer(Buffer, FileOffset, ChunkSize); + if (IoHash::HashBuffer(FileChunk) != ChunkHash) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format( + "WARNING: Hash of file {} does not match expected hash. Expected: {}, Found: {}. " + "Mismatch at chunk {}", + TargetPath, + ExpectedRawHash, + RawHash, + OrderIndex - OrderOffset)); + }); + break; + } + FileOffset += ChunkSize; + } + FilesFailed++; + } + ReadBytes += SizeOnDisk; + } + } + } + FilesVerified++; + } + }, + [&, PathIndex](const std::exception& Ex, std::atomic<bool>&) { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("Failed verifying file '{}'. Reason: {}", + (Path / Content.Paths[PathIndex]).make_preferred(), + Ex.what())); + }); + FilesFailed++; + }); + } + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + std::string Details = fmt::format("Verified {}/{} ({}). 
Failed files: {}", + FilesVerified.load(), + PathCount, + NiceBytes(ReadBytes.load()), + FilesFailed.load()); + ProgressBar.UpdateState({.Task = "Verifying files ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(PathCount), + .RemainingCount = gsl::narrow<uint64_t>(PathCount - FilesVerified.load())}, + false); + }); + ProgressBar.Finish(); + for (const std::string& Error : Errors) + { + ZEN_CONSOLE("{}", Error); + } + if (!Errors.empty()) + { + throw std::runtime_error(fmt::format("Verify failed with {} errors", Errors.size())); + } + } + + class WriteFileCache + { + public: + WriteFileCache() {} + ~WriteFileCache() { Flush(); } + + template<typename TBufferType> + void WriteToFile(uint32_t TargetIndex, + std::function<std::filesystem::path(uint32_t TargetIndex)>&& GetTargetPath, + const TBufferType& Buffer, + uint64_t FileOffset, + uint64_t TargetFinalSize) + { + ZEN_TRACE_CPU("WriteFileCache_WriteToFile"); + if (!SeenTargetIndexes.empty() && SeenTargetIndexes.back() == TargetIndex) + { + ZEN_TRACE_CPU("WriteFileCache_WriteToFile_CacheWrite"); + ZEN_ASSERT(OpenFileWriter); + OpenFileWriter->Write(Buffer, FileOffset); + } + else + { + std::unique_ptr<BasicFile> NewOutputFile; + { + ZEN_TRACE_CPU("WriteFileCache_WriteToFile_Open"); + Flush(); + const std::filesystem::path& TargetPath = GetTargetPath(TargetIndex); + CreateDirectories(TargetPath.parent_path()); + uint32_t Tries = 5; + NewOutputFile = + std::make_unique<BasicFile>(TargetPath, BasicFile::Mode::kWrite, [&Tries, TargetPath](std::error_code& Ec) { + if (Tries < 3) + { + ZEN_CONSOLE("Failed opening file '{}': {}{}", TargetPath, Ec.message(), Tries > 1 ? 
" Retrying"sv : ""sv); + } + if (Tries > 1) + { + Sleep(100); + } + return --Tries > 0; + }); + } + + const bool CacheWriter = TargetFinalSize > Buffer.GetSize(); + if (CacheWriter) + { + ZEN_TRACE_CPU("WriteFileCache_WriteToFile_CacheWrite"); + ZEN_ASSERT_SLOW(std::find(SeenTargetIndexes.begin(), SeenTargetIndexes.end(), TargetIndex) == SeenTargetIndexes.end()); + + OutputFile = std::move(NewOutputFile); + OpenFileWriter = std::make_unique<BasicFileWriter>(*OutputFile, Min(TargetFinalSize, 256u * 1024u)); + OpenFileWriter->Write(Buffer, FileOffset); + SeenTargetIndexes.push_back(TargetIndex); + } + else + { + ZEN_TRACE_CPU("WriteFileCache_WriteToFile_Write"); + NewOutputFile->Write(Buffer, FileOffset); + } + } + } + + void Flush() + { + ZEN_TRACE_CPU("WriteFileCache_Flush"); + OpenFileWriter = {}; + OutputFile = {}; + } + std::vector<uint32_t> SeenTargetIndexes; + std::unique_ptr<BasicFile> OutputFile; + std::unique_ptr<BasicFileWriter> OpenFileWriter; + }; + + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> GetRemainingChunkTargets( + std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex) + { + std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(Lookup, ChunkIndex); + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs; + if (!ChunkSources.empty()) + { + ChunkTargetPtrs.reserve(ChunkSources.size()); + for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources) + { + if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0) + { + ChunkTargetPtrs.push_back(&Source); + } + } + } + return ChunkTargetPtrs; + }; + + struct BlockWriteOps + { + std::vector<CompositeBuffer> ChunkBuffers; + struct WriteOpData + { + const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; + size_t ChunkBufferIndex = (size_t)-1; + }; + std::vector<WriteOpData> 
WriteOps; + }; + + void WriteBlockChunkOps(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& Lookup, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + const BlockWriteOps& Ops, + std::atomic<uint32_t>& OutChunksComplete, + std::atomic<uint64_t>& OutBytesWritten) + { + ZEN_TRACE_CPU("WriteBlockChunkOps"); + { + WriteFileCache OpenFileCache; + for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps) + { + if (AbortFlag) + { + break; + } + const CompositeBuffer& Chunk = Ops.ChunkBuffers[WriteOp.ChunkBufferIndex]; + const uint32_t SequenceIndex = WriteOp.Target->SequenceIndex; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() <= + RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() > 0); + const uint64_t ChunkSize = Chunk.GetSize(); + const uint64_t FileOffset = WriteOp.Target->Offset; + const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; + ZEN_ASSERT(FileOffset + ChunkSize <= RemoteContent.RawSizes[PathIndex]); + + OpenFileCache.WriteToFile<CompositeBuffer>( + SequenceIndex, + [&CacheFolderPath, &RemoteContent](uint32_t SequenceIndex) { + return GetTempChunkedSequenceFileName(CacheFolderPath, + RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + }, + Chunk, + FileOffset, + RemoteContent.RawSizes[PathIndex]); + OutBytesWritten += ChunkSize; + } + } + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open (WriteFileCache) + for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps) + { + const uint32_t RemoteSequenceIndex = WriteOp.Target->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) + { + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + { + ZEN_TRACE_CPU("VerifyChunkHash"); + 
const IoHash VerifyChunkHash = IoHash::HashBuffer( + IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error(fmt::format("Written chunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + SequenceRawHash)); + } + } + ZEN_TRACE_CPU("VerifyChunkHashes_rename"); + ZEN_ASSERT_SLOW(!std::filesystem::exists(GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), + GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); + } + } + OutChunksComplete += gsl::narrow<uint32_t>(Ops.ChunkBuffers.size()); + } + } + + bool GetBlockWriteOps(const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& Lookup, + std::span<const IoHash> ChunkRawHashes, + std::span<const uint32_t> ChunkCompressedLengths, + std::span<const uint32_t> ChunkRawLengths, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + CompositeBuffer&& PartialBlockBuffer, + uint32_t FirstIncludedBlockChunkIndex, + uint32_t LastIncludedBlockChunkIndex, + BlockWriteOps& OutOps) + { + ZEN_TRACE_CPU("GetBlockWriteOps"); + MemoryView BlockMemoryView; + UniqueBuffer BlockMemoryBuffer; + IoBufferFileReference FileRef = {}; + if (PartialBlockBuffer.GetSegments().size() == 1 && PartialBlockBuffer.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) + { + BlockMemoryBuffer = UniqueBuffer::Alloc(FileRef.FileChunkSize); + BasicFile Reader; + Reader.Attach(FileRef.FileHandle); + Reader.Read(BlockMemoryBuffer.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset); + BlockMemoryView = BlockMemoryBuffer.GetView(); + Reader.Detach(); + } + else + { + BlockMemoryView = PartialBlockBuffer.ViewOrCopyRange(0, PartialBlockBuffer.GetSize(), BlockMemoryBuffer); + } + uint32_t 
OffsetInBlock = 0; + for (uint32_t ChunkBlockIndex = FirstIncludedBlockChunkIndex; ChunkBlockIndex <= LastIncludedBlockChunkIndex; ChunkBlockIndex++) + { + const uint32_t ChunkCompressedSize = ChunkCompressedLengths[ChunkBlockIndex]; + const IoHash& ChunkHash = ChunkRawHashes[ChunkBlockIndex]; + if (auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); It != Lookup.ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = It->second; + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, Lookup, ChunkIndex); + + if (!ChunkTargetPtrs.empty()) + { + bool NeedsWrite = true; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[ChunkIndex].compare_exchange_strong(NeedsWrite, false)) + { + // CompositeBuffer Chunk = PartialBlockBuffer.Mid(OffsetInBlock, ChunkCompressedSize); + MemoryView ChunkMemory = BlockMemoryView.Mid(OffsetInBlock, ChunkCompressedSize); + CompositeBuffer Chunk = CompositeBuffer(IoBuffer(IoBuffer::Wrap, ChunkMemory.GetData(), ChunkMemory.GetSize())); + IoHash VerifyChunkHash; + uint64_t VerifyRawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(Chunk, VerifyChunkHash, VerifyRawSize); + if (!Compressed) + { + ZEN_ASSERT(false); + } + if (VerifyChunkHash != ChunkHash) + { + ZEN_ASSERT(false); + } + if (!ChunkRawLengths.empty()) + { + if (VerifyRawSize != ChunkRawLengths[ChunkBlockIndex]) + { + ZEN_ASSERT(false); + } + } + CompositeBuffer Decompressed = Compressed.DecompressToComposite(); + if (!Decompressed) + { + throw std::runtime_error(fmt::format("Decompression of build blob {} failed", ChunkHash)); + } + ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed)); + ZEN_ASSERT(Decompressed.GetSize() == RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs) + { + OutOps.WriteOps.push_back( + BlockWriteOps::WriteOpData{.Target = Target, 
.ChunkBufferIndex = OutOps.ChunkBuffers.size()}); + } + OutOps.ChunkBuffers.emplace_back(std::move(Decompressed)); + } + } + } + + OffsetInBlock += ChunkCompressedSize; + } + std::sort(OutOps.WriteOps.begin(), + OutOps.WriteOps.end(), + [](const BlockWriteOps::WriteOpData& Lhs, const BlockWriteOps::WriteOpData& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; + } + if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { + return false; + } + return Lhs.Target->Offset < Rhs.Target->Offset; + }); + return true; + } + + bool WriteBlockToDisk(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkBlockDescription& BlockDescription, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + CompositeBuffer&& BlockBuffer, + const ChunkedContentLookup& Lookup, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + std::atomic<uint32_t>& OutChunksComplete, + std::atomic<uint64_t>& OutBytesWritten) + { + ZEN_TRACE_CPU("WriteBlockToDisk"); + + BlockWriteOps Ops; + if ((BlockDescription.HeaderSize == 0) || BlockDescription.ChunkCompressedLengths.empty()) + { + ZEN_TRACE_CPU("WriteBlockToDisk_Legacy"); + + UniqueBuffer CopyBuffer; + const MemoryView BlockView = BlockBuffer.ViewOrCopyRange(0, BlockBuffer.GetSize(), CopyBuffer); + uint64_t HeaderSize; + const std::vector<uint32_t> ChunkCompressedLengths = ReadChunkBlockHeader(BlockView, HeaderSize); + + CompositeBuffer PartialBlockBuffer = std::move(BlockBuffer).Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + HeaderSize); + + if (GetBlockWriteOps(RemoteContent, + Lookup, + BlockDescription.ChunkRawHashes, + ChunkCompressedLengths, + BlockDescription.ChunkRawLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + std::move(PartialBlockBuffer), + 0, + gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1), + Ops)) + { + 
WriteBlockChunkOps(CacheFolderPath, + RemoteContent, + Lookup, + SequenceIndexChunksLeftToWriteCounters, + Ops, + OutChunksComplete, + OutBytesWritten); + return true; + } + return false; + } + + CompositeBuffer PartialBlockBuffer = + std::move(BlockBuffer).Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + if (GetBlockWriteOps(RemoteContent, + Lookup, + BlockDescription.ChunkRawHashes, + BlockDescription.ChunkCompressedLengths, + BlockDescription.ChunkRawLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + std::move(PartialBlockBuffer), + 0, + gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1), + Ops)) + { + WriteBlockChunkOps(CacheFolderPath, + RemoteContent, + Lookup, + SequenceIndexChunksLeftToWriteCounters, + Ops, + OutChunksComplete, + OutBytesWritten); + return true; + } + return false; + } + + bool WritePartialBlockToDisk(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkBlockDescription& BlockDescription, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + CompositeBuffer&& PartialBlockBuffer, + uint32_t FirstIncludedBlockChunkIndex, + uint32_t LastIncludedBlockChunkIndex, + const ChunkedContentLookup& Lookup, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + std::atomic<uint32_t>& OutChunksComplete, + std::atomic<uint64_t>& OutBytesWritten) + { + ZEN_TRACE_CPU("WritePartialBlockToDisk"); + BlockWriteOps Ops; + if (GetBlockWriteOps(RemoteContent, + Lookup, + BlockDescription.ChunkRawHashes, + BlockDescription.ChunkCompressedLengths, + BlockDescription.ChunkRawLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + std::move(PartialBlockBuffer), + FirstIncludedBlockChunkIndex, + LastIncludedBlockChunkIndex, + Ops)) + { + WriteBlockChunkOps(CacheFolderPath, + RemoteContent, + Lookup, + 
SequenceIndexChunksLeftToWriteCounters, + Ops, + OutChunksComplete, + OutBytesWritten); + return true; + } + else + { + return false; + } + } + + SharedBuffer Decompress(CompositeBuffer&& CompressedChunk, const IoHash& ChunkHash, const uint64_t ChunkRawSize) + { + ZEN_TRACE_CPU("Decompress"); + + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompressedChunk, RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Invalid build blob format for chunk {}", ChunkHash)); + } + if (RawHash != ChunkHash) + { + throw std::runtime_error(fmt::format("Mismatching build blob {}, but compressed header rawhash is {}", ChunkHash, RawHash)); + } + if (RawSize != ChunkRawSize) + { + throw std::runtime_error( + fmt::format("Mismatching build blob {}, expected raw size {} but recevied raw size {}", ChunkHash, ChunkRawSize, RawSize)); + } + if (!Compressed) + { + throw std::runtime_error(fmt::format("Invalid build blob {}, not a compressed buffer", ChunkHash)); + } + + SharedBuffer Decompressed = Compressed.Decompress(); + + if (!Decompressed) + { + throw std::runtime_error(fmt::format("Decompression of build blob {} failed", ChunkHash)); + } + return Decompressed; + } + + void WriteChunkToDisk(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> ChunkTargets, + CompositeBuffer&& ChunkData, + WriteFileCache& OpenFileCache, + std::atomic<uint64_t>& OutBytesWritten) + { + ZEN_TRACE_CPU("WriteChunkToDisk"); + + for (const ChunkedContentLookup::ChunkSequenceLocation* TargetPtr : ChunkTargets) + { + const auto& Target = *TargetPtr; + const uint64_t FileOffset = Target.Offset; + const uint32_t SequenceIndex = Target.SequenceIndex; + const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; + + OpenFileCache.WriteToFile( + SequenceIndex, + 
[&CacheFolderPath, &Content](uint32_t SequenceIndex) { + return GetTempChunkedSequenceFileName(CacheFolderPath, Content.ChunkedContent.SequenceRawHashes[SequenceIndex]); + }, + ChunkData, + FileOffset, + Content.RawSizes[PathIndex]); + OutBytesWritten += ChunkData.GetSize(); + } + } + + bool CanDecompressDirectToSequence(const ChunkedFolderContent& RemoteContent, + const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> Locations) + { + if (Locations.size() == 1) + { + const uint32_t FirstSequenceIndex = Locations[0]->SequenceIndex; + if (Locations[0]->Offset == 0 && RemoteContent.ChunkedContent.ChunkCounts[FirstSequenceIndex] == 1) + { + return true; + } + } + return false; + } + + void StreamDecompress(const std::filesystem::path& CacheFolderPath, + const IoHash& SequenceRawHash, + CompositeBuffer&& CompressedPart, + std::atomic<uint64_t>& WriteToDiskBytes) + { + ZEN_TRACE_CPU("StreamDecompress"); + const std::filesystem::path TempChunkSequenceFileName = GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash); + TemporaryFile DecompressedTemp; + std::error_code Ec; + DecompressedTemp.CreateTemporary(TempChunkSequenceFileName.parent_path(), Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed creating temporary file for decompressing large blob {}. 
Reason: {}", SequenceRawHash, Ec.message())); + } + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompressedPart, RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", SequenceRawHash)); + } + if (RawHash != SequenceRawHash) + { + throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, SequenceRawHash)); + } + IoHashStream Hash; + bool CouldDecompress = Compressed.DecompressToStream(0, (uint64_t)-1, [&](uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_TRACE_CPU("StreamDecompress_Write"); + if (!AbortFlag) + { + DecompressedTemp.Write(RangeBuffer, Offset); + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + } + WriteToDiskBytes += RangeBuffer.GetSize(); + return true; + } + return false; + }); + + if (AbortFlag) + { + return; + } + + if (!CouldDecompress) + { + throw std::runtime_error(fmt::format("Failed to decompress large blob {}", SequenceRawHash)); + } + const IoHash VerifyHash = Hash.GetHash(); + if (VerifyHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Decompressed blob payload hash {} does not match expected hash {}", VerifyHash, SequenceRawHash)); + } + DecompressedTemp.MoveTemporaryIntoPlace(TempChunkSequenceFileName, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed moving temporary file for decompressing large blob {}. 
Reason: {}", SequenceRawHash, Ec.message())); + } + } + + bool WriteCompressedChunk(const std::filesystem::path& TargetFolder, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& RemoteLookup, + const IoHash& ChunkHash, + const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, + IoBuffer&& CompressedPart, + std::atomic<uint64_t>& WriteToDiskBytes) + { + auto ChunkHashToChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(ChunkHashToChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); + if (CanDecompressDirectToSequence(RemoteContent, ChunkTargetPtrs)) + { + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[ChunkTargetPtrs.front()->SequenceIndex]; + StreamDecompress(TargetFolder, SequenceRawHash, CompositeBuffer(std::move(CompressedPart)), WriteToDiskBytes); + } + else + { + const uint32_t ChunkIndex = ChunkHashToChunkIndexIt->second; + SharedBuffer Chunk = + Decompress(CompositeBuffer(std::move(CompressedPart)), ChunkHash, RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + + if (!AbortFlag) + { + WriteFileCache OpenFileCache; + + WriteChunkToDisk(TargetFolder, + RemoteContent, + RemoteLookup, + ChunkTargetPtrs, + CompositeBuffer(std::move(Chunk)), + OpenFileCache, + WriteToDiskBytes); + return true; + } + } + return false; + } + + void CompleteChunkTargets(const std::filesystem::path& TargetFolder, + const ChunkedFolderContent& RemoteContent, + const IoHash& ChunkHash, + const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + const bool NeedHashVerify) + { + ZEN_TRACE_CPU("CompleteChunkTargets"); + + for (const ChunkedContentLookup::ChunkSequenceLocation* Location : ChunkTargetPtrs) + { + const uint32_t RemoteSequenceIndex = Location->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) + 
{ + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + if (NeedHashVerify) + { + ZEN_TRACE_CPU("VerifyChunkHash"); + + const IoHash VerifyChunkHash = + IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(TargetFolder, SequenceRawHash))); + if (VerifyChunkHash != ChunkHash) + { + throw std::runtime_error( + fmt::format("Written chunk sequence {} hash does not match expected hash {}", VerifyChunkHash, ChunkHash)); + } + } + + ZEN_TRACE_CPU("RenameToFinal"); + ZEN_ASSERT_SLOW(!std::filesystem::exists(GetFinalChunkedSequenceFileName(TargetFolder, SequenceRawHash))); + std::filesystem::rename(GetTempChunkedSequenceFileName(TargetFolder, SequenceRawHash), + GetFinalChunkedSequenceFileName(TargetFolder, SequenceRawHash)); + } + } + } + + void AsyncWriteDownloadedChunk(const std::filesystem::path& Path, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& RemoteLookup, + uint32_t RemoteChunkIndex, + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs, + ParallellWork& Work, + WorkerThreadPool& WritePool, + IoBuffer&& Payload, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + std::atomic<uint64_t>& WriteToDiskBytes, + std::atomic<uint32_t>& ChunkCountWritten, + std::atomic<uint64_t>& WritePartsComplete, + std::atomic<uint64_t>& TotalPartWriteCount, + std::atomic<uint64_t>& LooseChunksBytes, + FilteredRate& FilteredWrittenBytesPerSecond) + { + ZEN_TRACE_CPU("AsyncWriteDownloadedChunk"); + + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + + uint64_t Size = Payload.GetSize(); + LooseChunksBytes += Size; + + std::filesystem::path CompressedChunkPath; + + // Check if the dowloaded chunk is file based and we can move it directly without rewriting it + { + IoBufferFileReference FileRef; + if (Payload.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && (FileRef.FileChunkSize 
== Size)) + { + ZEN_TRACE_CPU("MoveTempChunk"); + std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) + { + Payload.SetDeleteOnClose(false); + Payload = {}; + CompressedChunkPath = Path / ZenTempDownloadFolderName / ChunkHash.ToHexString(); + std::filesystem::rename(TempBlobPath, CompressedChunkPath, Ec); + if (Ec) + { + CompressedChunkPath = std::filesystem::path{}; + + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + Payload = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, Size, true); + Payload.SetDeleteOnClose(true); + } + } + } + } + + if (CompressedChunkPath.empty() && (Size > 512u * 1024u)) + { + ZEN_TRACE_CPU("WriteTempChunk"); + // Could not be moved and rather large, lets store it on disk + CompressedChunkPath = Path / ZenTempDownloadFolderName / ChunkHash.ToHexString(); + TemporaryFile::SafeWriteFile(CompressedChunkPath, Payload); + Payload = {}; + } + + Work.ScheduleWork( + WritePool, // GetSyncWorkerPool(),// + [&, + SequenceIndexChunksLeftToWriteCounters, + CompressedChunkPath, + RemoteChunkIndex, + ChunkTargetPtrs = std::move(ChunkTargetPtrs), + CompressedPart = std::move(Payload)](std::atomic<bool>&) mutable { + ZEN_TRACE_CPU("UpdateFolder_WriteChunk"); + + if (!AbortFlag) + { + FilteredWrittenBytesPerSecond.Start(); + + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + if (CompressedChunkPath.empty()) + { + ZEN_ASSERT(CompressedPart); + } + else + { + ZEN_ASSERT(!CompressedPart); + CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (!CompressedPart) + { + throw std::runtime_error( + fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath)); + } + } + + std::filesystem::path TargetFolder = Path / ZenTempCacheFolderName; + + bool NeedHashVerify = WriteCompressedChunk(TargetFolder, + RemoteContent, + RemoteLookup, + ChunkHash, + ChunkTargetPtrs, + 
std::move(CompressedPart), + WriteToDiskBytes); + + if (!AbortFlag) + { + ChunkCountWritten++; + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + + std::filesystem::remove(CompressedChunkPath); + + CompleteChunkTargets(TargetFolder, + RemoteContent, + ChunkHash, + ChunkTargetPtrs, + SequenceIndexChunksLeftToWriteCounters, + NeedHashVerify); + } + } + }, + Work.DefaultErrorFunction()); + }; + + void UpdateFolder(BuildStorage& Storage, + const Oid& BuildId, + const std::filesystem::path& Path, + const std::uint64_t LargeAttachmentSize, + const std::uint64_t PreferredMultipartChunkSize, + const ChunkedFolderContent& LocalContent, + const ChunkedFolderContent& RemoteContent, + const std::vector<ChunkBlockDescription>& BlockDescriptions, + const std::vector<IoHash>& LooseChunkHashes, + bool AllowPartialBlockRequests, + bool WipeTargetFolder, + FolderContent& OutLocalFolderState) + { + ZEN_TRACE_CPU("UpdateFolder"); + + ZEN_UNUSED(WipeTargetFolder); + std::atomic<uint64_t> DownloadedBlocks = 0; + std::atomic<uint64_t> BlockBytes = 0; + std::atomic<uint64_t> DownloadedChunks = 0; + std::atomic<uint64_t> LooseChunksBytes = 0; + std::atomic<uint64_t> WriteToDiskBytes = 0; + std::atomic<uint64_t> MultipartAttachmentCount = 0; + + DiskStatistics DiskStats; + + Stopwatch IndexTimer; + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); + + const ChunkedContentLookup RemoteLookup = BuildChunkedContentLookup(RemoteContent); + + ZEN_CONSOLE("Indexed local and remote content in {}", NiceTimeSpanMs(IndexTimer.GetElapsedTimeMs())); + + const std::filesystem::path CacheFolderPath = Path / ZenTempCacheFolderName; + + Stopwatch CacheMappingTimer; + + std::vector<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters(RemoteContent.ChunkedContent.SequenceRawHashes.size()); + // std::vector<bool> 
RemoteSequenceIndexIsCachedFlags(RemoteContent.ChunkedContent.SequenceRawHashes.size(), false); + std::vector<bool> RemoteChunkIndexNeedsCopyFromLocalFileFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); + // Guard if he same chunks is in multiple blocks (can happen due to block reuse, cache reuse blocks writes directly) + std::vector<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedChunkHashesFound; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedSequenceHashesFound; + uint64_t CachedChunkHashesByteCountFound = 0; + uint64_t CachedSequenceHashesByteCountFound = 0; + { + ZEN_TRACE_CPU("UpdateFolder_CheckChunkCache"); + + DirectoryContent CacheDirContent; + GetDirectoryContent(CacheFolderPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, + CacheDirContent); + for (size_t Index = 0; Index < CacheDirContent.Files.size(); Index++) + { + IoHash FileHash; + if (IoHash::TryParse(CacheDirContent.Files[Index].filename().string(), FileHash)) + { + if (auto ChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(FileHash); + ChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = ChunkIt->second; + const uint64_t ChunkSize = RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + if (ChunkSize == CacheDirContent.FileSizes[Index]) + { + CachedChunkHashesFound.insert({FileHash, ChunkIndex}); + CachedChunkHashesByteCountFound += ChunkSize; + continue; + } + } + else if (auto SequenceIt = RemoteLookup.RawHashToSequenceIndex.find(FileHash); + SequenceIt != RemoteLookup.RawHashToSequenceIndex.end()) + { + const uint32_t SequenceIndex = SequenceIt->second; + const uint32_t PathIndex = RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const uint64_t SequenceSize = RemoteContent.RawSizes[PathIndex]; + if (SequenceSize == CacheDirContent.FileSizes[Index]) + { + 
CachedSequenceHashesFound.insert({FileHash, SequenceIndex}); + CachedSequenceHashesByteCountFound += SequenceSize; + continue; + } + } + } + std::filesystem::remove(CacheDirContent.Files[Index]); + } + } + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedBlocksFound; + uint64_t CachedBlocksByteCountFound = 0; + { + ZEN_TRACE_CPU("UpdateFolder_CheckBlockCache"); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> AllBlockSizes; + AllBlockSizes.reserve(BlockDescriptions.size()); + for (uint32_t BlockIndex = 0; BlockIndex < BlockDescriptions.size(); BlockIndex++) + { + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + AllBlockSizes.insert({BlockDescription.BlockHash, BlockIndex}); + } + + DirectoryContent BlockDirContent; + GetDirectoryContent(Path / ZenTempBlockFolderName, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, + BlockDirContent); + CachedBlocksFound.reserve(BlockDirContent.Files.size()); + for (size_t Index = 0; Index < BlockDirContent.Files.size(); Index++) + { + IoHash FileHash; + if (IoHash::TryParse(BlockDirContent.Files[Index].filename().string(), FileHash)) + { + if (auto BlockIt = AllBlockSizes.find(FileHash); BlockIt != AllBlockSizes.end()) + { + const uint32_t BlockIndex = BlockIt->second; + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + uint64_t BlockSize = CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize; + for (uint64_t ChunkSize : BlockDescription.ChunkCompressedLengths) + { + BlockSize += ChunkSize; + } + + if (BlockSize == BlockDirContent.FileSizes[Index]) + { + CachedBlocksFound.insert({FileHash, BlockIndex}); + CachedBlocksByteCountFound += BlockSize; + continue; + } + } + } + std::filesystem::remove(BlockDirContent.Files[Index]); + } + } + + std::vector<uint32_t> LocalPathIndexesMatchingSequenceIndexes; + uint64_t LocalPathIndexesByteCountMatchingSequenceIndexes = 0; + // Pick up all whole files we can use 
from current local state + { + ZEN_TRACE_CPU("UpdateFolder_CheckLocalChunks"); + for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < RemoteContent.ChunkedContent.SequenceRawHashes.size(); + RemoteSequenceIndex++) + { + const IoHash& RemoteSequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + if (auto CacheSequenceIt = CachedSequenceHashesFound.find(RemoteSequenceRawHash); + CacheSequenceIt != CachedSequenceHashesFound.end()) + { + // const uint32_t RemoteSequenceIndex = CacheSequenceIt->second; + // const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex); + // RemoteSequenceByteCountFoundInCache += RemoteContent.RawSizes[RemotePathIndex]; + } + else if (auto CacheChunkIt = CachedChunkHashesFound.find(RemoteSequenceRawHash); + CacheChunkIt != CachedChunkHashesFound.end()) + { + // const uint32_t RemoteChunkIndex = CacheChunkIt->second; + // const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex); + // RemoteSequenceByteCountFoundInCache += RemoteContent.RawSizes[RemotePathIndex]; + } + else if (auto It = LocalLookup.RawHashToSequenceIndex.find(RemoteSequenceRawHash); + It != LocalLookup.RawHashToSequenceIndex.end()) + { + const uint32_t LocalSequenceIndex = It->second; + const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(LocalLookup, LocalSequenceIndex); + uint64_t RawSize = LocalContent.RawSizes[LocalPathIndex]; + LocalPathIndexesMatchingSequenceIndexes.push_back(LocalPathIndex); + LocalPathIndexesByteCountMatchingSequenceIndexes += RawSize; + } + else + { + // We must write the sequence + SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = + RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]; + } + } + } + // Pick up all chunks in current local state + struct CacheCopyData + { + uint32_t LocalSequenceIndex = (uint32_t)-1; + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> 
TargetChunkLocationPtrs; + struct ChunkTarget + { + uint32_t TargetChunkLocationCount = (uint32_t)-1; + uint64_t ChunkRawSize = (uint64_t)-1; + uint64_t CacheFileOffset = (uint64_t)-1; + }; + std::vector<ChunkTarget> ChunkTargets; + }; + + tsl::robin_map<IoHash, size_t, IoHash::Hasher> RawHashToCacheCopyDataIndex; + std::vector<CacheCopyData> CacheCopyDatas; + uint64_t LocalChunkHashesMatchingRemoteCount = 0; + uint64_t LocalChunkHashesMatchingRemoteByteCount = 0; + + { + ZEN_TRACE_CPU("UpdateFolder_GetLocalChunks"); + + for (uint32_t LocalSequenceIndex = 0; LocalSequenceIndex < LocalContent.ChunkedContent.SequenceRawHashes.size(); + LocalSequenceIndex++) + { + const IoHash& LocalSequenceRawHash = LocalContent.ChunkedContent.SequenceRawHashes[LocalSequenceIndex]; + const uint32_t LocalOrderOffset = LocalLookup.SequenceIndexChunkOrderOffset[LocalSequenceIndex]; + + { + uint64_t SourceOffset = 0; + const uint32_t LocalChunkCount = LocalContent.ChunkedContent.ChunkCounts[LocalSequenceIndex]; + for (uint32_t LocalOrderIndex = 0; LocalOrderIndex < LocalChunkCount; LocalOrderIndex++) + { + const uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[LocalOrderOffset + LocalOrderIndex]; + const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + const uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; + + if (auto RemoteChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(LocalChunkHash); + RemoteChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = RemoteChunkIt->second; + if (!RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + + if (!ChunkTargetPtrs.empty()) + { + CacheCopyData::ChunkTarget Target = { + .TargetChunkLocationCount = 
gsl::narrow<uint32_t>(ChunkTargetPtrs.size()), + .ChunkRawSize = LocalChunkRawSize, + .CacheFileOffset = SourceOffset}; + if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(LocalSequenceRawHash); + CopySourceIt != RawHashToCacheCopyDataIndex.end()) + { + CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; + if (Data.TargetChunkLocationPtrs.size() > 1024) + { + RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.LocalSequenceIndex = LocalSequenceIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}}); + } + else + { + Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(), + ChunkTargetPtrs.begin(), + ChunkTargetPtrs.end()); + Data.ChunkTargets.push_back(Target); + } + } + else + { + RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.LocalSequenceIndex = LocalSequenceIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}}); + } + LocalChunkHashesMatchingRemoteByteCount += LocalChunkRawSize; + LocalChunkHashesMatchingRemoteCount++; + RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true; + } + } + } + SourceOffset += LocalChunkRawSize; + } + } + } + } + + if (!CachedSequenceHashesFound.empty() || !CachedChunkHashesFound.empty() || !CachedBlocksFound.empty() || + !LocalPathIndexesMatchingSequenceIndexes.empty() || LocalChunkHashesMatchingRemoteCount > 0) + { + ZEN_CONSOLE( + "Cache: {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks. 
Local state: {} ({}) chunk sequences, {} ({}) chunks", + CachedSequenceHashesFound.size(), + NiceBytes(CachedSequenceHashesByteCountFound), + CachedChunkHashesFound.size(), + NiceBytes(CachedChunkHashesByteCountFound), + CachedBlocksFound.size(), + NiceBytes(CachedBlocksByteCountFound), + LocalPathIndexesMatchingSequenceIndexes.size(), + NiceBytes(LocalPathIndexesByteCountMatchingSequenceIndexes), + LocalChunkHashesMatchingRemoteCount, + NiceBytes(LocalChunkHashesMatchingRemoteByteCount)); + } + + uint32_t ChunkCountToWrite = 0; + for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) + { + if (RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + ChunkCountToWrite++; + } + else + { + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + if (!ChunkTargetPtrs.empty()) + { + RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex] = true; + ChunkCountToWrite++; + } + } + } + + uint64_t TotalRequestCount = 0; + std::atomic<uint64_t> RequestsComplete = 0; + std::atomic<uint32_t> ChunkCountWritten = 0; + std::atomic<uint64_t> TotalPartWriteCount = 0; + std::atomic<uint64_t> WritePartsComplete = 0; + + { + ZEN_TRACE_CPU("WriteChunks"); + + Stopwatch WriteTimer; + + FilteredRate FilteredDownloadedBytesPerSecond; + FilteredRate FilteredWrittenBytesPerSecond; + + WorkerThreadPool& NetworkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + WorkerThreadPool& WritePool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + + ProgressBar WriteProgressBar(UsePlainProgress); + ParallellWork Work(AbortFlag); + + std::atomic<uint64_t> BytesDownloaded = 0; + + struct LooseChunkHashWorkData + { + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs; + uint32_t RemoteChunkIndex = (uint32_t)-1; + }; + + 
std::vector<LooseChunkHashWorkData> LooseChunkHashWorks; + TotalPartWriteCount += CacheCopyDatas.size(); + + for (const IoHash ChunkHash : LooseChunkHashes) + { + auto RemoteChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(RemoteChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); + const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second; + if (RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + ZEN_DEBUG("Skipping chunk {} due to cache reuse", ChunkHash); + continue; + } + bool NeedsCopy = true; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false)) + { + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + + if (ChunkTargetPtrs.empty()) + { + ZEN_DEBUG("Skipping chunk {} due to cache reuse", ChunkHash); + } + else + { + TotalRequestCount++; + TotalPartWriteCount++; + LooseChunkHashWorks.push_back( + LooseChunkHashWorkData{.ChunkTargetPtrs = ChunkTargetPtrs, .RemoteChunkIndex = RemoteChunkIndex}); + } + } + } + + uint32_t BlockCount = gsl::narrow<uint32_t>(BlockDescriptions.size()); + + std::vector<bool> ChunkIsPickedUpByBlock(RemoteContent.ChunkedContent.ChunkHashes.size(), false); + auto GetNeededChunkBlockIndexes = [&RemoteContent, + &RemoteLookup, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &ChunkIsPickedUpByBlock](const ChunkBlockDescription& BlockDescription) { + ZEN_TRACE_CPU("UpdateFolder_GetNeededChunkBlockIndexes"); + std::vector<uint32_t> NeededBlockChunkIndexes; + for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++) + { + const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + if (auto It = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = 
It->second; + if (!ChunkIsPickedUpByBlock[RemoteChunkIndex]) + { + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]) + { + ChunkIsPickedUpByBlock[RemoteChunkIndex] = true; + NeededBlockChunkIndexes.push_back(ChunkBlockIndex); + } + } + } + } + return NeededBlockChunkIndexes; + }; + + std::vector<uint32_t> CachedChunkBlockIndexes; + + struct BlockRangeDescriptor + { + uint32_t BlockIndex = (uint32_t)-1; + uint64_t RangeStart = 0; + uint64_t RangeLength = 0; + uint32_t ChunkBlockIndexStart = 0; + uint32_t ChunkBlockIndexCount = 0; + }; + std::vector<BlockRangeDescriptor> BlockRangeWorks; + + std::vector<uint32_t> FullBlockWorks; + + size_t BlocksNeededCount = 0; + uint64_t AllBlocksSize = 0; + uint64_t AllBlocksFetch = 0; + uint64_t AllBlocksSlack = 0; + uint64_t AllBlockRequests = 0; + uint64_t AllBlockChunksSize = 0; + for (uint32_t BlockIndex = 0; BlockIndex < BlockCount; BlockIndex++) + { + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + const std::vector<uint32_t> BlockChunkIndexNeeded = GetNeededChunkBlockIndexes(BlockDescription); + if (!BlockChunkIndexNeeded.empty()) + { + bool UsingCachedBlock = false; + if (auto It = CachedBlocksFound.find(BlockDescription.BlockHash); It != CachedBlocksFound.end()) + { + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_CacheGet"); + + TotalPartWriteCount++; + + std::filesystem::path BlockPath = Path / ZenTempBlockFolderName / BlockDescription.BlockHash.ToHexString(); + if (std::filesystem::exists(BlockPath)) + { + CachedChunkBlockIndexes.push_back(BlockIndex); + UsingCachedBlock = true; + } + } + + if (!UsingCachedBlock) + { + bool WantsToDoPartialBlockDownload = BlockChunkIndexNeeded.size() < BlockDescription.ChunkRawHashes.size(); + bool CanDoPartialBlockDownload = + (BlockDescription.HeaderSize > 0) && + (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size()); + if (AllowPartialBlockRequests && WantsToDoPartialBlockDownload && 
CanDoPartialBlockDownload) + { + std::vector<BlockRangeDescriptor> BlockRanges; + + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_PartialAnalysis"); + + uint32_t NeedBlockChunkIndexOffset = 0; + uint32_t ChunkBlockIndex = 0; + uint32_t CurrentOffset = + gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + + BlockRangeDescriptor NextRange{.BlockIndex = BlockIndex}; + while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && + ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) + { + const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + NextRange = {.BlockIndex = BlockIndex}; + } + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + } + else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + AllBlockChunksSize += ChunkCompressedLength; + if (NextRange.RangeLength == 0) + { + NextRange.RangeStart = CurrentOffset; + NextRange.ChunkBlockIndexStart = ChunkBlockIndex; + } + NextRange.RangeLength += ChunkCompressedLength; + NextRange.ChunkBlockIndexCount++; + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + NeedBlockChunkIndexOffset++; + } + else + { + ZEN_ASSERT(false); + } + } + AllBlocksSize += CurrentOffset; + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + } + + ZEN_ASSERT(!BlockRanges.empty()); + std::vector<BlockRangeDescriptor> CollapsedBlockRanges; + auto It = BlockRanges.begin(); + CollapsedBlockRanges.push_back(*It++); + uint64_t TotalSlack = 0; + while (It != BlockRanges.end()) + { + BlockRangeDescriptor& LastRange = CollapsedBlockRanges.back(); + uint64_t Slack = It->RangeStart - (LastRange.RangeStart + LastRange.RangeLength); + uint64_t BothRangeSize = It->RangeLength + LastRange.RangeLength; + if (Slack <= Max(BothRangeSize / 8, 
64u * 1024u)) // Made up heuristic - we'll see how it pans out + { + LastRange.ChunkBlockIndexCount = + (It->ChunkBlockIndexStart + It->ChunkBlockIndexCount) - LastRange.ChunkBlockIndexStart; + LastRange.RangeLength = (It->RangeStart + It->RangeLength) - LastRange.RangeStart; + TotalSlack += Slack; + } + else + { + CollapsedBlockRanges.push_back(*It); + } + ++It; + } + + uint64_t TotalFetch = 0; + for (const BlockRangeDescriptor& Range : CollapsedBlockRanges) + { + TotalFetch += Range.RangeLength; + } + + AllBlocksFetch += TotalFetch; + AllBlocksSlack += TotalSlack; + BlocksNeededCount++; + AllBlockRequests += CollapsedBlockRanges.size(); + + TotalRequestCount += CollapsedBlockRanges.size(); + TotalPartWriteCount += CollapsedBlockRanges.size(); + + BlockRangeWorks.insert(BlockRangeWorks.end(), CollapsedBlockRanges.begin(), CollapsedBlockRanges.end()); + } + else + { + BlocksNeededCount++; + TotalRequestCount++; + TotalPartWriteCount++; + + FullBlockWorks.push_back(BlockIndex); + } + } + } + else + { + ZEN_DEBUG("Skipping block {} due to cache reuse", BlockDescriptions[BlockIndex].BlockHash); + } + } + + for (size_t CopyDataIndex = 0; CopyDataIndex < CacheCopyDatas.size(); CopyDataIndex++) + { + if (AbortFlag) + { + break; + } + + Work.ScheduleWork( + WritePool, // GetSyncWorkerPool(),// + [&, CopyDataIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_CopyLocal"); + + FilteredWrittenBytesPerSecond.Start(); + const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; + const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.LocalSequenceIndex]; + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty()); + + uint64_t CacheLocalFileBytesRead = 0; + + size_t TargetStart = 0; + const std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> AllTargets( + CopyData.TargetChunkLocationPtrs); + + struct 
WriteOp + { + const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; + uint64_t CacheFileOffset = (uint64_t)-1; + uint64_t ChunkSize = (uint64_t)-1; + }; + + std::vector<WriteOp> WriteOps; + + if (!AbortFlag) + { + ZEN_TRACE_CPU("Sort"); + WriteOps.reserve(AllTargets.size()); + for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) + { + std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> TargetRange = + AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) + { + WriteOps.push_back(WriteOp{.Target = Target, + .CacheFileOffset = ChunkTarget.CacheFileOffset, + .ChunkSize = ChunkTarget.ChunkRawSize}); + } + TargetStart += ChunkTarget.TargetChunkLocationCount; + } + + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; + } + else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { + return false; + } + if (Lhs.Target->Offset < Rhs.Target->Offset) + { + return true; + } + return false; + }); + } + + if (!AbortFlag) + { + ZEN_TRACE_CPU("Write"); + + BufferedOpenFile SourceFile(LocalFilePath); + WriteFileCache OpenFileCache; + for (const WriteOp& Op : WriteOps) + { + if (AbortFlag) + { + break; + } + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() <= + RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() > 0); + const uint32_t RemotePathIndex = RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; + const uint64_t ChunkSize = Op.ChunkSize; + CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ChunkSize); + + ZEN_ASSERT(Op.Target->Offset + ChunkSource.GetSize() <= 
RemoteContent.RawSizes[RemotePathIndex]); + + OpenFileCache.WriteToFile<CompositeBuffer>( + RemoteSequenceIndex, + [&CacheFolderPath, &RemoteContent](uint32_t SequenceIndex) { + return GetTempChunkedSequenceFileName( + CacheFolderPath, + RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + }, + ChunkSource, + Op.Target->Offset, + RemoteContent.RawSizes[RemotePathIndex]); + WriteToDiskBytes += ChunkSize; + CacheLocalFileBytesRead += ChunkSize; // TODO: This should be the sum of unique chunk sizes? + } + } + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open (WriteFileCache) + for (const WriteOp& Op : WriteOps) + { + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) + { + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + { + ZEN_TRACE_CPU("VerifyHash"); + const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( + GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Written chunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + SequenceRawHash)); + } + } + + ZEN_TRACE_CPU("rename"); + ZEN_ASSERT_SLOW( + !std::filesystem::exists(GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), + GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); + } + } + + ChunkCountWritten += gsl::narrow<uint32_t>(CopyData.ChunkTargets.size()); + ZEN_DEBUG("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), LocalContent.Paths[LocalPathIndex]); + } + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + + for 
(uint32_t LooseChunkHashWorkIndex = 0; LooseChunkHashWorkIndex < LooseChunkHashWorks.size(); LooseChunkHashWorkIndex++) + { + if (AbortFlag) + { + break; + } + + LooseChunkHashWorkData& LooseChunkHashWork = LooseChunkHashWorks[LooseChunkHashWorkIndex]; + + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + std::move(LooseChunkHashWork.ChunkTargetPtrs); + const uint32_t RemoteChunkIndex = LooseChunkHashWork.RemoteChunkIndex; + + Work.ScheduleWork( + WritePool, // NetworkPool, // GetSyncWorkerPool(),// + [&, RemoteChunkIndex, ChunkTargetPtrs](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_ReadPreDownloaded"); + std::filesystem::path ExistingCompressedChunkPath; + { + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + std::filesystem::path CompressedChunkPath = Path / ZenTempDownloadFolderName / ChunkHash.ToHexString(); + if (std::filesystem::exists(CompressedChunkPath)) + { + IoBuffer ExistingCompressedPart = IoBufferBuilder::MakeFromFile(ExistingCompressedChunkPath); + if (ExistingCompressedPart) + { + IoHash RawHash; + uint64_t RawSize; + if (CompressedBuffer::ValidateCompressedHeader(ExistingCompressedPart, RawHash, RawSize)) + { + LooseChunksBytes += ExistingCompressedPart.GetSize(); + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + ExistingCompressedChunkPath = std::move(CompressedChunkPath); + } + else + { + std::error_code DummyEc; + std::filesystem::remove(CompressedChunkPath, DummyEc); + } + } + } + } + if (!ExistingCompressedChunkPath.empty()) + { + Work.ScheduleWork( + WritePool, // WritePool, GetSyncWorkerPool() + [&Path, + &RemoteContent, + &RemoteLookup, + &CacheFolderPath, + &SequenceIndexChunksLeftToWriteCounters, + &WriteToDiskBytes, + &ChunkCountWritten, + &WritePartsComplete, + &TotalPartWriteCount, + &FilteredWrittenBytesPerSecond, + RemoteChunkIndex, + ChunkTargetPtrs, + 
CompressedChunkPath = std::move(ExistingCompressedChunkPath)](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WritePreDownloaded"); + + FilteredWrittenBytesPerSecond.Start(); + + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + + IoBuffer CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (!CompressedPart) + { + throw std::runtime_error(fmt::format("Could not open dowloaded compressed chunk {} from {}", + ChunkHash, + CompressedChunkPath)); + } + + std::filesystem::path TargetFolder = Path / ZenTempCacheFolderName; + bool NeedHashVerify = WriteCompressedChunk(TargetFolder, + RemoteContent, + RemoteLookup, + ChunkHash, + ChunkTargetPtrs, + std::move(CompressedPart), + WriteToDiskBytes); + + if (!AbortFlag) + { + ChunkCountWritten++; + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + + std::filesystem::remove(CompressedChunkPath); + + CompleteChunkTargets(TargetFolder, + RemoteContent, + ChunkHash, + ChunkTargetPtrs, + SequenceIndexChunksLeftToWriteCounters, + NeedHashVerify); + } + } + }, + Work.DefaultErrorFunction()); + } + else + { + FilteredDownloadedBytesPerSecond.Start(); + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) + { + ZEN_TRACE_CPU("UpdateFolder_GetLargeChunk"); + DownloadLargeBlob(Storage, + Path / ZenTempDownloadFolderName, + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + Work, + NetworkPool, + BytesDownloaded, + MultipartAttachmentCount, + [&, RemoteChunkIndex, ChunkTargetPtrs](IoBuffer&& Payload) mutable { + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + AsyncWriteDownloadedChunk(Path, + RemoteContent, + RemoteLookup, + RemoteChunkIndex, + std::move(ChunkTargetPtrs), + Work, + 
WritePool, + std::move(Payload), + SequenceIndexChunksLeftToWriteCounters, + WriteToDiskBytes, + ChunkCountWritten, + WritePartsComplete, + TotalPartWriteCount, + LooseChunksBytes, + FilteredWrittenBytesPerSecond); + }); + } + else + { + ZEN_TRACE_CPU("UpdateFolder_GetChunk"); + + IoBuffer BuildBlob = Storage.GetBuildBlob(BuildId, ChunkHash); + if (!BuildBlob) + { + throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash)); + } + uint64_t BlobSize = BuildBlob.GetSize(); + BytesDownloaded += BlobSize; + + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + AsyncWriteDownloadedChunk(Path, + RemoteContent, + RemoteLookup, + RemoteChunkIndex, + std::move(ChunkTargetPtrs), + Work, + WritePool, + std::move(BuildBlob), + SequenceIndexChunksLeftToWriteCounters, + WriteToDiskBytes, + ChunkCountWritten, + WritePartsComplete, + TotalPartWriteCount, + LooseChunksBytes, + FilteredWrittenBytesPerSecond); + } + } + } + }, + Work.DefaultErrorFunction()); + } + + for (uint32_t BlockIndex : CachedChunkBlockIndexes) + { + if (AbortFlag) + { + break; + } + + Work.ScheduleWork( + WritePool, // GetSyncWorkerPool(), // WritePool, + [&, BlockIndex](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WriteCachedBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + FilteredWrittenBytesPerSecond.Start(); + + std::filesystem::path BlockChunkPath = Path / ZenTempBlockFolderName / BlockDescription.BlockHash.ToHexString(); + IoBuffer BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockBuffer) + { + throw std::runtime_error( + fmt::format("Can not read block {} at {}", BlockDescription.BlockHash, BlockChunkPath)); + } + + if (!WriteBlockToDisk(CacheFolderPath, + RemoteContent, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + CompositeBuffer(std::move(BlockBuffer)), + RemoteLookup, + 
RemoteChunkIndexNeedsCopyFromSourceFlags, + ChunkCountWritten, + WriteToDiskBytes)) + { + std::error_code DummyEc; + std::filesystem::remove(BlockChunkPath, DummyEc); + throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash)); + } + WritePartsComplete++; + std::filesystem::remove(BlockChunkPath); + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + + for (size_t BlockRangeIndex = 0; BlockRangeIndex < BlockRangeWorks.size(); BlockRangeIndex++) + { + if (AbortFlag) + { + break; + } + const BlockRangeDescriptor BlockRange = BlockRangeWorks[BlockRangeIndex]; + ZEN_ASSERT(BlockRange.BlockIndex != (uint32_t)-1); + const uint32_t BlockIndex = BlockRange.BlockIndex; + Work.ScheduleWork( + NetworkPool, // NetworkPool, // GetSyncWorkerPool() + [&, BlockIndex, BlockRange](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_GetPartialBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer BlockBuffer = + Storage.GetBuildBlob(BuildId, BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); + } + uint64_t BlockSize = BlockBuffer.GetSize(); + BytesDownloaded += BlockSize; + BlockBytes += BlockSize; + DownloadedBlocks++; + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + + std::filesystem::path BlockChunkPath; + + // Check if the dowloaded block is file based and we can move it directly without rewriting it + { + IoBufferFileReference FileRef; + if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && + (FileRef.FileChunkSize == BlockSize)) + { + ZEN_TRACE_CPU("UpdateFolder_MoveTempBlock"); + + std::error_code Ec; + 
std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) + { + BlockBuffer.SetDeleteOnClose(false); + BlockBuffer = {}; + BlockChunkPath = Path / ZenTempBlockFolderName / + fmt::format("{}_{:x}_{:x}", + BlockDescription.BlockHash, + BlockRange.RangeStart, + BlockRange.RangeLength); + std::filesystem::rename(TempBlobPath, BlockChunkPath, Ec); + if (Ec) + { + BlockChunkPath = std::filesystem::path{}; + + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true); + BlockBuffer.SetDeleteOnClose(true); + } + } + } + } + + if (BlockChunkPath.empty() && (BlockSize > 512u * 1024u)) + { + ZEN_TRACE_CPU("UpdateFolder_WriteTempBlock"); + // Could not be moved and rather large, lets store it on disk + BlockChunkPath = + Path / ZenTempBlockFolderName / + fmt::format("{}_{:x}_{:x}", BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); + TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer); + BlockBuffer = {}; + } + + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, // WritePool, // GetSyncWorkerPool(), + [&, BlockIndex, BlockRange, BlockChunkPath, BlockPartialBuffer = std::move(BlockBuffer)]( + std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WritePartialBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + if (BlockChunkPath.empty()) + { + ZEN_ASSERT(BlockPartialBuffer); + } + else + { + ZEN_ASSERT(!BlockPartialBuffer); + BlockPartialBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockPartialBuffer) + { + throw std::runtime_error(fmt::format("Could not open downloaded block {} from {}", + BlockDescription.BlockHash, + BlockChunkPath)); + } + } + + FilteredWrittenBytesPerSecond.Start(); + + if (!WritePartialBlockToDisk( + CacheFolderPath, + RemoteContent, + BlockDescription, + 
SequenceIndexChunksLeftToWriteCounters, + CompositeBuffer(std::move(BlockPartialBuffer)), + BlockRange.ChunkBlockIndexStart, + BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount - 1, + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags, + ChunkCountWritten, + WriteToDiskBytes)) + { + std::error_code DummyEc; + std::filesystem::remove(BlockChunkPath, DummyEc); + throw std::runtime_error( + fmt::format("Partial block {} is malformed", BlockDescription.BlockHash)); + } + WritePartsComplete++; + + if (!BlockChunkPath.empty()) + { + std::filesystem::remove(BlockChunkPath); + } + + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + } + }, + Work.DefaultErrorFunction()); + } + + for (uint32_t BlockIndex : FullBlockWorks) + { + if (AbortFlag) + { + break; + } + Work.ScheduleWork( + NetworkPool, // GetSyncWorkerPool(), // NetworkPool, + [&, BlockIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_GetFullBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescription.BlockHash); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); + } + uint64_t BlockSize = BlockBuffer.GetSize(); + BytesDownloaded += BlockSize; + BlockBytes += BlockSize; + DownloadedBlocks++; + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + + std::filesystem::path BlockChunkPath; + + // Check if the dowloaded block is file based and we can move it directly without rewriting it + { + IoBufferFileReference FileRef; + if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && + (FileRef.FileChunkSize == BlockSize)) + { + ZEN_TRACE_CPU("UpdateFolder_MoveTempBlock"); + 
std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) + { + BlockBuffer.SetDeleteOnClose(false); + BlockBuffer = {}; + BlockChunkPath = Path / ZenTempBlockFolderName / BlockDescription.BlockHash.ToHexString(); + std::filesystem::rename(TempBlobPath, BlockChunkPath, Ec); + if (Ec) + { + BlockChunkPath = std::filesystem::path{}; + + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true); + BlockBuffer.SetDeleteOnClose(true); + } + } + } + } + + if (BlockChunkPath.empty() && (BlockSize > 512u * 1024u)) + { + ZEN_TRACE_CPU("UpdateFolder_WriteTempBlock"); + // Could not be moved and rather large, lets store it on disk + BlockChunkPath = Path / ZenTempBlockFolderName / BlockDescription.BlockHash.ToHexString(); + TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer); + BlockBuffer = {}; + } + + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, // WritePool, GetSyncWorkerPool() + [&RemoteContent, + &RemoteLookup, + CacheFolderPath, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &SequenceIndexChunksLeftToWriteCounters, + BlockIndex, + &BlockDescriptions, + &ChunkCountWritten, + &WriteToDiskBytes, + &WritePartsComplete, + &TotalPartWriteCount, + &FilteredWrittenBytesPerSecond, + BlockChunkPath, + BlockBuffer = std::move(BlockBuffer)](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WriteFullBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + if (BlockChunkPath.empty()) + { + ZEN_ASSERT(BlockBuffer); + } + else + { + ZEN_ASSERT(!BlockBuffer); + BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Could not open dowloaded block {} from {}", + BlockDescription.BlockHash, + BlockChunkPath)); + } + } + + FilteredWrittenBytesPerSecond.Start(); + if 
(!WriteBlockToDisk(CacheFolderPath, + RemoteContent, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + CompositeBuffer(std::move(BlockBuffer)), + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags, + ChunkCountWritten, + WriteToDiskBytes)) + { + std::error_code DummyEc; + std::filesystem::remove(BlockChunkPath, DummyEc); + throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash)); + } + WritePartsComplete++; + + if (!BlockChunkPath.empty()) + { + std::filesystem::remove(BlockChunkPath); + } + + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + } + }, + Work.DefaultErrorFunction()); + } + + ZEN_DEBUG("Fetching {} with {} slack (ideal {}) out of {} using {} requests for {} blocks", + NiceBytes(AllBlocksFetch), + NiceBytes(AllBlocksSlack), + NiceBytes(AllBlockChunksSize), + NiceBytes(AllBlocksSize), + AllBlockRequests, + BlocksNeededCount); + { + ZEN_TRACE_CPU("WriteChunks_Wait"); + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + ZEN_ASSERT(ChunkCountToWrite >= ChunkCountWritten.load()); + FilteredWrittenBytesPerSecond.Update(WriteToDiskBytes.load()); + FilteredDownloadedBytesPerSecond.Update(BytesDownloaded.load()); + std::string Details = fmt::format("{}/{} ({} {}bits/s) downloaded. 
{}/{} ({} {}B/s) written.", + RequestsComplete.load(), + TotalRequestCount, + NiceBytes(BytesDownloaded.load()), + NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8), + ChunkCountWritten.load(), + ChunkCountToWrite, + NiceBytes(WriteToDiskBytes.load()), + NiceNum(FilteredWrittenBytesPerSecond.GetCurrent())); + WriteProgressBar.UpdateState({.Task = "Writing chunks ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(ChunkCountToWrite), + .RemainingCount = gsl::narrow<uint64_t>(ChunkCountToWrite - ChunkCountWritten.load())}, + false); + }); + } + + FilteredWrittenBytesPerSecond.Stop(); + FilteredDownloadedBytesPerSecond.Stop(); + + if (AbortFlag) + { + return; + } + + WriteProgressBar.Finish(); + + uint32_t RawSequencesMissingWriteCount = 0; + for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceIndexChunksLeftToWriteCounters.size(); SequenceIndex++) + { + const auto& SequenceIndexChunksLeftToWriteCounter = SequenceIndexChunksLeftToWriteCounters[SequenceIndex]; + if (SequenceIndexChunksLeftToWriteCounter.load() != 0) + { + RawSequencesMissingWriteCount++; + const uint32_t PathIndex = RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const std::filesystem::path& IncompletePath = RemoteContent.Paths[PathIndex]; + ZEN_ASSERT(!IncompletePath.empty()); + const uint32_t ExpectedSequenceCount = RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounter.load() <= ExpectedSequenceCount); + } + } + ZEN_ASSERT(RawSequencesMissingWriteCount == 0); + + ZEN_CONSOLE("Downloaded {} ({}bits/s) in {}. Wrote {} ({}B/s) in {}. 
Completed in {}", + NiceBytes(BytesDownloaded.load()), + NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), BytesDownloaded * 8)), + NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000), + NiceBytes(WriteToDiskBytes.load()), + NiceNum(GetBytesPerSecond(FilteredWrittenBytesPerSecond.GetElapsedTimeUS(), WriteToDiskBytes.load())), + NiceTimeSpanMs(FilteredWrittenBytesPerSecond.GetElapsedTimeUS() / 1000), + NiceTimeSpanMs(WriteTimer.GetElapsedTimeMs())); + } + + std::vector<std::pair<IoHash, uint32_t>> Targets; + Targets.reserve(RemoteContent.Paths.size()); + for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) + { + Targets.push_back(std::make_pair(RemoteContent.RawHashes[RemotePathIndex], RemotePathIndex)); + } + std::sort(Targets.begin(), Targets.end(), [](const std::pair<IoHash, uint32_t>& Lhs, const std::pair<IoHash, uint32_t>& Rhs) { + return Lhs.first < Rhs.first; + }); + + // Move all files we will reuse to cache folder + // TODO: If WipeTargetFolder is false we could check which files are already correct and leave them in place + if (!LocalPathIndexesMatchingSequenceIndexes.empty()) + { + ZEN_TRACE_CPU("UpdateFolder_CacheReused"); + uint64_t TotalFullFileSizeCached = 0; + for (uint32_t LocalPathIndex : LocalPathIndexesMatchingSequenceIndexes) + { + const IoHash& RawHash = LocalContent.RawHashes[LocalPathIndex]; + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash); + ZEN_ASSERT_SLOW(std::filesystem::exists(LocalFilePath)); + SetFileReadOnly(LocalFilePath, false); + ZEN_ASSERT_SLOW(!std::filesystem::exists(CacheFilePath)); + std::filesystem::rename(LocalFilePath, CacheFilePath); + TotalFullFileSizeCached += std::filesystem::file_size(CacheFilePath); + } + ZEN_CONSOLE("Saved {} ({}) unchanged files in cache", + 
LocalPathIndexesMatchingSequenceIndexes.size(), + NiceBytes(TotalFullFileSizeCached)); + } + + if (WipeTargetFolder) + { + ZEN_TRACE_CPU("UpdateFolder_WipeTarget"); + + // Clean target folder + ZEN_CONSOLE("Wiping {}", Path); + if (!CleanDirectory(Path, DefaultExcludeFolders)) + { + ZEN_WARN("Some files in {} could not be removed", Path); + } + } + else + { + ZEN_TRACE_CPU("UpdateFolder_RemoveUnused"); + + // Remove unused tracked files + tsl::robin_map<std::string, uint32_t> RemotePathToRemoteIndex; + RemotePathToRemoteIndex.reserve(RemoteContent.Paths.size()); + for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) + { + RemotePathToRemoteIndex.insert({RemoteContent.Paths[RemotePathIndex].generic_string(), RemotePathIndex}); + } + std::vector<std::filesystem::path> LocalFilesToRemove; + for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) + { + if (!RemotePathToRemoteIndex.contains(LocalContent.Paths[LocalPathIndex].generic_string())) + { + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + if (std::filesystem::exists(LocalFilePath)) + { + LocalFilesToRemove.emplace_back(std::move(LocalFilePath)); + } + } + } + if (!LocalFilesToRemove.empty()) + { + ZEN_CONSOLE("Cleaning {} removed files from {}", LocalFilesToRemove.size(), Path); + for (const std::filesystem::path& LocalFilePath : LocalFilesToRemove) + { + SetFileReadOnly(LocalFilePath, false); + std::filesystem::remove(LocalFilePath); + } + } + } + + { + ZEN_TRACE_CPU("UpdateFolder_FinalizeTree"); + + WorkerThreadPool& WritePool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + + ProgressBar RebuildProgressBar(UsePlainProgress); + ParallellWork Work(AbortFlag); + + OutLocalFolderState.Paths.resize(RemoteContent.Paths.size()); + OutLocalFolderState.RawSizes.resize(RemoteContent.Paths.size()); + 
OutLocalFolderState.Attributes.resize(RemoteContent.Paths.size()); + OutLocalFolderState.ModificationTicks.resize(RemoteContent.Paths.size()); + + std::atomic<uint64_t> TargetsComplete = 0; + + size_t TargetOffset = 0; + while (TargetOffset < Targets.size()) + { + if (AbortFlag) + { + break; + } + + size_t TargetCount = 1; + const IoHash& RawHash = Targets[TargetOffset].first; + while (Targets[TargetOffset + TargetCount].first == RawHash) + { + TargetCount++; + } + + Work.ScheduleWork( + WritePool, // GetSyncWorkerPool(),// + [&, BaseTargetOffset = TargetOffset, TargetCount](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("FinalizeTree_Work"); + + size_t TargetOffset = BaseTargetOffset; + const IoHash& RawHash = Targets[TargetOffset].first; + const uint32_t FirstTargetPathIndex = Targets[TargetOffset].second; + const std::filesystem::path& FirstTargetPath = RemoteContent.Paths[FirstTargetPathIndex]; + OutLocalFolderState.Paths[FirstTargetPathIndex] = FirstTargetPath; + OutLocalFolderState.RawSizes[FirstTargetPathIndex] = RemoteContent.RawSizes[FirstTargetPathIndex]; + const std::filesystem::path FirstTargetFilePath = (Path / FirstTargetPath).make_preferred(); + if (RawHash == IoHash::Zero) + { + if (std::filesystem::exists(FirstTargetFilePath)) + { + SetFileReadOnly(FirstTargetFilePath, false); + } + CreateDirectories(FirstTargetFilePath.parent_path()); + { + BasicFile OutputFile; + OutputFile.Open(FirstTargetFilePath, BasicFile::Mode::kTruncate); + } + } + else + { + ZEN_TRACE_CPU("FinalizeTree_MoveIntoPlace"); + + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash); + ZEN_ASSERT_SLOW(std::filesystem::exists(CacheFilePath)); + CreateDirectories(FirstTargetFilePath.parent_path()); + if (std::filesystem::exists(FirstTargetFilePath)) + { + SetFileReadOnly(FirstTargetFilePath, false); + } + std::filesystem::rename(CacheFilePath, FirstTargetFilePath); + } + + 
OutLocalFolderState.Attributes[FirstTargetPathIndex] = + RemoteContent.Attributes.empty() ? GetNativeFileAttributes(FirstTargetFilePath) + : SetNativeFileAttributes(FirstTargetFilePath, + RemoteContent.Platform, + RemoteContent.Attributes[FirstTargetPathIndex]); + OutLocalFolderState.ModificationTicks[FirstTargetPathIndex] = GetModificationTickFromPath(FirstTargetFilePath); + + TargetOffset++; + TargetsComplete++; + while (TargetOffset < (BaseTargetOffset + TargetCount)) + { + ZEN_TRACE_CPU("FinalizeTree_Copy"); + + ZEN_ASSERT(Targets[TargetOffset].first == RawHash); + ZEN_ASSERT_SLOW(std::filesystem::exists(FirstTargetFilePath)); + const uint32_t ExtraTargetPathIndex = Targets[TargetOffset].second; + const std::filesystem::path& ExtraTargetPath = RemoteContent.Paths[ExtraTargetPathIndex]; + const std::filesystem::path ExtraTargetFilePath = (Path / ExtraTargetPath).make_preferred(); + OutLocalFolderState.Paths[ExtraTargetPathIndex] = ExtraTargetPath; + OutLocalFolderState.RawSizes[ExtraTargetPathIndex] = RemoteContent.RawSizes[ExtraTargetPathIndex]; + CreateDirectories(ExtraTargetFilePath.parent_path()); + if (std::filesystem::exists(ExtraTargetFilePath)) + { + SetFileReadOnly(ExtraTargetFilePath, false); + } + CopyFile(FirstTargetFilePath, ExtraTargetFilePath, {.EnableClone = false}); + + OutLocalFolderState.Attributes[ExtraTargetPathIndex] = + RemoteContent.Attributes.empty() + ? GetNativeFileAttributes(ExtraTargetFilePath) + : SetNativeFileAttributes(ExtraTargetFilePath, + RemoteContent.Platform, + RemoteContent.Attributes[ExtraTargetPathIndex]); + OutLocalFolderState.ModificationTicks[ExtraTargetPathIndex] = + GetModificationTickFromPath(ExtraTargetFilePath); + + TargetOffset++; + TargetsComplete++; + } + } + }, + Work.DefaultErrorFunction()); + + TargetOffset += TargetCount; + } + + { + ZEN_TRACE_CPU("FinalizeTree_Wait"); + + Work.Wait(UsePlainProgress ? 
5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + std::string Details = fmt::format("{}/{} files", TargetsComplete.load(), Targets.size()); + RebuildProgressBar.UpdateState({.Task = "Rebuilding state ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(Targets.size()), + .RemainingCount = gsl::narrow<uint64_t>(Targets.size() - TargetsComplete.load())}, + false); + }); + } + + if (AbortFlag) + { + return; + } + + RebuildProgressBar.Finish(); + } + } + + std::vector<std::pair<Oid, std::string>> ResolveBuildPartNames(BuildStorage& Storage, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + std::uint64_t& OutPreferredMultipartChunkSize) + { + std::vector<std::pair<Oid, std::string>> Result; + { + Stopwatch GetBuildTimer; + + std::vector<std::pair<Oid, std::string>> AvailableParts; + + CbObject BuildObject = Storage.GetBuild(BuildId); + + ZEN_CONSOLE("GetBuild took {}. Name: '{}', Payload size: {}", + NiceTimeSpanMs(GetBuildTimer.GetElapsedTimeMs()), + BuildObject["BuildName"sv].AsString(), + NiceBytes(BuildObject.GetSize())); + + ZEN_DEBUG("Build object: {}", BuildObject); + + CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); + if (!PartsObject) + { + throw std::runtime_error("Build object does not have a 'parts' object"); + } + + OutPreferredMultipartChunkSize = BuildObject["chunkSize"sv].AsUInt64(OutPreferredMultipartChunkSize); + + for (CbFieldView PartView : PartsObject) + { + const std::string BuildPartName = std::string(PartView.GetName()); + const Oid BuildPartId = PartView.AsObjectId(); + if (BuildPartId == Oid::Zero) + { + ExtendableStringBuilder<128> SB; + for (CbFieldView ScanPartView : PartsObject) + { + SB.Append(fmt::format("\n {}: {}", ScanPartView.GetName(), ScanPartView.AsObjectId())); + } + throw std::runtime_error( + fmt::format("Build object parts does not have a '{}' object id{}", BuildPartName, SB.ToView())); + 
} + AvailableParts.push_back({BuildPartId, BuildPartName}); + } + + if (BuildPartIds.empty() && BuildPartNames.empty()) + { + Result = AvailableParts; + } + else + { + for (const std::string& BuildPartName : BuildPartNames) + { + if (auto It = std::find_if(AvailableParts.begin(), + AvailableParts.end(), + [&BuildPartName](const auto& Part) { return Part.second == BuildPartName; }); + It != AvailableParts.end()) + { + Result.push_back(*It); + } + else + { + throw std::runtime_error(fmt::format("Build {} object does not have a part named '{}'", BuildId, BuildPartName)); + } + } + for (const Oid& BuildPartId : BuildPartIds) + { + if (auto It = std::find_if(AvailableParts.begin(), + AvailableParts.end(), + [&BuildPartId](const auto& Part) { return Part.first == BuildPartId; }); + It != AvailableParts.end()) + { + Result.push_back(*It); + } + else + { + throw std::runtime_error(fmt::format("Build {} object does not have a part with id '{}'", BuildId, BuildPartId)); + } + } + } + + if (Result.empty()) + { + throw std::runtime_error(fmt::format("Build object does not have any parts", BuildId)); + } + } + return Result; + } + + ChunkedFolderContent GetRemoteContent(BuildStorage& Storage, + const Oid& BuildId, + const std::vector<std::pair<Oid, std::string>>& BuildParts, + std::unique_ptr<ChunkingController>& OutChunkController, + std::vector<ChunkedFolderContent>& OutPartContents, + std::vector<ChunkBlockDescription>& OutBlockDescriptions, + std::vector<IoHash>& OutLooseChunkHashes) + { + ZEN_TRACE_CPU("GetRemoteContent"); + + Stopwatch GetBuildPartTimer; + const Oid BuildPartId = BuildParts[0].first; + const std::string_view BuildPartName = BuildParts[0].second; + CbObject BuildPartManifest = Storage.GetBuildPart(BuildId, BuildPartId); + ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. 
Payload size: {}", + BuildPartId, + BuildPartName, + NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()), + NiceBytes(BuildPartManifest.GetSize())); + + { + CbObjectView Chunker = BuildPartManifest["chunker"sv].AsObjectView(); + std::string_view ChunkerName = Chunker["name"sv].AsString(); + CbObjectView Parameters = Chunker["parameters"sv].AsObjectView(); + OutChunkController = CreateChunkingController(ChunkerName, Parameters); + } + + auto ParseBuildPartManifest = [](BuildStorage& Storage, + const Oid& BuildId, + const Oid& BuildPartId, + CbObject BuildPartManifest, + ChunkedFolderContent& OutRemoteContent, + std::vector<ChunkBlockDescription>& OutBlockDescriptions, + std::vector<IoHash>& OutLooseChunkHashes) { + std::vector<uint32_t> AbsoluteChunkOrders; + std::vector<uint64_t> LooseChunkRawSizes; + std::vector<IoHash> BlockRawHashes; + + ReadBuildContentFromCompactBinary(BuildPartManifest, + OutRemoteContent.Platform, + OutRemoteContent.Paths, + OutRemoteContent.RawHashes, + OutRemoteContent.RawSizes, + OutRemoteContent.Attributes, + OutRemoteContent.ChunkedContent.SequenceRawHashes, + OutRemoteContent.ChunkedContent.ChunkCounts, + AbsoluteChunkOrders, + OutLooseChunkHashes, + LooseChunkRawSizes, + BlockRawHashes); + + // TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them + + Stopwatch GetBlockMetadataTimer; + OutBlockDescriptions = Storage.GetBlockMetadata(BuildId, BlockRawHashes); + ZEN_CONSOLE("GetBlockMetadata for {} took {}. 
Found {} blocks", + BuildPartId, + NiceTimeSpanMs(GetBlockMetadataTimer.GetElapsedTimeMs()), + OutBlockDescriptions.size()); + + if (OutBlockDescriptions.size() != BlockRawHashes.size()) + { + bool AttemptFallback = false; + std::string ErrorDescription = + fmt::format("All required blocks could not be found, {} blocks does not have metadata in this context.", + BlockRawHashes.size() - OutBlockDescriptions.size()); + if (AttemptFallback) + { + ZEN_CONSOLE("{} Attemping fallback options.", ErrorDescription); + std::vector<ChunkBlockDescription> AugmentedBlockDescriptions; + AugmentedBlockDescriptions.reserve(BlockRawHashes.size()); + std::vector<ChunkBlockDescription> FoundBlocks = Storage.FindBlocks(BuildId); + + for (const IoHash& BlockHash : BlockRawHashes) + { + if (auto It = std::find_if( + OutBlockDescriptions.begin(), + OutBlockDescriptions.end(), + [BlockHash](const ChunkBlockDescription& Description) { return Description.BlockHash == BlockHash; }); + It != OutBlockDescriptions.end()) + { + AugmentedBlockDescriptions.emplace_back(std::move(*It)); + } + else if (auto ListBlocksIt = std::find_if( + FoundBlocks.begin(), + FoundBlocks.end(), + [BlockHash](const ChunkBlockDescription& Description) { return Description.BlockHash == BlockHash; }); + ListBlocksIt != FoundBlocks.end()) + { + ZEN_CONSOLE("Found block {} via context find successfully", BlockHash); + AugmentedBlockDescriptions.emplace_back(std::move(*ListBlocksIt)); + } + else + { + IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockHash); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} could not be found", BlockHash)); + } + IoHash BlockRawHash; + uint64_t BlockRawSize; + CompressedBuffer CompressedBlockBuffer = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(BlockBuffer)), BlockRawHash, BlockRawSize); + if (!CompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", BlockHash)); + } + + if (BlockRawHash != 
BlockHash) + { + throw std::runtime_error( + fmt::format("Block {} header has a mismatching raw hash {}", BlockHash, BlockRawHash)); + } + + CompositeBuffer DecompressedBlockBuffer = CompressedBlockBuffer.DecompressToComposite(); + if (!DecompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} failed to decompress", BlockHash)); + } + + ChunkBlockDescription MissingChunkDescription = + GetChunkBlockDescription(DecompressedBlockBuffer.Flatten(), BlockHash); + AugmentedBlockDescriptions.emplace_back(std::move(MissingChunkDescription)); + } + } + OutBlockDescriptions.swap(AugmentedBlockDescriptions); + } + else + { + throw std::runtime_error(ErrorDescription); + } + } + + CalculateLocalChunkOrders(AbsoluteChunkOrders, + OutLooseChunkHashes, + LooseChunkRawSizes, + OutBlockDescriptions, + OutRemoteContent.ChunkedContent.ChunkHashes, + OutRemoteContent.ChunkedContent.ChunkRawSizes, + OutRemoteContent.ChunkedContent.ChunkOrders); + }; + + OutPartContents.resize(1); + ParseBuildPartManifest(Storage, + BuildId, + BuildPartId, + BuildPartManifest, + OutPartContents[0], + OutBlockDescriptions, + OutLooseChunkHashes); + ChunkedFolderContent RemoteContent; + if (BuildParts.size() > 1) + { + std::vector<ChunkBlockDescription> OverlayBlockDescriptions; + std::vector<IoHash> OverlayLooseChunkHashes; + for (size_t PartIndex = 1; PartIndex < BuildParts.size(); PartIndex++) + { + const Oid& OverlayBuildPartId = BuildParts[PartIndex].first; + const std::string& OverlayBuildPartName = BuildParts[PartIndex].second; + Stopwatch GetOverlayBuildPartTimer; + CbObject OverlayBuildPartManifest = Storage.GetBuildPart(BuildId, OverlayBuildPartId); + ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. 
Payload size: {}", + OverlayBuildPartId, + OverlayBuildPartName, + NiceTimeSpanMs(GetOverlayBuildPartTimer.GetElapsedTimeMs()), + NiceBytes(OverlayBuildPartManifest.GetSize())); + + ChunkedFolderContent OverlayPartContent; + std::vector<ChunkBlockDescription> OverlayPartBlockDescriptions; + std::vector<IoHash> OverlayPartLooseChunkHashes; + + ParseBuildPartManifest(Storage, + BuildId, + OverlayBuildPartId, + OverlayBuildPartManifest, + OverlayPartContent, + OverlayPartBlockDescriptions, + OverlayPartLooseChunkHashes); + OutPartContents.push_back(OverlayPartContent); + OverlayBlockDescriptions.insert(OverlayBlockDescriptions.end(), + OverlayPartBlockDescriptions.begin(), + OverlayPartBlockDescriptions.end()); + OverlayLooseChunkHashes.insert(OverlayLooseChunkHashes.end(), + OverlayPartLooseChunkHashes.begin(), + OverlayPartLooseChunkHashes.end()); + } + + RemoteContent = + MergeChunkedFolderContents(OutPartContents[0], std::span<const ChunkedFolderContent>(OutPartContents).subspan(1)); + { + tsl::robin_set<IoHash> AllBlockHashes; + for (const ChunkBlockDescription& Description : OutBlockDescriptions) + { + AllBlockHashes.insert(Description.BlockHash); + } + for (const ChunkBlockDescription& Description : OverlayBlockDescriptions) + { + if (!AllBlockHashes.contains(Description.BlockHash)) + { + AllBlockHashes.insert(Description.BlockHash); + OutBlockDescriptions.push_back(Description); + } + } + } + { + tsl::robin_set<IoHash> AllLooseChunkHashes(OutLooseChunkHashes.begin(), OutLooseChunkHashes.end()); + for (const IoHash& OverlayLooseChunkHash : OverlayLooseChunkHashes) + { + if (!AllLooseChunkHashes.contains(OverlayLooseChunkHash)) + { + AllLooseChunkHashes.insert(OverlayLooseChunkHash); + OutLooseChunkHashes.push_back(OverlayLooseChunkHash); + } + } + } + } + else + { + RemoteContent = OutPartContents[0]; + } + return RemoteContent; + } + + ChunkedFolderContent GetLocalContent(GetFolderContentStatistics& LocalFolderScanStats, + ChunkingStatistics& ChunkingStats, + 
const std::filesystem::path& Path, + ChunkingController& ChunkController) + { + ChunkedFolderContent LocalContent; + + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + auto IsAcceptedFile = [ExcludeExtensions = + DefaultExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> bool { + for (const std::string_view& ExcludeExtension : ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; + }; + + FolderContent CurrentLocalFolderContent = GetFolderContent( + LocalFolderScanStats, + Path, + std::move(IsAcceptedFolder), + std::move(IsAcceptedFile), + GetMediumWorkerPool(EWorkloadType::Burst), + UsePlainProgress ? 
5000 : 200, + [&](bool, std::ptrdiff_t) { ZEN_DEBUG("Found {} files in '{}'...", LocalFolderScanStats.AcceptedFileCount.load(), Path); }, + AbortFlag); + if (AbortFlag) + { + return {}; + } + + FolderContent LocalFolderState; + + bool ScanContent = true; + std::vector<uint32_t> PathIndexesOufOfDate; + if (std::filesystem::is_regular_file(Path / ZenStateFilePath)) + { + try + { + Stopwatch ReadStateTimer; + CbObject CurrentStateObject = LoadCompactBinaryObject(Path / ZenStateFilePath).Object; + if (CurrentStateObject) + { + Oid CurrentBuildId; + std::vector<Oid> SavedBuildPartIds; + std::vector<std::string> SavedBuildPartsNames; + std::vector<ChunkedFolderContent> SavedPartContents; + if (ReadStateObject(CurrentStateObject, + CurrentBuildId, + SavedBuildPartIds, + SavedBuildPartsNames, + SavedPartContents, + LocalFolderState)) + { + if (!SavedPartContents.empty()) + { + if (SavedPartContents.size() == 1) + { + LocalContent = std::move(SavedPartContents[0]); + } + else + { + LocalContent = + MergeChunkedFolderContents(SavedPartContents[0], + std::span<const ChunkedFolderContent>(SavedPartContents).subspan(1)); + } + + if (!LocalFolderState.AreKnownFilesEqual(CurrentLocalFolderContent)) + { + const size_t LocaStatePathCount = LocalFolderState.Paths.size(); + std::vector<std::filesystem::path> DeletedPaths; + FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, CurrentLocalFolderContent, DeletedPaths); + if (!DeletedPaths.empty()) + { + LocalContent = DeletePathsFromChunkedContent(LocalContent, DeletedPaths); + } + + ZEN_CONSOLE("Updating state, {} local files deleted and {} local files updated out of {}", + DeletedPaths.size(), + UpdatedContent.Paths.size(), + LocaStatePathCount); + if (UpdatedContent.Paths.size() > 0) + { + uint64_t ByteCountToScan = 0; + for (const uint64_t RawSize : UpdatedContent.RawSizes) + { + ByteCountToScan += RawSize; + } + ProgressBar ProgressBar(false); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + 
ChunkedFolderContent UpdatedLocalContent = ChunkFolderContent( + ChunkingStats, + GetMediumWorkerPool(EWorkloadType::Burst), + Path, + UpdatedContent, + ChunkController, + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + UpdatedContent.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(ByteCountToScan), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = ByteCountToScan, + .RemainingCount = ByteCountToScan - ChunkingStats.BytesHashed.load()}, + false); + }, + AbortFlag); + if (AbortFlag) + { + return {}; + } + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + LocalContent = MergeChunkedFolderContents(LocalContent, {{UpdatedLocalContent}}); + } + } + else + { + // Remove files from LocalContent no longer in LocalFolderState + tsl::robin_set<std::string> LocalFolderPaths; + LocalFolderPaths.reserve(LocalFolderState.Paths.size()); + for (const std::filesystem::path& LocalFolderPath : LocalFolderState.Paths) + { + LocalFolderPaths.insert(LocalFolderPath.generic_string()); + } + std::vector<std::filesystem::path> DeletedPaths; + for (const std::filesystem::path& LocalContentPath : LocalContent.Paths) + { + if (!LocalFolderPaths.contains(LocalContentPath.generic_string())) + { + DeletedPaths.push_back(LocalContentPath); + } + } + if (!DeletedPaths.empty()) + { + LocalContent = DeletePathsFromChunkedContent(LocalContent, DeletedPaths); + } + + ZEN_CONSOLE("Using cached local state"); + } + ZEN_CONSOLE("Read local state in {}", NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs())); + ScanContent = false; + } + } + } + } + catch (const std::exception& Ex) + { + 
ZEN_CONSOLE("Failed reading state file, falling back to scannning. Reason: {}", Ex.what()); + } + } + + if (ScanContent) + { + uint64_t ByteCountToScan = 0; + for (const uint64_t RawSize : CurrentLocalFolderContent.RawSizes) + { + ByteCountToScan += RawSize; + } + ProgressBar ProgressBar(false); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + ChunkedFolderContent UpdatedLocalContent = ChunkFolderContent( + ChunkingStats, + GetMediumWorkerPool(EWorkloadType::Burst), + Path, + CurrentLocalFolderContent, + ChunkController, + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + CurrentLocalFolderContent.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + ByteCountToScan, + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = ByteCountToScan, + .RemainingCount = (ByteCountToScan - ChunkingStats.BytesHashed.load())}, + false); + }, + AbortFlag); + + if (AbortFlag) + { + return {}; + } + + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + } + return LocalContent; + } + + void DownloadFolder(BuildStorage& Storage, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + const std::filesystem::path& Path, + bool AllowMultiparts, + bool AllowPartialBlockRequests, + bool WipeTargetFolder, + bool PostDownloadVerify) + { + ZEN_TRACE_CPU("DownloadFolder"); + + Stopwatch DownloadTimer; + + const std::filesystem::path ZenTempFolder = Path / ZenTempFolderName; + CreateDirectories(ZenTempFolder); + + CreateDirectories(Path / ZenTempBlockFolderName); + CreateDirectories(Path / ZenTempCacheFolderName); + 
CreateDirectories(Path / ZenTempDownloadFolderName); + + std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + + std::vector<std::pair<Oid, std::string>> AllBuildParts = + ResolveBuildPartNames(Storage, BuildId, BuildPartIds, BuildPartNames, PreferredMultipartChunkSize); + + std::vector<ChunkedFolderContent> PartContents; + + std::unique_ptr<ChunkingController> ChunkController; + + std::vector<ChunkBlockDescription> BlockDescriptions; + std::vector<IoHash> LooseChunkHashes; + + ChunkedFolderContent RemoteContent = + GetRemoteContent(Storage, BuildId, AllBuildParts, ChunkController, PartContents, BlockDescriptions, LooseChunkHashes); + + const std::uint64_t LargeAttachmentSize = AllowMultiparts ? PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + if (!ChunkController) + { + ZEN_CONSOLE("Warning: Unspecified chunking algorith, using default"); + ChunkController = CreateBasicChunkingController(); + } + + GetFolderContentStatistics LocalFolderScanStats; + ChunkingStatistics ChunkingStats; + ChunkedFolderContent LocalContent; + if (std::filesystem::is_directory(Path)) + { + if (!WipeTargetFolder) + { + LocalContent = GetLocalContent(LocalFolderScanStats, ChunkingStats, Path, *ChunkController); + } + } + else + { + CreateDirectories(Path); + } + if (AbortFlag) + { + return; + } + + auto CompareContent = [](const ChunkedFolderContent& Lhs, const ChunkedFolderContent& Rhs) { + tsl::robin_map<std::string, size_t> RhsPathToIndex; + const size_t RhsPathCount = Rhs.Paths.size(); + RhsPathToIndex.reserve(RhsPathCount); + for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + const size_t LhsPathCount = Lhs.Paths.size(); + for (size_t LhsPathIndex = 0; LhsPathIndex < LhsPathCount; LhsPathIndex++) + { + if (auto It = RhsPathToIndex.find(Lhs.Paths[LhsPathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const size_t RhsPathIndex = It->second; 
+ if ((Lhs.RawHashes[LhsPathIndex] != Rhs.RawHashes[RhsPathIndex]) || + (!FolderContent::AreFileAttributesEqual(Lhs.Attributes[LhsPathIndex], Rhs.Attributes[RhsPathIndex]))) + { + return false; + } + } + else + { + return false; + } + } + tsl::robin_set<std::string> LhsPathExists; + LhsPathExists.reserve(LhsPathCount); + for (size_t LhsPathIndex = 0; LhsPathIndex < LhsPathCount; LhsPathIndex++) + { + LhsPathExists.insert({Lhs.Paths[LhsPathIndex].generic_string()}); + } + for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + if (!LhsPathExists.contains(Rhs.Paths[RhsPathIndex].generic_string())) + { + return false; + } + } + + return true; + }; + + if (CompareContent(RemoteContent, LocalContent)) + { + ZEN_CONSOLE("Local state is identical to build to download. All done. Completed in {}.", + NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); + } + else + { + ExtendableStringBuilder<128> SB; + for (const std::pair<Oid, std::string>& BuildPart : AllBuildParts) + { + SB.Append(fmt::format(" {} ({})", BuildPart.second, BuildPart.first)); + } + ZEN_CONSOLE("Downloading build {}, parts:{}", BuildId, SB.ToView()); + FolderContent LocalFolderState; + UpdateFolder(Storage, + BuildId, + Path, + LargeAttachmentSize, + PreferredMultipartChunkSize, + LocalContent, + RemoteContent, + BlockDescriptions, + LooseChunkHashes, + AllowPartialBlockRequests, + WipeTargetFolder, + LocalFolderState); + + if (!AbortFlag) + { + VerifyFolder(RemoteContent, Path, PostDownloadVerify); + + Stopwatch WriteStateTimer; + CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderState); + + CreateDirectories((Path / ZenStateFilePath).parent_path()); + TemporaryFile::SafeWriteFile(Path / ZenStateFilePath, StateObject.GetView()); + ZEN_CONSOLE("Wrote local state in {}", NiceTimeSpanMs(WriteStateTimer.GetElapsedTimeMs())); + +#if 0 + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(StateObject, SB); + WriteFile(Path / 
ZenStateFileJsonPath, IoBuffer(IoBuffer::Wrap, SB.Data(), SB.Size())); +#endif // 0 + + ZEN_CONSOLE("Downloaded build in {}.", NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); + } + } + if (CleanDirectory(ZenTempFolder, {})) + { + std::filesystem::remove(ZenTempFolder); + } + } + + void DiffFolders(const std::filesystem::path& BasePath, const std::filesystem::path& ComparePath, bool OnlyChunked) + { + ChunkedFolderContent BaseFolderContent; + ChunkedFolderContent CompareFolderContent; + + { + std::unique_ptr<ChunkingController> ChunkController = CreateBasicChunkingController(); + std::vector<std::string_view> ExcludeExtensions = DefaultExcludeExtensions; + if (OnlyChunked) + { + ExcludeExtensions.insert(ExcludeExtensions.end(), + DefaultChunkingExcludeExtensions.begin(), + DefaultChunkingExcludeExtensions.end()); + } + + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + auto IsAcceptedFile = [ExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> bool { + for (const std::string_view& ExcludeExtension : ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; + }; + + GetFolderContentStatistics BaseGetFolderContentStats; + ChunkingStatistics BaseChunkingStats; + BaseFolderContent = ScanAndChunkFolder(BaseGetFolderContentStats, + BaseChunkingStats, + BasePath, + IsAcceptedFolder, + IsAcceptedFile, + *ChunkController); + if (AbortFlag) + { + return; + } + + GetFolderContentStatistics CompareGetFolderContentStats; + ChunkingStatistics CompareChunkingStats; + CompareFolderContent = 
ScanAndChunkFolder(CompareGetFolderContentStats, + CompareChunkingStats, + ComparePath, + IsAcceptedFolder, + IsAcceptedFile, + *ChunkController); + + if (AbortFlag) + { + return; + } + } + + std::vector<IoHash> AddedHashes; + std::vector<IoHash> RemovedHashes; + uint64_t RemovedSize = 0; + uint64_t AddedSize = 0; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseRawHashLookup; + for (size_t PathIndex = 0; PathIndex < BaseFolderContent.RawHashes.size(); PathIndex++) + { + const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex]; + BaseRawHashLookup.insert_or_assign(RawHash, PathIndex); + } + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CompareRawHashLookup; + for (size_t PathIndex = 0; PathIndex < CompareFolderContent.RawHashes.size(); PathIndex++) + { + const IoHash& RawHash = CompareFolderContent.RawHashes[PathIndex]; + if (!BaseRawHashLookup.contains(RawHash)) + { + AddedHashes.push_back(RawHash); + AddedSize += CompareFolderContent.RawSizes[PathIndex]; + } + CompareRawHashLookup.insert_or_assign(RawHash, PathIndex); + } + for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++) + { + const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex]; + if (!CompareRawHashLookup.contains(RawHash)) + { + RemovedHashes.push_back(RawHash); + RemovedSize += BaseFolderContent.RawSizes[PathIndex]; + } + } + + uint64_t BaseTotalRawSize = 0; + for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++) + { + BaseTotalRawSize += BaseFolderContent.RawSizes[PathIndex]; + } + + double KeptPercent = BaseTotalRawSize > 0 ? 
(100.0 * (BaseTotalRawSize - RemovedSize)) / BaseTotalRawSize : 0; + + ZEN_CONSOLE("{} ({}) files removed, {} ({}) files added, {} ({} {:.1f}%) files kept", + RemovedHashes.size(), + NiceBytes(RemovedSize), + AddedHashes.size(), + NiceBytes(AddedSize), + BaseFolderContent.Paths.size() - RemovedHashes.size(), + NiceBytes(BaseTotalRawSize - RemovedSize), + KeptPercent); + + uint64_t CompareTotalRawSize = 0; + + uint64_t FoundChunkCount = 0; + uint64_t FoundChunkSize = 0; + uint64_t NewChunkCount = 0; + uint64_t NewChunkSize = 0; + const ChunkedContentLookup BaseFolderLookup = BuildChunkedContentLookup(BaseFolderContent); + for (uint32_t ChunkIndex = 0; ChunkIndex < CompareFolderContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) + { + const IoHash& ChunkHash = CompareFolderContent.ChunkedContent.ChunkHashes[ChunkIndex]; + if (BaseFolderLookup.ChunkHashToChunkIndex.contains(ChunkHash)) + { + FoundChunkCount++; + FoundChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + else + { + NewChunkCount++; + NewChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + CompareTotalRawSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + + double FoundPercent = CompareTotalRawSize > 0 ? (100.0 * FoundChunkSize) / CompareTotalRawSize : 0; + double NewPercent = CompareTotalRawSize > 0 ? (100.0 * NewChunkSize) / CompareTotalRawSize : 0; + + ZEN_CONSOLE("Found {} ({} {:.1f}%) out of {} ({}) chunks in {} ({}) base chunks. 
Added {} ({} {:.1f}%) chunks.", + FoundChunkCount, + NiceBytes(FoundChunkSize), + FoundPercent, + CompareFolderContent.ChunkedContent.ChunkHashes.size(), + NiceBytes(CompareTotalRawSize), + BaseFolderContent.ChunkedContent.ChunkHashes.size(), + NiceBytes(BaseTotalRawSize), + NewChunkCount, + NiceBytes(NewChunkSize), + NewPercent); + } + +} // namespace + +////////////////////////////////////////////////////////////////////////////////////////////////////// + +BuildsCommand::BuildsCommand() +{ + m_Options.add_options()("h,help", "Print help"); + + auto AddAuthOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("", "", "system-dir", "Specify system root", cxxopts::value<std::string>(m_SystemRootDir), "<systemdir>"); + + // Direct access token (may expire) + Ops.add_option("auth-token", + "", + "access-token", + "Cloud/Builds Storage access token", + cxxopts::value(m_AccessToken), + "<accesstoken>"); + Ops.add_option("auth-token", + "", + "access-token-env", + "Name of environment variable that holds the cloud/builds Storage access token", + cxxopts::value(m_AccessTokenEnv)->default_value(DefaultAccessTokenEnvVariableName), + "<envvariable>"); + Ops.add_option("auth-token", + "", + "access-token-path", + "Path to json file that holds the cloud/builds Storage access token", + cxxopts::value(m_AccessTokenPath), + "<filepath>"); + + // Auth manager token encryption + Ops.add_option("security", + "", + "encryption-aes-key", + "256 bit AES encryption key", + cxxopts::value<std::string>(m_EncryptionKey), + ""); + Ops.add_option("security", + "", + "encryption-aes-iv", + "128 bit AES encryption initialization vector", + cxxopts::value<std::string>(m_EncryptionIV), + ""); + + // OpenId acccess token + Ops.add_option("openid", + "", + "openid-provider-name", + "Open ID provider name", + cxxopts::value<std::string>(m_OpenIdProviderName), + "Default"); + Ops.add_option("openid", "", "openid-provider-url", "Open ID provider url", 
cxxopts::value<std::string>(m_OpenIdProviderUrl), ""); + Ops.add_option("openid", "", "openid-client-id", "Open ID client id", cxxopts::value<std::string>(m_OpenIdClientId), ""); + Ops.add_option("openid", + "", + "openid-refresh-token", + "Open ID refresh token", + cxxopts::value<std::string>(m_OpenIdRefreshToken), + ""); + + // OAuth acccess token + Ops.add_option("oauth", "", "oauth-url", "OAuth provier url", cxxopts::value<std::string>(m_OAuthUrl)->default_value(""), ""); + Ops.add_option("oauth", + "", + "oauth-clientid", + "OAuth client id", + cxxopts::value<std::string>(m_OAuthClientId)->default_value(""), + ""); + Ops.add_option("oauth", + "", + "oauth-clientsecret", + "OAuth client secret", + cxxopts::value<std::string>(m_OAuthClientSecret)->default_value(""), + ""); + }; + + auto AddCloudOptions = [this, &AddAuthOptions](cxxopts::Options& Ops) { + AddAuthOptions(Ops); + + Ops.add_option("cloud build", "", "url", "Cloud Builds URL", cxxopts::value(m_BuildsUrl), "<url>"); + Ops.add_option("cloud build", + "", + "assume-http2", + "Assume that the builds endpoint is a HTTP/2 endpoint skipping HTTP/1.1 upgrade handshake", + cxxopts::value(m_AssumeHttp2), + "<assumehttp2>"); + + Ops.add_option("cloud build", "", "namespace", "Builds Storage namespace", cxxopts::value(m_Namespace), "<namespace>"); + Ops.add_option("cloud build", "", "bucket", "Builds Storage bucket", cxxopts::value(m_Bucket), "<bucket>"); + }; + + auto AddFileOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("filestorage", "", "storage-path", "Builds Storage Path", cxxopts::value(m_StoragePath), "<storagepath>"); + Ops.add_option("filestorage", + "", + "json-metadata", + "Write build, part and block metadata as .json files in addition to .cb files", + cxxopts::value(m_WriteMetadataAsJson), + "<jsonmetadata>"); + }; + + auto AddOutputOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("output", "", "plain-progress", "Show progress using plain output", 
cxxopts::value(m_PlainProgress), "<progress>"); + Ops.add_option("output", "", "verbose", "Enable verbose console output", cxxopts::value(m_Verbose), "<verbose>"); + }; + + m_Options.add_option("", "v", "verb", "Verb for build - list, upload, download, diff", cxxopts::value(m_Verb), "<verb>"); + m_Options.parse_positional({"verb"}); + m_Options.positional_help("verb"); + + // list + AddCloudOptions(m_ListOptions); + AddFileOptions(m_ListOptions); + AddOutputOptions(m_ListOptions); + m_ListOptions.add_options()("h,help", "Print help"); + + // upload + AddCloudOptions(m_UploadOptions); + AddFileOptions(m_UploadOptions); + AddOutputOptions(m_UploadOptions); + m_UploadOptions.add_options()("h,help", "Print help"); + m_UploadOptions.add_option("", "l", "local-path", "Root file system folder for build", cxxopts::value(m_Path), "<local-path>"); + m_UploadOptions.add_option("", + "", + "create-build", + "Set to true to create the containing build, if unset a builds-id must be given and the build already exist", + cxxopts::value(m_CreateBuild), + "<id>"); + m_UploadOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), "<id>"); + m_UploadOptions.add_option("", + "", + "build-part-id", + "Build part Id, if not given it will be auto generated", + cxxopts::value(m_BuildPartId), + "<id>"); + m_UploadOptions.add_option("", + "", + "build-part-name", + "Name of the build part, if not given it will be be named after the directory name at end of local-path", + cxxopts::value(m_BuildPartName), + "<name>"); + m_UploadOptions.add_option("", + "", + "metadata-path", + "Path to json file that holds the metadata for the build. Requires the create-build option to be set", + cxxopts::value(m_BuildMetadataPath), + "<metadata-path>"); + m_UploadOptions.add_option( + "", + "", + "metadata", + "Key-value pairs separated by ';' with build meta data. (key1=value1;key2=value2). 
Requires the create-build option to be set", + cxxopts::value(m_BuildMetadata), + "<metadata>"); + m_UploadOptions.add_option("", "", "clean", "Ignore existing blocks", cxxopts::value(m_Clean), "<clean>"); + m_UploadOptions.add_option("", + "", + "block-min-reuse", + "Percent of an existing block that must be relevant for it to be resused. Defaults to 85.", + cxxopts::value(m_BlockReuseMinPercentLimit), + "<minreuse>"); + m_UploadOptions.add_option("", + "", + "allow-multipart", + "Allow large attachments to be transfered using multipart protocol. Defaults to true.", + cxxopts::value(m_AllowMultiparts), + "<allowmultipart>"); + m_UploadOptions.add_option("", + "", + "manifest-path", + "Path to a text file with one line of <local path>[TAB]<modification date> per file to include.", + cxxopts::value(m_ManifestPath), + "<manifestpath>"); + m_UploadOptions + .add_option("", "", "verify", "Enable post upload verify of all uploaded data", cxxopts::value(m_PostUploadVerify), "<verify>"); + + m_UploadOptions.parse_positional({"local-path", "build-id"}); + m_UploadOptions.positional_help("local-path build-id"); + + // download + AddCloudOptions(m_DownloadOptions); + AddFileOptions(m_DownloadOptions); + AddOutputOptions(m_DownloadOptions); + m_DownloadOptions.add_options()("h,help", "Print help"); + m_DownloadOptions.add_option("", "l", "local-path", "Root file system folder for build", cxxopts::value(m_Path), "<local-path>"); + m_DownloadOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), "<id>"); + m_DownloadOptions.add_option( + "", + "", + "build-part-id", + "Build part Ids list separated by ',', if no build-part-ids or build-part-names are given all parts will be downloaded", + cxxopts::value(m_BuildPartIds), + "<id>"); + m_DownloadOptions.add_option("", + "", + "build-part-name", + "Name of the build parts list separated by ',', if no build-part-ids or build-part-names are given " + "all parts will be downloaded", + 
cxxopts::value(m_BuildPartNames), + "<name>"); + m_DownloadOptions + .add_option("", "", "clean", "Delete all data in target folder before downloading", cxxopts::value(m_Clean), "<clean>"); + m_DownloadOptions.add_option("", + "", + "allow-multipart", + "Allow large attachments to be transfered using multipart protocol. Defaults to true.", + cxxopts::value(m_AllowMultiparts), + "<allowmultipart>"); + m_DownloadOptions.add_option("", + "", + "allow-partial-block-requests", + "Allow request for partial chunk blocks. Defaults to true.", + cxxopts::value(m_AllowPartialBlockRequests), + "<allowpartialblockrequests>"); + m_DownloadOptions + .add_option("", "", "verify", "Enable post download verify of all tracked files", cxxopts::value(m_PostDownloadVerify), "<verify>"); + m_DownloadOptions.parse_positional({"local-path", "build-id", "build-part-name"}); + m_DownloadOptions.positional_help("local-path build-id build-part-name"); + + AddOutputOptions(m_DiffOptions); + m_DiffOptions.add_options()("h,help", "Print help"); + m_DiffOptions.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), "<local-path>"); + m_DiffOptions.add_option("", "c", "compare-path", "Root file system folder used as diff", cxxopts::value(m_DiffPath), "<diff-path>"); + m_DiffOptions.add_option("", + "", + "only-chunked", + "Skip files from diff summation that are not processed with chunking", + cxxopts::value(m_OnlyChunked), + "<only-chunked>"); + m_DiffOptions.parse_positional({"local-path", "compare-path"}); + m_DiffOptions.positional_help("local-path compare-path"); + + AddCloudOptions(m_TestOptions); + AddFileOptions(m_TestOptions); + AddOutputOptions(m_TestOptions); + m_TestOptions.add_options()("h,help", "Print help"); + m_TestOptions.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), "<local-path>"); + m_TestOptions.add_option("", + "", + "allow-multipart", + "Allow large attachments to be transfered 
using multipart protocol. Defaults to true.", + cxxopts::value(m_AllowMultiparts), + "<allowmultipart>"); + m_TestOptions.add_option("", + "", + "allow-partial-block-requests", + "Allow request for partial chunk blocks. Defaults to true.", + cxxopts::value(m_AllowPartialBlockRequests), + "<allowpartialblockrequests>"); + m_TestOptions.parse_positional({"local-path"}); + m_TestOptions.positional_help("local-path"); + + AddCloudOptions(m_FetchBlobOptions); + AddFileOptions(m_FetchBlobOptions); + AddOutputOptions(m_FetchBlobOptions); + m_FetchBlobOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), "<id>"); + m_FetchBlobOptions + .add_option("", "", "blob-hash", "IoHash in hex form identifying the blob to download", cxxopts::value(m_BlobHash), "<blob-hash>"); + m_FetchBlobOptions.parse_positional({"build-id", "blob-hash"}); + m_FetchBlobOptions.positional_help("build-id blob-hash"); + + AddCloudOptions(m_ValidateBuildPartOptions); + AddFileOptions(m_ValidateBuildPartOptions); + AddOutputOptions(m_ValidateBuildPartOptions); + m_ValidateBuildPartOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), "<id>"); + m_ValidateBuildPartOptions.add_option("", + "", + "build-part-id", + "Build part Id, if not given it will be auto generated", + cxxopts::value(m_BuildPartId), + "<id>"); + m_ValidateBuildPartOptions.add_option( + "", + "", + "build-part-name", + "Name of the build part, if not given it will be be named after the directory name at end of local-path", + cxxopts::value(m_BuildPartName), + "<name>"); + m_ValidateBuildPartOptions.parse_positional({"build-id", "build-part-id"}); + m_ValidateBuildPartOptions.positional_help("build-id build-part-id"); + + AddCloudOptions(m_MultiTestDownloadOptions); + AddFileOptions(m_MultiTestDownloadOptions); + AddOutputOptions(m_MultiTestDownloadOptions); + m_MultiTestDownloadOptions + .add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), 
"<local-path>"); + m_MultiTestDownloadOptions.add_option("", "", "build-ids", "Build Ids list separated by ','", cxxopts::value(m_BuildIds), "<ids>"); + m_MultiTestDownloadOptions.parse_positional({"local-path"}); + m_MultiTestDownloadOptions.positional_help("local-path"); +} + +BuildsCommand::~BuildsCommand() = default; + +int +BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) +{ + ZEN_UNUSED(GlobalOptions); + + signal(SIGINT, SignalCallbackHandler); +#if ZEN_PLATFORM_WINDOWS + signal(SIGBREAK, SignalCallbackHandler); +#endif // ZEN_PLATFORM_WINDOWS + + using namespace std::literals; + + std::vector<char*> SubCommandArguments; + cxxopts::Options* SubOption = nullptr; + int ParentCommandArgCount = GetSubCommand(m_Options, argc, argv, m_SubCommands, SubOption, SubCommandArguments); + if (!ParseOptions(ParentCommandArgCount, argv)) + { + return 0; + } + + if (SubOption == nullptr) + { + throw zen::OptionParseException("command verb is missing"); + } + + if (!ParseOptions(*SubOption, gsl::narrow<int>(SubCommandArguments.size()), SubCommandArguments.data())) + { + return 0; + } + + auto ParseStorageOptions = [&]() { + if (!m_BuildsUrl.empty()) + { + if (!m_StoragePath.empty()) + { + throw zen::OptionParseException(fmt::format("url is not compatible with the storage-path option\n{}", m_Options.help())); + } + if (m_Namespace.empty() || m_Bucket.empty()) + { + throw zen::OptionParseException( + fmt::format("namespace and bucket options are required for url option\n{}", m_Options.help())); + } + } + }; + + std::unique_ptr<AuthMgr> Auth; + HttpClientSettings ClientSettings{.AssumeHttp2 = m_AssumeHttp2, .AllowResume = true, .RetryCount = 2}; + + auto CreateAuthMgr = [&]() { + if (!Auth) + { + std::filesystem::path DataRoot = m_SystemRootDir.empty() ? 
PickDefaultSystemRootDirectory() : StringToPath(m_SystemRootDir); + + if (m_EncryptionKey.empty()) + { + m_EncryptionKey = "abcdefghijklmnopqrstuvxyz0123456"; + ZEN_CONSOLE("Warning: Using default encryption key"); + } + + if (m_EncryptionIV.empty()) + { + m_EncryptionIV = "0123456789abcdef"; + ZEN_CONSOLE("Warning: Using default encryption initialization vector"); + } + + AuthConfig AuthMgrConfig = {.RootDirectory = DataRoot / "auth", + .EncryptionKey = AesKey256Bit::FromString(m_EncryptionKey), + .EncryptionIV = AesIV128Bit::FromString(m_EncryptionIV)}; + if (!AuthMgrConfig.EncryptionKey.IsValid()) + { + throw zen::OptionParseException("Invalid AES encryption key"); + } + if (!AuthMgrConfig.EncryptionIV.IsValid()) + { + throw zen::OptionParseException("Invalid AES initialization vector"); + } + Auth = AuthMgr::Create(AuthMgrConfig); + } + }; + + auto ParseAuthOptions = [&]() { + if (!m_OpenIdProviderUrl.empty() && !m_OpenIdClientId.empty()) + { + CreateAuthMgr(); + std::string ProviderName = m_OpenIdProviderName.empty() ? 
"Default" : m_OpenIdProviderName; + Auth->AddOpenIdProvider({.Name = ProviderName, .Url = m_OpenIdProviderUrl, .ClientId = m_OpenIdClientId}); + if (!m_OpenIdRefreshToken.empty()) + { + Auth->AddOpenIdToken({.ProviderName = ProviderName, .RefreshToken = m_OpenIdRefreshToken}); + } + } + + if (!m_AccessToken.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromStaticToken(m_AccessToken); + } + else if (!m_AccessTokenPath.empty()) + { + std::string ResolvedAccessToken = ReadAccessTokenFromFile(m_AccessTokenPath); + if (!ResolvedAccessToken.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromStaticToken(ResolvedAccessToken); + } + } + else if (!m_AccessTokenEnv.empty()) + { + std::string ResolvedAccessToken = GetEnvVariable(m_AccessTokenEnv); + if (!ResolvedAccessToken.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromStaticToken(ResolvedAccessToken); + } + } + else if (!m_OAuthUrl.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromOAuthClientCredentials( + {.Url = m_OAuthUrl, .ClientId = m_OAuthClientId, .ClientSecret = m_OAuthClientSecret}); + } + else if (!m_OpenIdProviderName.empty()) + { + CreateAuthMgr(); + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromOpenIdProvider(*Auth, m_OpenIdProviderName); + } + else + { + CreateAuthMgr(); + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromDefaultOpenIdProvider(*Auth); + } + + if (!m_BuildsUrl.empty() && !ClientSettings.AccessTokenProvider) + { + ZEN_CONSOLE("Warning: No auth provider given, attempting operation without credentials."); + } + }; + + auto ParseOutputOptions = [&]() { + IsVerbose = m_Verbose; + UsePlainProgress = IsVerbose || m_PlainProgress; + }; + ParseOutputOptions(); + + try + { + if (SubOption == &m_ListOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + CbObjectWriter QueryWriter; + 
QueryWriter.BeginObject("query"); + { + // QueryWriter.BeginObject("platform"); + // { + // QueryWriter.AddString("$eq", "Windows"); + // } + // QueryWriter.EndObject(); // changelist + } + QueryWriter.EndObject(); // query + + BuildStorage::Statistics StorageStats; + std::unique_ptr<BuildStorage> Storage; + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Querying builds in cloud endpoint '{}'. SessionId: '{}'. Namespace '{}', Bucket '{}'", + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, std::filesystem::path{}); + } + else if (!m_StoragePath.empty()) + { + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Querying builds in folder '{}'.", StoragePath); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + CbObject Response = Storage->ListBuilds(QueryWriter.Save()); + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(Response.GetView(), SB); + ZEN_CONSOLE("{}", SB.ToView()); + return 0; + } + + if (SubOption == &m_UploadOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_UploadOptions.help())); + } + + if (m_CreateBuild) + { + if (m_BuildMetadataPath.empty() && m_BuildMetadata.empty()) + { + throw zen::OptionParseException(fmt::format("Options for builds target are missing\n{}", m_UploadOptions.help())); + } + if (!m_BuildMetadataPath.empty() && !m_BuildMetadata.empty()) + { + throw zen::OptionParseException(fmt::format("Conflicting options for builds target\n{}", m_UploadOptions.help())); + } + } + else + { + if (!m_BuildMetadataPath.empty()) + { + throw zen::OptionParseException( + 
fmt::format("metadata-path option is only valid if creating a build\n{}", m_UploadOptions.help())); + } + if (!m_BuildMetadata.empty()) + { + throw zen::OptionParseException( + fmt::format("metadata option is only valid if creating a build\n{}", m_UploadOptions.help())); + } + } + + std::filesystem::path Path = StringToPath(m_Path); + + if (m_BuildPartName.empty()) + { + m_BuildPartName = Path.filename().string(); + } + + const bool GeneratedBuildId = m_BuildId.empty(); + if (GeneratedBuildId) + { + m_BuildId = Oid::NewOid().ToString(); + } + else if (m_BuildId.length() != Oid::StringLength) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); + } + else if (Oid::FromHexString(m_BuildId) == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); + } + + const bool GeneratedBuildPartId = m_BuildPartId.empty(); + if (GeneratedBuildPartId) + { + m_BuildPartId = Oid::NewOid().ToString(); + } + else if (m_BuildPartId.length() != Oid::StringLength) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); + } + else if (Oid::FromHexString(m_BuildPartId) == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("Invalid build part id\n{}", m_UploadOptions.help())); + } + + BuildStorage::Statistics StorageStats; + const Oid BuildId = Oid::FromHexString(m_BuildId); + const Oid BuildPartId = Oid::FromHexString(m_BuildPartId); + std::unique_ptr<BuildStorage> Storage; + std::string StorageName; + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Uploading '{}' from '{}' to cloud endpoint '{}'. SessionId: '{}'. Namespace '{}', Bucket '{}', {}BuildId '{}'", + m_BuildPartName, + Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + GeneratedBuildId ? 
"Generated " : "", + BuildId); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Uploading '{}' from '{}' to folder '{}'. {}BuildId '{}'", + m_BuildPartName, + Path, + StoragePath, + GeneratedBuildId ? "Generated " : "", + BuildId); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, m_WriteMetadataAsJson, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + CbObject MetaData; + if (m_CreateBuild) + { + if (!m_BuildMetadataPath.empty()) + { + std::filesystem::path MetadataPath(m_BuildMetadataPath); + IoBuffer MetaDataJson = ReadFile(MetadataPath).Flatten(); + std::string_view Json(reinterpret_cast<const char*>(MetaDataJson.GetData()), MetaDataJson.GetSize()); + std::string JsonError; + MetaData = LoadCompactBinaryFromJson(Json, JsonError).AsObject(); + if (!JsonError.empty()) + { + throw std::runtime_error( + fmt::format("build metadata file '{}' is malformed. 
Reason: '{}'", m_BuildMetadataPath, JsonError)); + } + } + if (!m_BuildMetadata.empty()) + { + CbObjectWriter MetaDataWriter(1024); + ForEachStrTok(m_BuildMetadata, ';', [&](std::string_view Pair) { + size_t SplitPos = Pair.find('='); + if (SplitPos == std::string::npos || SplitPos == 0) + { + throw std::runtime_error(fmt::format("build metadata key-value pair '{}' is malformed", Pair)); + } + MetaDataWriter.AddString(Pair.substr(0, SplitPos), Pair.substr(SplitPos + 1)); + return true; + }); + MetaData = MetaDataWriter.Save(); + } + } + + UploadFolder(*Storage, + BuildId, + BuildPartId, + m_BuildPartName, + Path, + m_ManifestPath, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData, + m_CreateBuild, + m_Clean, + m_PostUploadVerify); + + if (false) + { + ZEN_CONSOLE( + "{}:\n" + "Read: {}\n" + "Write: {}\n" + "Requests: {}\n" + "Avg Request Time: {}\n" + "Avg I/O Time: {}", + StorageName, + NiceBytes(StorageStats.TotalBytesRead.load()), + NiceBytes(StorageStats.TotalBytesWritten.load()), + StorageStats.TotalRequestCount.load(), + StorageStats.TotalExecutionTimeUs.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalExecutionTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0, + StorageStats.TotalRequestCount.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalRequestTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0); + } + return AbortFlag ? 
11 : 0; + } + + if (SubOption == &m_DownloadOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + } + if (m_BuildId.empty()) + { + throw zen::OptionParseException(fmt::format("build-id is required\n{}", m_DownloadOptions.help())); + } + Oid BuildId = Oid::TryFromHexString(m_BuildId); + if (BuildId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("build-id is invalid\n{}", m_DownloadOptions.help())); + } + + if (!m_BuildPartName.empty() && !m_BuildPartId.empty()) + { + throw zen::OptionParseException(fmt::format("build-part-id conflicts with build-part-name\n{}", m_DownloadOptions.help())); + } + + std::vector<Oid> BuildPartIds; + for (const std::string& BuildPartId : m_BuildPartIds) + { + BuildPartIds.push_back(Oid::TryFromHexString(BuildPartId)); + if (BuildPartIds.back() == Oid::Zero) + { + throw zen::OptionParseException( + fmt::format("build-part-id '{}' is invalid\n{}", BuildPartId, m_DownloadOptions.help())); + } + } + + std::filesystem::path Path = StringToPath(m_Path); + + BuildStorage::Statistics StorageStats; + std::unique_ptr<BuildStorage> Storage; + std::string StorageName; + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Downloading '{}' to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + BuildId, + Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Downloading '{}' to '{}' from folder {}. 
BuildId '{}'", BuildId, Path, StoragePath, BuildId); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + DownloadFolder(*Storage, + BuildId, + BuildPartIds, + m_BuildPartNames, + Path, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + m_Clean, + m_PostDownloadVerify); + + if (false) + { + ZEN_CONSOLE( + "{}:\n" + "Read: {}\n" + "Write: {}\n" + "Requests: {}\n" + "Avg Request Time: {}\n" + "Avg I/O Time: {}", + StorageName, + NiceBytes(StorageStats.TotalBytesRead.load()), + NiceBytes(StorageStats.TotalBytesWritten.load()), + StorageStats.TotalRequestCount.load(), + StorageStats.TotalExecutionTimeUs.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalExecutionTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0, + StorageStats.TotalRequestCount.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalRequestTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0); + } + + return AbortFlag ? 11 : 0; + } + if (SubOption == &m_DiffOptions) + { + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + } + if (m_DiffPath.empty()) + { + throw zen::OptionParseException(fmt::format("compare-path is required\n{}", m_DownloadOptions.help())); + } + std::filesystem::path Path = StringToPath(m_Path); + DiffFolders(Path, m_DiffPath, m_OnlyChunked); + return AbortFlag ? 
11 : 0; + } + + if (SubOption == &m_MultiTestDownloadOptions) + { + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + } + + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + // m_StoragePath = "D:\\buildstorage"; + // m_Path = "F:\\Saved\\DownloadedBuilds\\++Fortnite+Main-CL-XXXXXXXX\\WindowsClient"; + // std::vector<std::string> BuildIdStrings{"07d3942f0e7f4ca1b13b0587", + // "07d394eed89d769f2254e75d", + // "07d3953f22fa3f8000fa6f0a", + // "07d3959df47ed1f42ddbe44c", + // "07d395fa7803d50804f14417", + // "07d3964f919d577a321a1fdd", + // "07d396a6ce875004e16b9528"}; + + std::filesystem::path Path = StringToPath(m_Path); + + BuildStorage::Statistics StorageStats; + std::unique_ptr<BuildStorage> Storage; + std::string StorageName; + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Downloading {} to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}'", + FormatArray<std::string>(m_BuildIds, " "sv), + Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Downloading {}'to '{}' from folder {}", FormatArray<std::string>(m_BuildIds, " "sv), Path, StoragePath); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + for (const std::string& BuildIdString : m_BuildIds) + { + Oid BuildId = Oid::FromHexString(BuildIdString); + if (BuildId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("invalid 
build id {}\n{}", BuildIdString, m_DownloadOptions.help())); + } + DownloadFolder(*Storage, + BuildId, + {}, + {}, + Path, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + BuildIdString == m_BuildIds.front(), + true); + if (AbortFlag) + { + ZEN_CONSOLE("Download cancelled"); + return 11; + } + ZEN_CONSOLE("\n"); + } + return 0; + } + + if (SubOption == &m_TestOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + } + + std::filesystem::path Path = StringToPath(m_Path); + + m_BuildId = Oid::NewOid().ToString(); + m_BuildPartName = Path.filename().string(); + m_BuildPartId = Oid::NewOid().ToString(); + m_CreateBuild = true; + + BuildStorage::Statistics StorageStats; + const Oid BuildId = Oid::FromHexString(m_BuildId); + const Oid BuildPartId = Oid::FromHexString(m_BuildPartId); + std::unique_ptr<BuildStorage> Storage; + std::string StorageName; + + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + + if (m_BuildsUrl.empty() && StoragePath.empty()) + { + m_StoragePath = (GetRunningExecutablePath().parent_path() / ".tmpstore").generic_string(); + CreateDirectories(StoragePath); + CleanDirectory(StoragePath, {}); + } + auto _ = MakeGuard([&]() { + if (m_BuildsUrl.empty() && StoragePath.empty()) + { + DeleteDirectories(StoragePath); + } + }); + + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Using '{}' to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + m_BuildPartName.empty() ? 
m_BuildPartId : m_BuildPartName, + Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!StoragePath.empty()) + { + ZEN_CONSOLE("Using '{}' to '{}' from folder {}. BuildId '{}'", + m_BuildPartName.empty() ? m_BuildPartId : m_BuildPartName, + Path, + StoragePath, + BuildId); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + auto MakeMetaData = [](const Oid& BuildId) -> CbObject { + CbObjectWriter BuildMetaDataWriter; + { + const uint32_t CL = BuildId.OidBits[2]; + BuildMetaDataWriter.AddString("name", fmt::format("++Test+Main-CL-{}", CL)); + BuildMetaDataWriter.AddString("branch", "ZenTestBuild"); + BuildMetaDataWriter.AddString("baselineBranch", "ZenTestBuild"); + BuildMetaDataWriter.AddString("platform", "Windows"); + BuildMetaDataWriter.AddString("project", "Test"); + BuildMetaDataWriter.AddInteger("changelist", CL); + BuildMetaDataWriter.AddString("buildType", "test-folder"); + } + return BuildMetaDataWriter.Save(); + }; + CbObject MetaData = MakeMetaData(Oid::TryFromHexString(m_BuildId)); + { + ExtendableStringBuilder<256> SB; + CompactBinaryToJson(MetaData, SB); + ZEN_CONSOLE("Upload Build {}, Part {} ({})\n{}", m_BuildId, BuildPartId, m_BuildPartName, SB.ToView()); + } + + UploadFolder(*Storage, + BuildId, + BuildPartId, + m_BuildPartName, + Path, + {}, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData, + true, + false, + true); + if (AbortFlag) + { + ZEN_CONSOLE("Upload failed."); + return 11; + } + + const std::filesystem::path DownloadPath = Path.parent_path() / (m_BuildPartName + "_download"); + 
ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}'", BuildId, BuildPartId, m_BuildPartName, DownloadPath); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, m_AllowPartialBlockRequests, true, true); + if (AbortFlag) + { + ZEN_CONSOLE("Download failed."); + return 11; + } + + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (identical target)", + BuildId, + BuildPartId, + m_BuildPartName, + DownloadPath); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, m_AllowPartialBlockRequests, false, true); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed. (identical target)"); + return 11; + } + + auto ScrambleDir = [](const std::filesystem::path& Path) { + ZEN_CONSOLE("\nScrambling '{}'", Path); + Stopwatch Timer; + DirectoryContent DownloadContent; + GetDirectoryContent( + Path, + DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, + DownloadContent); + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders, Path](const std::filesystem::path& AbsolutePath) -> bool { + std::string RelativePath = std::filesystem::relative(AbsolutePath, Path).generic_string(); + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + ParallellWork Work(AbortFlag); + + uint32_t Randomizer = 0; + auto FileSizeIt = DownloadContent.FileSizes.begin(); + for (const std::filesystem::path& FilePath : DownloadContent.Files) + { + if (IsAcceptedFolder(FilePath)) + { + uint32_t Case = (Randomizer++) % 7; + switch (Case) + { + case 0: + { + uint64_t SourceSize = *FileSizeIt; + if (SourceSize > 0) + { + Work.ScheduleWork( + GetMediumWorkerPool(EWorkloadType::Burst), + [SourceSize, 
FilePath](std::atomic<bool>&) { + if (!AbortFlag) + { + IoBuffer Scrambled(SourceSize); + { + IoBuffer Source = IoBufferBuilder::MakeFromFile(FilePath); + Scrambled.GetMutableView().CopyFrom( + Source.GetView().Mid(SourceSize / 3, SourceSize / 3)); + Scrambled.GetMutableView() + .Mid(SourceSize / 3) + .CopyFrom(Source.GetView().Mid(0, SourceSize / 3)); + Scrambled.GetMutableView() + .Mid((SourceSize / 3) * 2) + .CopyFrom(Source.GetView().Mid(SourceSize / 2, SourceSize / 3)); + } + bool IsReadOnly = SetFileReadOnly(FilePath, false); + WriteFile(FilePath, Scrambled); + if (IsReadOnly) + { + SetFileReadOnly(FilePath, true); + } + } + }, + Work.DefaultErrorFunction()); + } + } + break; + case 1: + std::filesystem::remove(FilePath); + break; + default: + break; + } + } + FileSizeIt++; + } + Work.Wait(5000, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted); + ZEN_CONSOLE("Scrambling files, {} remaining", PendingWork); + }); + ZEN_ASSERT(!AbortFlag.load()); + ZEN_CONSOLE("Scrambled files in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }; + + ScrambleDir(DownloadPath); + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled target)", + BuildId, + BuildPartId, + m_BuildPartName, + DownloadPath); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, m_AllowPartialBlockRequests, false, true); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed. 
(scrambled target)"); + return 11; + } + + ScrambleDir(DownloadPath); + + Oid BuildId2 = Oid::NewOid(); + Oid BuildPartId2 = Oid::NewOid(); + + CbObject MetaData2 = MakeMetaData(BuildId2); + { + ExtendableStringBuilder<256> SB; + CompactBinaryToJson(MetaData2, SB); // log the metadata that is actually uploaded for the scrambled build + ZEN_CONSOLE("\nUpload scrambled Build {}, Part {} ({})\n{}\n", BuildId2, BuildPartId2, m_BuildPartName, SB.ToView()); + } + + UploadFolder(*Storage, + BuildId2, + BuildPartId2, + m_BuildPartName, + DownloadPath, + {}, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData2, + true, + false, + true); + if (AbortFlag) + { + ZEN_CONSOLE("Upload of scrambled failed."); + return 11; + } + + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (original)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, m_AllowPartialBlockRequests, false, true); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); + DownloadFolder(*Storage, + BuildId2, + {BuildPartId2}, + {}, + DownloadPath, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); + DownloadFolder(*Storage, + BuildId2, + {BuildPartId2}, + {}, + DownloadPath, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + return 0; + } + + if (SubOption == &m_FetchBlobOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + if (m_BlobHash.empty()) + { + throw zen::OptionParseException(fmt::format("Blob hash string is missing\n{}", 
m_UploadOptions.help())); + } + + IoHash BlobHash; + if (!IoHash::TryParse(m_BlobHash, BlobHash)) + { + throw zen::OptionParseException(fmt::format("Blob hash string is invalid\n{}", m_UploadOptions.help())); + } + + if (m_BuildsUrl.empty() && m_StoragePath.empty()) + { + throw zen::OptionParseException(fmt::format("At least one storage option is required\n{}", m_UploadOptions.help())); + } + + BuildStorage::Statistics StorageStats; + const Oid BuildId = Oid::FromHexString(m_BuildId); + std::unique_ptr<BuildStorage> Storage; + std::string StorageName; + + std::filesystem::path Path = StringToPath(m_Path); + + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Using from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Using folder {}. 
BuildId '{}'", StoragePath, BuildId); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateBlob(*Storage, BuildId, BlobHash, CompressedSize, DecompressedSize); + if (AbortFlag) + { + return 11; + } + ZEN_CONSOLE("Blob '{}' has a compressed size {} and a decompressed size of {} bytes", + BlobHash, + CompressedSize, + DecompressedSize); + return 0; + } + + if (SubOption == &m_ValidateBuildPartOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + if (m_BuildsUrl.empty() && m_StoragePath.empty()) + { + throw zen::OptionParseException(fmt::format("At least one storage option is required\n{}", m_UploadOptions.help())); + } + + if (m_BuildId.empty()) + { + throw zen::OptionParseException(fmt::format("build-id is required\n{}", m_DownloadOptions.help())); + } + Oid BuildId = Oid::TryFromHexString(m_BuildId); + if (BuildId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("build-id is invalid\n{}", m_DownloadOptions.help())); + } + + if (!m_BuildPartName.empty() && !m_BuildPartId.empty()) + { + throw zen::OptionParseException(fmt::format("build-part-id conflicts with build-part-name\n{}", m_DownloadOptions.help())); + } + + BuildStorage::Statistics StorageStats; + std::unique_ptr<BuildStorage> Storage; + std::string StorageName; + + std::filesystem::path Path = StringToPath(m_Path); + + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Using from cloud endpoint {}. SessionId: '{}'. 
Namespace '{}', Bucket '{}', BuildId '{}'", + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Using folder {}. BuildId '{}'", StoragePath, BuildId); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + Oid BuildPartId = Oid::TryFromHexString(m_BuildPartId); + + ValidateBuildPart(*Storage, BuildId, BuildPartId, m_BuildPartName); + + return AbortFlag ? 13 : 0; + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("{}", Ex.what()); + return 3; + } + ZEN_ASSERT(false); +} + +} // namespace zen diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h new file mode 100644 index 000000000..60953efad --- /dev/null +++ b/src/zen/cmds/builds_cmd.h @@ -0,0 +1,113 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include "../zen.h" + +#include <zenhttp/auth/authmgr.h> +#include <zenhttp/httpclientauth.h> +#include <filesystem> + +namespace zen { + +class BuildsCommand : public CacheStoreCommand +{ +public: + static constexpr char Name[] = "builds"; + static constexpr char Description[] = "Manage builds - list, upload, download, diff"; + + BuildsCommand(); + ~BuildsCommand(); + + virtual int Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) override; + virtual cxxopts::Options& Options() override { return m_Options; } + +private: + cxxopts::Options m_Options{Name, Description}; + + std::string m_SystemRootDir; + + bool m_PlainProgress = false; + bool m_Verbose = false; + + // cloud builds + std::string m_BuildsUrl; + bool m_AssumeHttp2 = false; + std::string m_Namespace; + std::string m_Bucket; + + // file storage + std::string m_StoragePath; + bool m_WriteMetadataAsJson = false; + + std::string m_BuildId; + bool m_CreateBuild = false; + std::string m_BuildMetadataPath; + std::string m_BuildMetadata; + std::string m_BuildPartName; // Defaults to name of leaf folder in m_Path + std::string m_BuildPartId; // Defaults to a generated id when creating part, looked up when downloading using m_BuildPartName + bool m_Clean = false; + uint8_t m_BlockReuseMinPercentLimit = 85; + bool m_AllowMultiparts = true; + bool m_AllowPartialBlockRequests = true; + std::string m_ManifestPath; + + // Direct access token (may expire) + std::string m_AccessToken; + std::string m_AccessTokenEnv; + std::string m_AccessTokenPath; + + // Auth manager token encryption + std::string m_EncryptionKey; // 256 bit AES encryption key + std::string m_EncryptionIV; // 128 bit AES initialization vector + + // OpenId access token + std::string m_OpenIdProviderName; + std::string m_OpenIdProviderUrl; + std::string m_OpenIdClientId; + std::string m_OpenIdRefreshToken; + + // OAuth access token + std::string m_OAuthUrl; + std::string m_OAuthClientId; + std::string 
m_OAuthClientSecret; + + std::string m_Verb; // list, upload, download + + cxxopts::Options m_ListOptions{"list", "List available builds"}; + + std::string m_Path; + + cxxopts::Options m_UploadOptions{"upload", "Upload a folder"}; + bool m_PostUploadVerify = false; + + cxxopts::Options m_DownloadOptions{"download", "Download a folder"}; + std::vector<std::string> m_BuildPartNames; + std::vector<std::string> m_BuildPartIds; + bool m_PostDownloadVerify = false; + + cxxopts::Options m_DiffOptions{"diff", "Compare two local folders"}; + std::string m_DiffPath; + bool m_OnlyChunked = false; + + cxxopts::Options m_TestOptions{"test", "Test upload and download with verify"}; + + cxxopts::Options m_MultiTestDownloadOptions{"multi-test-download", "Test multiple sequenced downloads with verify"}; + std::vector<std::string> m_BuildIds; + + cxxopts::Options m_FetchBlobOptions{"fetch-blob", "Fetch a blob from remote store"}; + std::string m_BlobHash; + + cxxopts::Options m_ValidateBuildPartOptions{"validate-part", "Fetch a build part and validate all referenced attachments"}; + + cxxopts::Options* m_SubCommands[8] = {&m_ListOptions, + &m_UploadOptions, + &m_DownloadOptions, + &m_DiffOptions, + &m_TestOptions, + &m_FetchBlobOptions, + &m_ValidateBuildPartOptions, + &m_MultiTestDownloadOptions}; +}; + +} // namespace zen diff --git a/src/zen/cmds/copy_cmd.cpp b/src/zen/cmds/copy_cmd.cpp index d42d3c107..cc6ddd505 100644 --- a/src/zen/cmds/copy_cmd.cpp +++ b/src/zen/cmds/copy_cmd.cpp @@ -120,7 +120,11 @@ CopyCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { } - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, + const path_view& File, + uint64_t FileSize, + uint32_t, + uint64_t) override { ZEN_UNUSED(FileSize); std::error_code Ec; diff --git a/src/zen/cmds/serve_cmd.cpp b/src/zen/cmds/serve_cmd.cpp index 8e36e74ce..f87725e36 
100644 --- a/src/zen/cmds/serve_cmd.cpp +++ b/src/zen/cmds/serve_cmd.cpp @@ -120,7 +120,7 @@ ServeCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) struct FsVisitor : public FileSystemTraversal::TreeVisitor { - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { std::filesystem::path ServerPath = std::filesystem::relative(Parent / File, RootPath); std::string ServerPathString = reinterpret_cast<const char*>(ServerPath.generic_u8string().c_str()); diff --git a/src/zen/cmds/service_cmd.cpp b/src/zen/cmds/service_cmd.cpp index b3872dae7..386046e66 100644 --- a/src/zen/cmds/service_cmd.cpp +++ b/src/zen/cmds/service_cmd.cpp @@ -161,12 +161,7 @@ ServiceCommand::ServiceCommand() cxxopts::value(m_ServiceName), "<name>"); - m_InstallOptions.add_option("", - "u", - "user", - "User to run service as, defaults to current user", - cxxopts::value(m_UserName), - "<user>"); + m_InstallOptions.add_option("", "u", "user", "User to run service as, defaults to current user", cxxopts::value(m_UserName), "<user>"); #if ZEN_PLATFORM_WINDOWS m_InstallOptions.add_option("", "d", diff --git a/src/zen/cmds/status_cmd.cpp b/src/zen/cmds/status_cmd.cpp index 16754e747..4d1534e05 100644 --- a/src/zen/cmds/status_cmd.cpp +++ b/src/zen/cmds/status_cmd.cpp @@ -32,16 +32,17 @@ StatusCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) uint16_t EffectivePort = 0; if (!m_DataDir.empty()) { - if (!std::filesystem::is_regular_file(m_DataDir / ".lock")) + std::filesystem::path DataDir = StringToPath(m_DataDir); + if (!std::filesystem::is_regular_file(DataDir / ".lock")) { - ZEN_CONSOLE("lock file does not exist in directory '{}'", m_DataDir); + ZEN_CONSOLE("lock file does not exist in directory '{}'", DataDir); return 1; } - LockFileInfo Info = 
ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(m_DataDir / ".lock"))); + LockFileInfo Info = ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(DataDir / ".lock"))); std::string Reason; if (!ValidateLockFileInfo(Info, Reason)) { - ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", m_DataDir, Reason); + ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", DataDir, Reason); return 1; } EffectivePort = Info.EffectiveListenPort; diff --git a/src/zen/cmds/status_cmd.h b/src/zen/cmds/status_cmd.h index 46bda9ee6..00ad0e758 100644 --- a/src/zen/cmds/status_cmd.h +++ b/src/zen/cmds/status_cmd.h @@ -20,9 +20,9 @@ public: private: int GetLockFileEffectivePort() const; - cxxopts::Options m_Options{"status", "Show zen status"}; - uint16_t m_Port = 0; - std::filesystem::path m_DataDir; + cxxopts::Options m_Options{"status", "Show zen status"}; + uint16_t m_Port = 0; + std::string m_DataDir; }; } // namespace zen diff --git a/src/zen/cmds/up_cmd.cpp b/src/zen/cmds/up_cmd.cpp index ac2f42a86..44a41146c 100644 --- a/src/zen/cmds/up_cmd.cpp +++ b/src/zen/cmds/up_cmd.cpp @@ -77,13 +77,15 @@ UpCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } } - if (m_ProgramBaseDir.empty()) + std::filesystem::path ProgramBaseDir = StringToPath(m_ProgramBaseDir); + + if (ProgramBaseDir.empty()) { std::filesystem::path ExePath = zen::GetRunningExecutablePath(); - m_ProgramBaseDir = ExePath.parent_path(); + ProgramBaseDir = ExePath.parent_path(); } ZenServerEnvironment ServerEnvironment; - ServerEnvironment.Initialize(m_ProgramBaseDir); + ServerEnvironment.Initialize(ProgramBaseDir); ZenServerInstance Server(ServerEnvironment); std::string ServerArguments = GlobalOptions.PassthroughCommandLine; if ((m_Port != 0) && (ServerArguments.find("--port"sv) == std::string::npos)) @@ -153,18 +155,20 @@ AttachCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) Instance.Sweep(); 
ZenServerState::ZenServerEntry* Entry = Instance.Lookup(m_Port); - if (!m_DataDir.empty()) + std::filesystem::path DataDir = StringToPath(m_DataDir); + + if (!DataDir.empty()) { - if (!std::filesystem::is_regular_file(m_DataDir / ".lock")) + if (!std::filesystem::is_regular_file(DataDir / ".lock")) { - ZEN_CONSOLE("lock file does not exist in directory '{}'", m_DataDir); + ZEN_CONSOLE("lock file does not exist in directory '{}'", DataDir); return 1; } - LockFileInfo Info = ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(m_DataDir / ".lock"))); + LockFileInfo Info = ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(DataDir / ".lock"))); std::string Reason; if (!ValidateLockFileInfo(Info, Reason)) { - ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", m_DataDir, Reason); + ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", DataDir, Reason); return 1; } Entry = Instance.LookupByEffectivePort(Info.EffectiveListenPort); @@ -214,24 +218,27 @@ DownCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) Instance.Initialize(); ZenServerState::ZenServerEntry* Entry = Instance.Lookup(m_Port); - if (m_ProgramBaseDir.empty()) + std::filesystem::path ProgramBaseDir = StringToPath(m_ProgramBaseDir); + if (ProgramBaseDir.empty()) { - std::filesystem::path ExePath = zen::GetRunningExecutablePath(); - m_ProgramBaseDir = ExePath.parent_path(); + std::filesystem::path ExePath = GetRunningExecutablePath(); + ProgramBaseDir = ExePath.parent_path(); } - if (!m_DataDir.empty()) + std::filesystem::path DataDir = StringToPath(m_DataDir); + + if (!DataDir.empty()) { - if (!std::filesystem::is_regular_file(m_DataDir / ".lock")) + if (!std::filesystem::is_regular_file(DataDir / ".lock")) { - ZEN_CONSOLE("lock file does not exist in directory '{}'", m_DataDir); + ZEN_CONSOLE("lock file does not exist in directory '{}'", DataDir); return 1; } - LockFileInfo Info = 
ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(m_DataDir / ".lock"))); + LockFileInfo Info = ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(DataDir / ".lock"))); std::string Reason; if (!ValidateLockFileInfo(Info, Reason)) { - ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", m_DataDir, Reason); + ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", DataDir, Reason); return 1; } Entry = Instance.LookupByEffectivePort(Info.EffectiveListenPort); @@ -244,7 +251,7 @@ DownCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) try { ZenServerEnvironment ServerEnvironment; - ServerEnvironment.Initialize(m_ProgramBaseDir); + ServerEnvironment.Initialize(ProgramBaseDir); ZenServerInstance Server(ServerEnvironment); Server.AttachToRunningServer(EntryPort); @@ -309,7 +316,7 @@ DownCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) if (m_ForceTerminate) { // Try to find the running executable by path name - std::filesystem::path ServerExePath = m_ProgramBaseDir / "zenserver" ZEN_EXE_SUFFIX_LITERAL; + std::filesystem::path ServerExePath = ProgramBaseDir / "zenserver" ZEN_EXE_SUFFIX_LITERAL; ProcessHandle RunningProcess; if (std::error_code Ec = FindProcess(ServerExePath, RunningProcess); !Ec) { diff --git a/src/zen/cmds/up_cmd.h b/src/zen/cmds/up_cmd.h index c9af16749..32d8ddab3 100644 --- a/src/zen/cmds/up_cmd.h +++ b/src/zen/cmds/up_cmd.h @@ -18,11 +18,11 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{"up", "Bring up zen service"}; - uint16_t m_Port = 0; - bool m_ShowConsole = false; - bool m_ShowLog = false; - std::filesystem::path m_ProgramBaseDir; + cxxopts::Options m_Options{"up", "Bring up zen service"}; + uint16_t m_Port = 0; + bool m_ShowConsole = false; + bool m_ShowLog = false; + std::string m_ProgramBaseDir; }; class AttachCommand : public ZenCmdBase @@ -35,10 +35,10 @@ 
public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{"attach", "Add a sponsor process to a running zen service"}; - uint16_t m_Port = 0; - int m_OwnerPid = 0; - std::filesystem::path m_DataDir; + cxxopts::Options m_Options{"attach", "Add a sponsor process to a running zen service"}; + uint16_t m_Port = 0; + int m_OwnerPid = 0; + std::string m_DataDir; }; class DownCommand : public ZenCmdBase @@ -51,11 +51,11 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{"down", "Bring down zen service"}; - uint16_t m_Port = 0; - bool m_ForceTerminate = false; - std::filesystem::path m_ProgramBaseDir; - std::filesystem::path m_DataDir; + cxxopts::Options m_Options{"down", "Bring down zen service"}; + uint16_t m_Port = 0; + bool m_ForceTerminate = false; + std::string m_ProgramBaseDir; + std::string m_DataDir; }; } // namespace zen diff --git a/src/zen/cmds/workspaces_cmd.cpp b/src/zen/cmds/workspaces_cmd.cpp index 05d3c573f..5f3f8f7ca 100644 --- a/src/zen/cmds/workspaces_cmd.cpp +++ b/src/zen/cmds/workspaces_cmd.cpp @@ -25,18 +25,22 @@ namespace { if (!Path.empty()) { std::u8string PathString = Path.u8string(); - if (PathString.ends_with(std::filesystem::path::preferred_separator)) + if (PathString.ends_with(std::filesystem::path::preferred_separator) || PathString.ends_with('/')) // only strip when the LAST character is a separator (either form) { PathString.pop_back(); Path = std::filesystem::path(PathString); } - // Special case if user gives a path with quotes and includes a backslash at the end: - // ="path\" cxxopts strips the leading quote only but not the trailing. 
- // As we expect paths here and we don't want trailing slashes we strip away the quote - // manually if the string does not start with a quote UE-231677 - else if (PathString[0] != '\"' && PathString[PathString.length() - 1] == '\"') + } + } + + static void RemoveLeadingPathSeparator(std::filesystem::path& Path) + { + if (!Path.empty()) + { + std::u8string PathString = Path.u8string(); + if (PathString.starts_with(std::filesystem::path::preferred_separator) || PathString.starts_with('/')) { - PathString.pop_back(); + PathString.erase(PathString.begin()); Path = std::filesystem::path(PathString); } } @@ -83,7 +87,7 @@ WorkspaceCommand::WorkspaceCommand() { m_Options.add_options()("h,help", "Print help"); m_Options.add_option("", "u", "hosturl", "Host URL", cxxopts::value(m_HostName)->default_value(""), "<hosturl>"); - m_Options.add_options()("system-dir", "Specify system root", cxxopts::value<std::filesystem::path>(m_SystemRootDir)); + m_Options.add_options()("system-dir", "Specify system root", cxxopts::value(m_SystemRootDir)); m_Options.add_option("", "v", "verb", "Verb for workspace - create, remove, info", cxxopts::value(m_Verb), "<verb>"); m_Options.parse_positional({"verb"}); m_Options.positional_help("verb"); @@ -135,16 +139,18 @@ WorkspaceCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_HostName = ResolveTargetHostSpec(m_HostName); - if (m_SystemRootDir.empty()) + std::filesystem::path SystemRootDir = StringToPath(m_SystemRootDir); + + if (SystemRootDir.empty()) { - m_SystemRootDir = PickDefaultSystemRootDirectory(); - if (m_SystemRootDir.empty()) + SystemRootDir = PickDefaultSystemRootDirectory(); + if (SystemRootDir.empty()) { throw zen::OptionParseException("unable to resolve system root directory"); } } - std::filesystem::path StatePath = m_SystemRootDir / "workspaces"; + std::filesystem::path StatePath = SystemRootDir / "workspaces"; if (!ParseOptions(*SubOption, gsl::narrow<int>(SubCommandArguments.size()), 
SubCommandArguments.data())) { @@ -158,12 +164,12 @@ WorkspaceCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw zen::OptionParseException(fmt::format("path is required\n{}", m_CreateOptions.help())); } - RemoveTrailingPathSeparator(m_Path); + std::filesystem::path Path = StringToPath(m_Path); if (m_Id.empty()) { - m_Id = Workspaces::PathToId(m_Path).ToString(); - ZEN_CONSOLE("Using generated workspace id {} from path '{}'", m_Id, m_Path); + m_Id = Workspaces::PathToId(Path).ToString(); + ZEN_CONSOLE("Using generated workspace id {} from path '{}'", m_Id, Path); } if (Oid::TryFromHexString(m_Id) == Oid::Zero) @@ -174,7 +180,7 @@ WorkspaceCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) if (Workspaces::AddWorkspace( Log(), StatePath, - {.Id = Oid::FromHexString(m_Id), .RootPath = m_Path, .AllowShareCreationFromHttp = m_AllowShareCreationFromHttp})) + {.Id = Oid::FromHexString(m_Id), .RootPath = Path, .AllowShareCreationFromHttp = m_AllowShareCreationFromHttp})) { if (!m_HostName.empty()) { @@ -274,7 +280,7 @@ WorkspaceShareCommand::WorkspaceShareCommand() { m_Options.add_options()("h,help", "Print help"); m_Options.add_option("", "u", "hosturl", "Host URL", cxxopts::value(m_HostName)->default_value(""), "<hosturl>"); - m_Options.add_options()("system-dir", "Specify system root", cxxopts::value<std::filesystem::path>(m_SystemRootDir)); + m_Options.add_options()("system-dir", "Specify system root", cxxopts::value(m_SystemRootDir)); m_Options.add_option("", "v", "verb", "Verb for workspace - create, remove, info", cxxopts::value(m_Verb), "<verb>"); m_Options.parse_positional({"verb"}); m_Options.positional_help("verb"); @@ -386,16 +392,18 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** m_HostName = ResolveTargetHostSpec(m_HostName); - if (m_SystemRootDir.empty()) + std::filesystem::path SystemRootDir = StringToPath(m_SystemRootDir); + + if (SystemRootDir.empty()) { - m_SystemRootDir = 
PickDefaultSystemRootDirectory(); - if (m_SystemRootDir.empty()) + SystemRootDir = PickDefaultSystemRootDirectory(); + if (SystemRootDir.empty()) { throw zen::OptionParseException("unable to resolve system root directory"); } } - std::filesystem::path StatePath = m_SystemRootDir / "workspaces"; + std::filesystem::path StatePath = SystemRootDir / "workspaces"; if (!ParseOptions(*SubOption, gsl::narrow<int>(SubCommandArguments.size()), SubCommandArguments.data())) { @@ -404,7 +412,8 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** if (SubOption == &m_CreateOptions) { - if (m_WorkspaceRoot.empty()) + std::filesystem::path WorkspaceRoot = StringToPath(m_WorkspaceRoot); + if (WorkspaceRoot.empty()) { if (m_WorkspaceId.empty()) { @@ -423,15 +432,15 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace {} does not exist", m_WorkspaceId); return 0; } - m_WorkspaceRoot = WorkspaceConfig.RootPath; + WorkspaceRoot = WorkspaceConfig.RootPath; } else { - RemoveTrailingPathSeparator(m_WorkspaceRoot); + RemoveTrailingPathSeparator(WorkspaceRoot); if (m_WorkspaceId.empty()) { - m_WorkspaceId = Workspaces::PathToId(m_WorkspaceRoot).ToString(); - ZEN_CONSOLE("Using generated workspace id {} from path '{}'", m_WorkspaceId, m_WorkspaceRoot); + m_WorkspaceId = Workspaces::PathToId(WorkspaceRoot).ToString(); + ZEN_CONSOLE("Using generated workspace id {} from path '{}'", m_WorkspaceId, WorkspaceRoot); } else { @@ -440,22 +449,25 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** throw zen::OptionParseException(fmt::format("workspace id '{}' is invalid", m_WorkspaceId)); } } - if (Workspaces::AddWorkspace(Log(), StatePath, {.Id = Oid::FromHexString(m_WorkspaceId), .RootPath = m_WorkspaceRoot})) + if (Workspaces::AddWorkspace(Log(), StatePath, {.Id = Oid::FromHexString(m_WorkspaceId), .RootPath = WorkspaceRoot})) { - ZEN_CONSOLE("Created workspace {} using root path '{}'", 
m_WorkspaceId, m_WorkspaceRoot); + ZEN_CONSOLE("Created workspace {} using root path '{}'", m_WorkspaceId, WorkspaceRoot); } else { - ZEN_CONSOLE("Using existing workspace {} with root path '{}'", m_WorkspaceId, m_WorkspaceRoot); + ZEN_CONSOLE("Using existing workspace {} with root path '{}'", m_WorkspaceId, WorkspaceRoot); } } - RemoveTrailingPathSeparator(m_SharePath); + std::filesystem::path SharePath = StringToPath(m_SharePath); + + RemoveLeadingPathSeparator(SharePath); + RemoveTrailingPathSeparator(SharePath); if (m_ShareId.empty()) { - m_ShareId = Workspaces::PathToId(m_SharePath).ToString(); - ZEN_CONSOLE("Using generated share id {}, for path '{}'", m_ShareId, m_SharePath); + m_ShareId = Workspaces::PathToId(SharePath).ToString(); + ZEN_CONSOLE("Using generated share id {}, for path '{}'", m_ShareId, SharePath); } if (Oid::TryFromHexString(m_ShareId) == Oid::Zero) @@ -464,8 +476,8 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** } if (Workspaces::AddWorkspaceShare(Log(), - m_WorkspaceRoot, - {.Id = Oid::FromHexString(m_ShareId), .SharePath = m_SharePath, .Alias = m_Alias})) + WorkspaceRoot, + {.Id = Oid::FromHexString(m_ShareId), .SharePath = SharePath, .Alias = m_Alias})) { if (!m_HostName.empty()) { @@ -517,7 +529,8 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace {} does not exist", m_WorkspaceId); return 0; } - m_WorkspaceRoot = WorkspaceConfig.RootPath; + + std::filesystem::path WorkspaceRoot = WorkspaceConfig.RootPath; if (m_ShareId.empty()) { @@ -529,8 +542,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** throw zen::OptionParseException(fmt::format("workspace id '{}' is invalid", m_ShareId)); } - Workspaces::WorkspaceShareConfiguration Share = - Workspaces::FindWorkspaceShare(Log(), m_WorkspaceRoot, Oid::FromHexString(m_ShareId)); + Workspaces::WorkspaceShareConfiguration Share = Workspaces::FindWorkspaceShare(Log(), 
WorkspaceRoot, Oid::FromHexString(m_ShareId)); if (Share.Id == Oid::Zero) { ZEN_CONSOLE("Workspace share {} does not exist", m_ShareId); @@ -542,6 +554,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** if (SubOption == &m_RemoveOptions) { + std::filesystem::path WorkspaceRoot; if (!m_Alias.empty()) { Workspaces::WorkspaceConfiguration WorkspaceConfig; @@ -552,9 +565,9 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace share with alias {} does not exist", m_Alias); return 0; } - m_ShareId = ShareConfig.Id.ToString(); - m_WorkspaceId = WorkspaceConfig.Id.ToString(); - m_WorkspaceRoot = WorkspaceConfig.RootPath; + m_ShareId = ShareConfig.Id.ToString(); + m_WorkspaceId = WorkspaceConfig.Id.ToString(); + WorkspaceRoot = WorkspaceConfig.RootPath; } if (m_WorkspaceId.empty()) @@ -573,7 +586,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace {} does not exist", m_WorkspaceId); return 0; } - m_WorkspaceRoot = WorkspaceConfig.RootPath; + WorkspaceRoot = WorkspaceConfig.RootPath; if (m_ShareId.empty()) { @@ -585,7 +598,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** throw zen::OptionParseException(fmt::format("workspace id '{}' is invalid", m_ShareId)); } - if (Workspaces::RemoveWorkspaceShare(Log(), m_WorkspaceRoot, Oid::FromHexString(m_ShareId))) + if (Workspaces::RemoveWorkspaceShare(Log(), WorkspaceRoot, Oid::FromHexString(m_ShareId))) { if (!m_HostName.empty()) { diff --git a/src/zen/cmds/workspaces_cmd.h b/src/zen/cmds/workspaces_cmd.h index de0edd061..86452e25e 100644 --- a/src/zen/cmds/workspaces_cmd.h +++ b/src/zen/cmds/workspaces_cmd.h @@ -21,17 +21,17 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{Name, Description}; - std::string m_HostName; - std::filesystem::path m_SystemRootDir; + cxxopts::Options m_Options{Name, 
Description}; + std::string m_HostName; + std::string m_SystemRootDir; std::string m_Verb; // create, info, remove std::string m_Id; - cxxopts::Options m_CreateOptions{"create", "Create a workspace"}; - std::filesystem::path m_Path; - bool m_AllowShareCreationFromHttp = false; + cxxopts::Options m_CreateOptions{"create", "Create a workspace"}; + std::string m_Path; + bool m_AllowShareCreationFromHttp = false; cxxopts::Options m_InfoOptions{"info", "Info about a workspace"}; @@ -53,17 +53,17 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{Name, Description}; - std::string m_HostName; - std::filesystem::path m_SystemRootDir; - std::string m_WorkspaceId; - std::filesystem::path m_WorkspaceRoot; - std::string m_Verb; // create, info, remove - std::string m_ShareId; - std::string m_Alias; - - cxxopts::Options m_CreateOptions{"create", "Create a workspace share"}; - std::filesystem::path m_SharePath; + cxxopts::Options m_Options{Name, Description}; + std::string m_HostName; + std::string m_SystemRootDir; + std::string m_WorkspaceId; + std::string m_WorkspaceRoot; + std::string m_Verb; // create, info, remove + std::string m_ShareId; + std::string m_Alias; + + cxxopts::Options m_CreateOptions{"create", "Create a workspace share"}; + std::string m_SharePath; bool m_Refresh = false; diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index f64bc96a8..a5fd693f2 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -7,6 +7,7 @@ #include "cmds/admin_cmd.h" #include "cmds/bench_cmd.h" +#include "cmds/builds_cmd.h" #include "cmds/cache_cmd.h" #include "cmds/copy_cmd.h" #include "cmds/dedup_cmd.h" @@ -25,14 +26,25 @@ #include "cmds/vfs_cmd.h" #include "cmds/workspaces_cmd.h" +#include <zencore/callstack.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> +#include <zencore/process.h> #include <zencore/scopeguard.h> #include <zencore/string.h> +#include <zencore/trace.h> 
+#include <zencore/windows.h> #include <zenhttp/httpcommon.h> +#include <zenutil/logging.h> #include <zenutil/zenserverprocess.h> +#include <zencore/memory/fmalloc.h> +#include <zencore/memory/llm.h> +#include <zencore/memory/memory.h> +#include <zencore/memory/memorytrace.h> +#include <zencore/memory/newdelete.h> + #if ZEN_WITH_TESTS # define ZEN_TEST_WITH_RUNNER 1 # include <zencore/testing.h> @@ -47,6 +59,10 @@ ZEN_THIRD_PARTY_INCLUDES_END #include <zencore/memory/newdelete.h> +#ifndef ZEN_PLATFORM_WINDOWS +# include <unistd.h> +#endif + ////////////////////////////////////////////////////////////////////////// namespace zen { @@ -252,7 +268,24 @@ ZenCmdBase::ResolveTargetHostSpec(const std::string& InHostSpec) return ResolveTargetHostSpec(InHostSpec, /* out */ Dummy); } -ProgressBar::ProgressBar(bool PlainProgress) : m_PlainProgress(PlainProgress), m_LastUpdateMS(m_SW.GetElapsedTimeMs() - 10000) +static bool +IsStdoutTty() +{ +#if ZEN_PLATFORM_WINDOWS + static HANDLE hStdOut = ::GetStdHandle(STD_OUTPUT_HANDLE); + DWORD dwMode = 0; + static bool IsConsole = ::GetConsoleMode(hStdOut, &dwMode); + return IsConsole; +#else + return isatty(fileno(stdout)); +#endif +} + +ProgressBar::ProgressBar(bool PlainProgress, bool ShowDetails) +: m_StdoutIsTty(IsStdoutTty()) +, m_PlainProgress(PlainProgress || !m_StdoutIsTty) +, m_ShowDetails(ShowDetails) +, m_LastUpdateMS(m_SW.GetElapsedTimeMs() - 10000) { } @@ -260,7 +293,7 @@ ProgressBar::~ProgressBar() { try { - Finish(); + ForceLinebreak(); } catch (const std::exception& Ex) { @@ -271,6 +304,7 @@ ProgressBar::~ProgressBar() void ProgressBar::UpdateState(const State& NewState, bool DoLinebreak) { + ZEN_ASSERT(NewState.TotalCount >= NewState.RemainingCount); if (DoLinebreak == false && m_State == NewState) { return; @@ -289,7 +323,8 @@ ProgressBar::UpdateState(const State& NewState, bool DoLinebreak) if (m_PlainProgress) { - ZEN_CONSOLE("{} {}% ({})", NewState.Task, PercentDone, NiceTimeSpanMs(ElapsedTimeMS)); + std::string 
Details = (m_ShowDetails && !NewState.Details.empty()) ? fmt::format(": {}", NewState.Details) : ""; + ZEN_CONSOLE("{} {}% ({}){}", NewState.Task, PercentDone, NiceTimeSpanMs(ElapsedTimeMS), Details); } else { @@ -309,7 +344,27 @@ ProgressBar::UpdateState(const State& NewState, bool DoLinebreak) ETA, NewState.Details.empty() ? "" : fmt::format(". {}", NewState.Details)); std::string::size_type EraseLength = m_LastOutputLength > Output.length() ? (m_LastOutputLength - Output.length()) : 0; - printf("%s%s%s", Output.c_str(), std::string(EraseLength, ' ').c_str(), DoLinebreak ? "\n" : ""); + + ExtendableStringBuilder<128> LineToPrint; + LineToPrint << Output << std::string(EraseLength, ' '); + if (DoLinebreak) + LineToPrint << "\n"; + +#if ZEN_PLATFORM_WINDOWS + static HANDLE hStdOut = GetStdHandle(STD_OUTPUT_HANDLE); + + if (m_StdoutIsTty) + { + WriteConsoleA(hStdOut, LineToPrint.c_str(), (DWORD)LineToPrint.Size(), 0, 0); + } + else + { + ::WriteFile(hStdOut, (LPCVOID)LineToPrint.c_str(), (DWORD)LineToPrint.Size(), 0, 0); + } +#else + fwrite(LineToPrint.c_str(), 1, LineToPrint.Size(), stdout); +#endif + m_LastOutputLength = DoLinebreak ? 
0 : Output.length(); m_State = NewState; } @@ -328,7 +383,7 @@ ProgressBar::ForceLinebreak() void ProgressBar::Finish() { - if (m_LastOutputLength > 0 && m_State.RemainingCount > 0) + if (m_LastOutputLength > 0) { State NewState = m_State; NewState.RemainingCount = 0; @@ -361,30 +416,29 @@ ProgressBar::HasActiveTask() const int main(int argc, char** argv) { - using namespace zen; - using namespace std::literals; - - zen::logging::InitializeLogging(); - - // Set output mode to handle virtual terminal sequences - zen::logging::EnableVTMode(); - std::set_terminate([]() { ZEN_CRITICAL("Program exited abnormally via std::terminate()"); }); - - LoggerRef DefaultLogger = zen::logging::Default(); - auto& Sinks = DefaultLogger.SpdLogger->sinks(); - - Sinks.clear(); - auto ConsoleSink = std::make_shared<spdlog::sinks::ansicolor_stdout_sink_mt>(); - Sinks.push_back(ConsoleSink); - - zen::MaximizeOpenFileCount(); + std::vector<std::string> Args; +#if ZEN_PLATFORM_WINDOWS + LPWSTR RawCommandLine = GetCommandLine(); + std::string CommandLine = zen::WideToUtf8(RawCommandLine); + Args = zen::ParseCommandLine(CommandLine); +#else + Args.reserve(argc); + for (int I = 0; I < argc; I++) + { + Args.push_back(std::string(argv[I])); + } +#endif + std::vector<char*> RawArgs = zen::StripCommandlineQuotes(Args); - ////////////////////////////////////////////////////////////////////////// + argc = gsl::narrow<int>(RawArgs.size()); + argv = RawArgs.data(); - auto _ = zen::MakeGuard([] { spdlog::shutdown(); }); + using namespace zen; + using namespace std::literals; AttachCommand AttachCmd; BenchCommand BenchCmd; + BuildsCommand BuildsCmd; CacheDetailsCommand CacheDetailsCmd; CacheGetCommand CacheGetCmd; CacheGenerateCommand CacheGenerateCmd; @@ -441,6 +495,7 @@ main(int argc, char** argv) // clang-format off {"attach", &AttachCmd, "Add a sponsor process to a running zen service"}, {"bench", &BenchCmd, "Utility command for benchmarking"}, + {BuildsCommand::Name, &BuildsCmd, 
BuildsCommand::Description}, {"cache-details", &CacheDetailsCmd, "Details on cache"}, {"cache-info", &CacheInfoCmd, "Info on cache, namespace or bucket"}, {CacheGetCommand::Name, &CacheGetCmd, CacheGetCommand::Description}, @@ -582,15 +637,41 @@ main(int argc, char** argv) GlobalOptions.PassthroughArgs = PassthroughArgs; GlobalOptions.PassthroughArgV = PassthroughArgV; + std::string MemoryOptions; + std::string SubCommand = "<None>"; cxxopts::Options Options("zen", "Zen management tool"); Options.add_options()("d, debug", "Enable debugging", cxxopts::value<bool>(GlobalOptions.IsDebug)); Options.add_options()("v, verbose", "Enable verbose logging", cxxopts::value<bool>(GlobalOptions.IsVerbose)); + Options.add_options()("malloc", "Configure memory allocator subsystem", cxxopts::value(MemoryOptions)->default_value("mimalloc")); Options.add_options()("help", "Show command line help"); Options.add_options()("c, command", "Sub command", cxxopts::value<std::string>(SubCommand)); +#if ZEN_WITH_TRACE + std::string TraceChannels; + std::string TraceHost; + std::string TraceFile; + + Options.add_option("ue-trace", + "", + "trace", + "Specify which trace channels should be enabled", + cxxopts::value<std::string>(TraceChannels)->default_value(""), + ""); + + Options.add_option("ue-trace", + "", + "tracehost", + "Hostname to send the trace to", + cxxopts::value<std::string>(TraceHost)->default_value(""), + ""); + + Options + .add_option("ue-trace", "", "tracefile", "Path to write a trace to", cxxopts::value<std::string>(TraceFile)->default_value(""), ""); +#endif // ZEN_WITH_TRACE + Options.parse_positional({"command"}); const bool IsNullInvoke = (argc == 1); // If no arguments are passed we want to print usage information @@ -632,14 +713,49 @@ main(int argc, char** argv) exit(0); } - if (GlobalOptions.IsDebug) + zen::LoggingOptions LogOptions; + LogOptions.IsDebug = GlobalOptions.IsDebug; + LogOptions.IsVerbose = GlobalOptions.IsVerbose; + LogOptions.AllowAsync = false; + 
zen::InitializeLogging(LogOptions); + + std::set_terminate([]() { + void* Frames[8]; + uint32_t FrameCount = GetCallstack(2, 8, Frames); + CallstackFrames* Callstack = CreateCallstack(FrameCount, Frames); + ZEN_CRITICAL("Program exited abnormally via std::terminate()\n{}", CallstackToString(Callstack, " ")); + FreeCallstack(Callstack); + }); + + zen::MaximizeOpenFileCount(); + + ////////////////////////////////////////////////////////////////////////// + + auto _ = zen::MakeGuard([] { zen::ShutdownLogging(); }); + +#if ZEN_WITH_TRACE + if (TraceHost.size()) + { + TraceStart("zen", TraceHost.c_str(), TraceType::Network); + } + else if (TraceFile.size()) { - logging::SetLogLevel(logging::level::Debug); + TraceStart("zen", TraceFile.c_str(), TraceType::File); } - if (GlobalOptions.IsVerbose) + else { - logging::SetLogLevel(logging::level::Trace); + TraceInit("zen"); } +#endif // ZEN_WITH_TRACE + +#if ZEN_WITH_MEMTRACK + FMalloc* TraceMalloc = MemoryTrace_Create(GMalloc); + if (TraceMalloc != GMalloc) + { + GMalloc = TraceMalloc; + MemoryTrace_Initialize(); + } +#endif for (const CommandInfo& CmdInfo : Commands) { diff --git a/src/zen/zen.h b/src/zen/zen.h index 9c9586050..6765101db 100644 --- a/src/zen/zen.h +++ b/src/zen/zen.h @@ -84,7 +84,7 @@ public: uint64_t RemainingCount = 0; }; - explicit ProgressBar(bool PlainProgress); + explicit ProgressBar(bool PlainProgress, bool ShowDetails = true); ~ProgressBar(); void UpdateState(const State& NewState, bool DoLinebreak); @@ -94,7 +94,9 @@ public: bool HasActiveTask() const; private: + const bool m_StdoutIsTty = true; const bool m_PlainProgress; + const bool m_ShowDetails; Stopwatch m_SW; uint64_t m_LastUpdateMS; State m_State; diff --git a/src/zencore-test/zencore-test.cpp b/src/zencore-test/zencore-test.cpp index 37ae7f587..1cc5e1a87 100644 --- a/src/zencore-test/zencore-test.cpp +++ b/src/zencore-test/zencore-test.cpp @@ -21,9 +21,9 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) #if ZEN_WITH_TESTS 
zen::zencore_forcelinktests(); -#if ZEN_PLATFORM_LINUX +# if ZEN_PLATFORM_LINUX zen::IgnoreChildSignals(); -#endif +# endif zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zencore/basicfile.cpp b/src/zencore/basicfile.cpp index c2a21ae90..95876cff4 100644 --- a/src/zencore/basicfile.cpp +++ b/src/zencore/basicfile.cpp @@ -28,6 +28,20 @@ BasicFile::~BasicFile() { Close(); } +BasicFile::BasicFile(const std::filesystem::path& FileName, Mode Mode) +{ + Open(FileName, Mode); +} + +BasicFile::BasicFile(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec) +{ + Open(FileName, Mode, Ec); +} + +BasicFile::BasicFile(const std::filesystem::path& FileName, Mode Mode, std::function<bool(std::error_code& Ec)>&& RetryCallback) +{ + Open(FileName, Mode, std::move(RetryCallback)); +} void BasicFile::Open(const std::filesystem::path& FileName, Mode Mode) @@ -267,7 +281,21 @@ BasicFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<voi } uint64_t -BasicFile::Write(CompositeBuffer Data, uint64_t FileOffset, std::error_code& Ec) +BasicFile::Write(const CompositeBuffer& Data, uint64_t FileOffset) +{ + std::error_code Ec; + uint64_t WrittenBytes = Write(Data, FileOffset, Ec); + + if (Ec) + { + std::error_code Dummy; + throw std::system_error(Ec, fmt::format("Failed to write to file '{}'", zen::PathFromHandle(m_FileHandle, Dummy))); + } + return WrittenBytes; +} + +uint64_t +BasicFile::Write(const CompositeBuffer& Data, uint64_t FileOffset, std::error_code& Ec) { uint64_t WrittenBytes = 0; for (const SharedBuffer& Buffer : Data.GetSegments()) @@ -295,6 +323,8 @@ BasicFile::Write(MemoryView Data, uint64_t FileOffset, std::error_code& Ec) void BasicFile::Write(const void* Data, uint64_t Size, uint64_t FileOffset, std::error_code& Ec) { + ZEN_ASSERT(m_FileHandle != nullptr); + Ec.clear(); const uint64_t MaxChunkSize = 2u * 1024 * 1024 * 1024; @@ -575,7 +605,6 @@ TemporaryFile::MoveTemporaryIntoPlace(std::filesystem::path 
FinalFileName, std:: void TemporaryFile::SafeWriteFile(const std::filesystem::path& Path, MemoryView Data) { - TemporaryFile TempFile; std::error_code Ec; SafeWriteFile(Path, Data, Ec); if (Ec) @@ -804,6 +833,17 @@ BasicFileWriter::Write(const void* Data, uint64_t Size, uint64_t FileOffset) } void +BasicFileWriter::Write(const CompositeBuffer& Data, uint64_t FileOffset) +{ + for (const SharedBuffer& Segment : Data.GetSegments()) + { + const uint64_t SegmentSize = Segment.GetSize(); + Write(Segment.GetData(), SegmentSize, FileOffset); + FileOffset += SegmentSize; + } +} + +void BasicFileWriter::Flush() { const uint64_t BufferedBytes = m_BufferEnd - m_BufferStart; @@ -817,6 +857,78 @@ BasicFileWriter::Flush() m_Base.Write(m_Buffer, BufferedBytes, WriteOffset); } +IoBuffer +WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path) +{ + TemporaryFile Temp; + std::error_code Ec; + Temp.CreateTemporary(Path.parent_path(), Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to create temp file for blob at '{}'", Path)); + } + + uint64_t BufferSize = Buffer.GetSize(); + { + uint64_t Offset = 0; + static const uint64_t BufferingSize = 256u * 1024u; + // BasicFileWriter BufferedOutput(BlockFile, BufferingSize / 2); + for (const SharedBuffer& Segment : Buffer.GetSegments()) + { + size_t SegmentSize = Segment.GetSize(); + + IoBufferFileReference FileRef; + if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) + { + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&Temp, &Offset](const void* Data, size_t Size) { + Temp.Write(Data, Size, Offset); + Offset += Size; + }); + } + else + { + Temp.Write(Segment.GetData(), SegmentSize, Offset); + Offset += SegmentSize; + } + } + } + + Temp.MoveTemporaryIntoPlace(Path, Ec); + if (Ec) + { + Ec.clear(); + BasicFile OpenTemp(Path, BasicFile::Mode::kDelete, Ec); + if (Ec) + { + throw std::system_error(Ec, 
fmt::format("Failed to move temp file to '{}'", Path)); + } + if (OpenTemp.FileSize() != BufferSize) + { + throw std::runtime_error(fmt::format("Failed to move temp file to '{}' - mismatching file size already exists", Path)); + } + IoBuffer TmpBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BufferSize, true); + + IoHash ExistingHash = IoHash::HashBuffer(TmpBuffer); + const IoHash ExpectedHash = IoHash::HashBuffer(Buffer); + if (ExistingHash != ExpectedHash) + { + throw std::runtime_error(fmt::format("Failed to move temp file to '{}' - mismatching file hash already exists", Path)); + } + Buffer = CompositeBuffer{}; + TmpBuffer.SetDeleteOnClose(true); + return TmpBuffer; + } + Buffer = CompositeBuffer{}; + BasicFile OpenTemp(Path, BasicFile::Mode::kDelete); + IoBuffer TmpBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BufferSize, true); + TmpBuffer.SetDeleteOnClose(true); + return TmpBuffer; +} + ////////////////////////////////////////////////////////////////////////// /* diff --git a/src/zencore/compactbinary.cpp b/src/zencore/compactbinary.cpp index adccaba70..b43cc18f1 100644 --- a/src/zencore/compactbinary.cpp +++ b/src/zencore/compactbinary.cpp @@ -15,6 +15,8 @@ #include <zencore/testing.h> #include <zencore/uid.h> +#include <EASTL/fixed_vector.h> + #include <fmt/format.h> #include <string_view> @@ -1376,9 +1378,9 @@ TryMeasureCompactBinary(MemoryView View, CbFieldType& OutType, uint64_t& OutSize CbField LoadCompactBinary(BinaryReader& Ar, BufferAllocator Allocator) { - std::vector<uint8_t> HeaderBytes; - CbFieldType FieldType; - uint64_t FieldSize = 1; + eastl::fixed_vector<uint8_t, 32> HeaderBytes; + CbFieldType FieldType; + uint64_t FieldSize = 1; for (const int64_t StartPos = Ar.CurrentOffset(); FieldSize > 0;) { @@ -1393,7 +1395,7 @@ LoadCompactBinary(BinaryReader& Ar, BufferAllocator Allocator) HeaderBytes.resize(ReadOffset + ReadSize); Ar.Read(HeaderBytes.data() + ReadOffset, ReadSize); - if (TryMeasureCompactBinary(MakeMemoryView(HeaderBytes), FieldType, 
FieldSize)) + if (TryMeasureCompactBinary(MakeMemoryView(HeaderBytes.data(), HeaderBytes.size()), FieldType, FieldSize)) { if (FieldSize <= uint64_t(Ar.Size() - StartPos)) { diff --git a/src/zencore/compactbinarybuilder.cpp b/src/zencore/compactbinarybuilder.cpp index a60de023d..63c0b9c5c 100644 --- a/src/zencore/compactbinarybuilder.cpp +++ b/src/zencore/compactbinarybuilder.cpp @@ -15,23 +15,21 @@ namespace zen { -template<typename T> uint64_t -AddUninitialized(std::vector<T>& Vector, uint64_t Count) +AddUninitialized(CbWriter::CbWriterData_t& Vector, uint64_t Count) { const uint64_t Offset = Vector.size(); Vector.resize(Offset + Count); return Offset; } -template<typename T> uint64_t -Append(std::vector<T>& Vector, const T* Data, uint64_t Count) +Append(CbWriter::CbWriterData_t& Vector, const uint8_t* Data, uint64_t Count) { const uint64_t Offset = Vector.size(); Vector.resize(Offset + Count); - memcpy(Vector.data() + Offset, Data, sizeof(T) * Count); + memcpy(Vector.data() + Offset, Data, sizeof(uint8_t) * Count); return Offset; } @@ -76,7 +74,7 @@ IsUniformType(const CbFieldType Type) /** Append the payload from the compact binary value to the array and return its type. 
*/ static inline CbFieldType -AppendCompactBinary(const CbFieldView& Value, std::vector<uint8_t>& OutData) +AppendCompactBinary(const CbFieldView& Value, CbWriter::CbWriterData_t& OutData) { struct FCopy : public CbFieldView { @@ -93,7 +91,6 @@ AppendCompactBinary(const CbFieldView& Value, std::vector<uint8_t>& OutData) CbWriter::CbWriter() { - States.reserve(4); States.emplace_back(); } diff --git a/src/zencore/compactbinarypackage.cpp b/src/zencore/compactbinarypackage.cpp index 7de161845..ffe64f2e9 100644 --- a/src/zencore/compactbinarypackage.cpp +++ b/src/zencore/compactbinarypackage.cpp @@ -3,10 +3,13 @@ #include "zencore/compactbinarypackage.h" #include <zencore/compactbinarybuilder.h> #include <zencore/compactbinaryvalidation.h> +#include <zencore/eastlutil.h> #include <zencore/endian.h> #include <zencore/stream.h> #include <zencore/testing.h> +#include <EASTL/span.h> + namespace zen { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -341,6 +344,12 @@ CbPackage::SetObject(CbObject InObject, const IoHash* InObjectHash, AttachmentRe } void +CbPackage::ReserveAttachments(size_t Count) +{ + Attachments.reserve(Count); +} + +void CbPackage::AddAttachment(const CbAttachment& Attachment, AttachmentResolver* Resolver) { if (!Attachment.IsNull()) @@ -374,17 +383,18 @@ CbPackage::AddAttachments(std::span<const CbAttachment> InAttachments) { ZEN_ASSERT(!Attachment.IsNull()); } + // Assume we have no duplicates! 
Attachments.insert(Attachments.end(), InAttachments.begin(), InAttachments.end()); std::sort(Attachments.begin(), Attachments.end()); - ZEN_ASSERT_SLOW(std::unique(Attachments.begin(), Attachments.end()) == Attachments.end()); + ZEN_ASSERT_SLOW(eastl::unique(Attachments.begin(), Attachments.end()) == Attachments.end()); } int32_t CbPackage::RemoveAttachment(const IoHash& Hash) { return gsl::narrow_cast<int32_t>( - std::erase_if(Attachments, [&Hash](const CbAttachment& Attachment) -> bool { return Attachment.GetHash() == Hash; })); + erase_if(Attachments, [&Hash](const CbAttachment& Attachment) -> bool { return Attachment.GetHash() == Hash; })); } bool diff --git a/src/zencore/compositebuffer.cpp b/src/zencore/compositebuffer.cpp index 49870a304..252ac9045 100644 --- a/src/zencore/compositebuffer.cpp +++ b/src/zencore/compositebuffer.cpp @@ -275,36 +275,18 @@ CompositeBuffer::IterateRange(uint64_t Offset, Visitor(View, Segment); break; } - if (Offset < SegmentSize) + else if (Offset <= SegmentSize) { - if (Offset == 0 && Size >= SegmentSize) + const MemoryView View = Segment.GetView().Mid(Offset, Size); + Offset = 0; + if (Size == 0 || !View.IsEmpty()) { - const MemoryView View = Segment.GetView(); - if (!View.IsEmpty()) - { - Visitor(View, Segment); - } - Size -= View.GetSize(); - if (Size == 0) - { - break; - } + Visitor(View, Segment); } - else + Size -= View.GetSize(); + if (Size == 0) { - // If we only want a section of the segment, do a subrange so we don't have to materialize the entire iobuffer - IoBuffer SubRange(Segment.AsIoBuffer(), Offset, Min(Size, SegmentSize - Offset)); - const MemoryView View = SubRange.GetView(); - if (!View.IsEmpty()) - { - Visitor(View, Segment); - } - Size -= View.GetSize(); - if (Size == 0) - { - break; - } - Offset = 0; + break; } } else diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp index 29c1d9256..88c3bb5b9 100644 --- a/src/zencore/compress.cpp +++ b/src/zencore/compress.cpp @@ -2,6 +2,7 @@ #include 
<zencore/compress.h> +#include <zencore/basicfile.h> #include <zencore/blake3.h> #include <zencore/compositebuffer.h> #include <zencore/crc32.h> @@ -157,6 +158,9 @@ class BaseEncoder { public: [[nodiscard]] virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize = DefaultBlockSize) const = 0; + [[nodiscard]] virtual bool CompressToStream(const CompositeBuffer& RawData, + std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t BlockSize = DefaultBlockSize) const = 0; }; class BaseDecoder @@ -184,6 +188,12 @@ public: const MemoryView HeaderView, uint64_t RawOffset, uint64_t RawSize) const = 0; + + virtual bool DecompressToStream(const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const = 0; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -191,11 +201,21 @@ public: class NoneEncoder final : public BaseEncoder { public: - [[nodiscard]] CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t /* BlockSize */) const final + [[nodiscard]] virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t /* BlockSize */) const final { UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), BLAKE3::HashBuffer(RawData)); return CompositeBuffer(HeaderData.MoveToShared(), RawData.MakeOwned()); } + + [[nodiscard]] virtual bool CompressToStream(const CompositeBuffer& RawData, + std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t /* BlockSize */) const final + { + UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), BLAKE3::HashBuffer(RawData)); + Callback(0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderData.GetData(), HeaderData.GetSize()))); + Callback(HeaderData.GetSize(), 
RawData); + return true; + } }; class NoneDecoder final : public BaseDecoder @@ -262,6 +282,25 @@ public: } [[nodiscard]] uint64_t GetHeaderSize(const BufferHeader&) const final { return sizeof(BufferHeader); } + + virtual bool DecompressToStream(const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const final + { + if (Header.Method == CompressionMethod::None && Header.TotalCompressedSize == CompressedData.GetSize() && + Header.TotalCompressedSize == Header.TotalRawSize + sizeof(BufferHeader) && RawOffset < Header.TotalRawSize && + (RawOffset + RawSize) <= Header.TotalRawSize) + { + if (!Callback(0, CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize))) + { + return false; + } + return true; + } + return false; + } }; ////////////////////////////////////////////////////////////////////////// @@ -269,7 +308,10 @@ public: class BlockEncoder : public BaseEncoder { public: - CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize = DefaultBlockSize) const final; + virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize) const final; + virtual bool CompressToStream(const CompositeBuffer& RawData, + std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t BlockSize) const final; protected: virtual CompressionMethod GetMethod() const = 0; @@ -314,37 +356,77 @@ BlockEncoder::Compress(const CompositeBuffer& RawData, const uint64_t BlockSize) CompressedBlockSizes.reserve(BlockCount); uint64_t CompressedSize = 0; { - UniqueBuffer RawBlockCopy; MutableMemoryView CompressedBlocksView = CompressedData.GetMutableView() + sizeof(BufferHeader) + MetaSize; - CompositeBuffer::Iterator It = RawData.GetIterator(0); - - for (uint64_t RawOffset = 0; RawOffset < RawSize;) + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((RawData.GetSegments().size() == 1) 
&& RawData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) { - const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); - const MemoryView RawBlock = RawData.ViewOrCopyRange(It, RawBlockSize, RawBlockCopy); - RawHash.Append(RawBlock); - - MutableMemoryView CompressedBlock = CompressedBlocksView; - if (!CompressBlock(CompressedBlock, RawBlock)) + ZEN_ASSERT(FileRef.FileHandle != nullptr); + UniqueBuffer RawBlockCopy = UniqueBuffer::Alloc(BlockSize); + BasicFile Source; + Source.Attach(FileRef.FileHandle); + for (uint64_t RawOffset = 0; RawOffset < RawSize;) { - return CompositeBuffer(); - } + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + Source.Read(RawBlockCopy.GetData(), RawBlockSize, FileRef.FileChunkOffset + RawOffset); + const MemoryView RawBlock = RawBlockCopy.GetView().Left(RawBlockSize); + RawHash.Append(RawBlock); + MutableMemoryView CompressedBlock = CompressedBlocksView; + if (!CompressBlock(CompressedBlock, RawBlock)) + { + Source.Detach(); + return CompositeBuffer(); + } - uint64_t CompressedBlockSize = CompressedBlock.GetSize(); - if (RawBlockSize <= CompressedBlockSize) - { - CompressedBlockSize = RawBlockSize; - CompressedBlocksView = CompressedBlocksView.CopyFrom(RawBlock); + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + CompressedBlockSize = RawBlockSize; + CompressedBlocksView = CompressedBlocksView.CopyFrom(RawBlock); + } + else + { + CompressedBlocksView += CompressedBlockSize; + } + + CompressedBlockSizes.push_back(static_cast<uint32_t>(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; } - else + Source.Detach(); + } + else + { + UniqueBuffer RawBlockCopy; + CompositeBuffer::Iterator It = RawData.GetIterator(0); + + for (uint64_t RawOffset = 0; RawOffset < RawSize;) { - CompressedBlocksView += CompressedBlockSize; - } + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); 
+ const MemoryView RawBlock = RawData.ViewOrCopyRange(It, RawBlockSize, RawBlockCopy); + RawHash.Append(RawBlock); + + MutableMemoryView CompressedBlock = CompressedBlocksView; + if (!CompressBlock(CompressedBlock, RawBlock)) + { + return CompositeBuffer(); + } + + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + CompressedBlockSize = RawBlockSize; + CompressedBlocksView = CompressedBlocksView.CopyFrom(RawBlock); + } + else + { + CompressedBlocksView += CompressedBlockSize; + } - CompressedBlockSizes.push_back(static_cast<uint32_t>(CompressedBlockSize)); - CompressedSize += CompressedBlockSize; - RawOffset += RawBlockSize; + CompressedBlockSizes.push_back(static_cast<uint32_t>(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; + } } } @@ -377,6 +459,133 @@ BlockEncoder::Compress(const CompositeBuffer& RawData, const uint64_t BlockSize) return CompositeBuffer(SharedBuffer::MakeView(CompositeView, CompressedData.MoveToShared())); } +bool +BlockEncoder::CompressToStream(const CompositeBuffer& RawData, + std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t BlockSize = DefaultBlockSize) const +{ + ZEN_ASSERT(IsPow2(BlockSize) && (BlockSize <= (1u << 31))); + + const uint64_t RawSize = RawData.GetSize(); + BLAKE3Stream RawHash; + + const uint64_t BlockCount = RoundUp(RawSize, BlockSize) / BlockSize; + ZEN_ASSERT(BlockCount <= ~uint32_t(0)); + + const uint64_t MetaSize = BlockCount * sizeof(uint32_t); + const uint64_t FullHeaderSize = sizeof(BufferHeader) + MetaSize; + + std::vector<uint32_t> CompressedBlockSizes; + CompressedBlockSizes.reserve(BlockCount); + uint64_t CompressedSize = 0; + { + UniqueBuffer CompressedBlockBuffer = UniqueBuffer::Alloc(GetCompressedBlocksBound(1, BlockSize, Min(RawSize, BlockSize))); + + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((RawData.GetSegments().size() == 1) && 
RawData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) + { + ZEN_ASSERT(FileRef.FileHandle != nullptr); + UniqueBuffer RawBlockCopy = UniqueBuffer::Alloc(BlockSize); + BasicFile Source; + Source.Attach(FileRef.FileHandle); + for (uint64_t RawOffset = 0; RawOffset < RawSize;) + { + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + Source.Read(RawBlockCopy.GetData(), RawBlockSize, FileRef.FileChunkOffset + RawOffset); + const MemoryView RawBlock = RawBlockCopy.GetView().Left(RawBlockSize); + RawHash.Append(RawBlock); + MutableMemoryView CompressedBlock = CompressedBlockBuffer.GetMutableView(); + if (!CompressBlock(CompressedBlock, RawBlock)) + { + Source.Detach(); + return false; + } + + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + Callback(FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlockCopy.GetView().GetData(), RawBlockSize))); + CompressedBlockSize = RawBlockSize; + } + else + { + Callback(FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, CompressedBlock.GetData(), CompressedBlockSize))); + } + + CompressedBlockSizes.push_back(static_cast<uint32_t>(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; + } + Source.Detach(); + } + else + { + UniqueBuffer RawBlockCopy; + CompositeBuffer::Iterator It = RawData.GetIterator(0); + + for (uint64_t RawOffset = 0; RawOffset < RawSize;) + { + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + const MemoryView RawBlock = RawData.ViewOrCopyRange(It, RawBlockSize, RawBlockCopy); + RawHash.Append(RawBlock); + + MutableMemoryView CompressedBlock = CompressedBlockBuffer.GetMutableView(); + if (!CompressBlock(CompressedBlock, RawBlock)) + { + return false; + } + + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + Callback(FullHeaderSize + CompressedSize, 
CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlock.GetData(), RawBlockSize))); + CompressedBlockSize = RawBlockSize; + } + else + { + Callback(FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, CompressedBlock.GetData(), CompressedBlockSize))); + } + + CompressedBlockSizes.push_back(static_cast<uint32_t>(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; + } + } + } + + // Return failure if the compressed data is larger than the raw data. + if (RawSize <= MetaSize + CompressedSize) + { + return false; + } + + // Write the header and calculate the CRC-32. + for (uint32_t& Size : CompressedBlockSizes) + { + Size = ByteSwap(Size); + } + UniqueBuffer HeaderBuffer = UniqueBuffer::Alloc(sizeof(BufferHeader) + MetaSize); + + BufferHeader Header; + Header.Method = GetMethod(); + Header.Compressor = GetCompressor(); + Header.CompressionLevel = GetCompressionLevel(); + Header.BlockSizeExponent = static_cast<uint8_t>(zen::FloorLog2_64(BlockSize)); + Header.BlockCount = static_cast<uint32_t>(BlockCount); + Header.TotalRawSize = RawSize; + Header.TotalCompressedSize = sizeof(BufferHeader) + MetaSize + CompressedSize; + Header.RawHash = RawHash.GetHash(); + + HeaderBuffer.GetMutableView().Mid(sizeof(BufferHeader), MetaSize).CopyFrom(MakeMemoryView(CompressedBlockSizes)); + Header.Write(HeaderBuffer.GetMutableView()); + + Callback(0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderBuffer.GetData(), HeaderBuffer.GetSize()))); + return true; +} + class BlockDecoder : public BaseDecoder { public: @@ -406,6 +615,12 @@ public: MutableMemoryView RawView, uint64_t RawOffset) const final; + virtual bool DecompressToStream(const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const final; + protected: virtual bool DecompressBlock(MutableMemoryView RawData, MemoryView CompressedData) const 
= 0; }; @@ -528,6 +743,159 @@ BlockDecoder::DecompressToComposite(const BufferHeader& Header, const CompositeB } bool +BlockDecoder::DecompressToStream(const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const +{ + if (Header.TotalCompressedSize != CompressedData.GetSize()) + { + return false; + } + + const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; + + UniqueBuffer BlockSizeBuffer; + MemoryView BlockSizeView = CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer); + std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount); + + UniqueBuffer CompressedBlockCopy; + + const size_t FirstBlockIndex = uint64_t(RawOffset / BlockSize); + const size_t LastBlockIndex = uint64_t((RawOffset + RawSize - 1) / BlockSize); + const uint64_t LastBlockSize = BlockSize - ((Header.BlockCount * BlockSize) - Header.TotalRawSize); + uint64_t OffsetInFirstBlock = RawOffset % BlockSize; + uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t); + uint64_t RemainingRawSize = RawSize; + + for (size_t BlockIndex = 0; BlockIndex < FirstBlockIndex; BlockIndex++) + { + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + CompressedOffset += CompressedBlockSize; + } + + UniqueBuffer RawDataBuffer; + + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((CompressedData.GetSegments().size() == 1) && CompressedData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) + { + ZEN_ASSERT(FileRef.FileHandle != nullptr); + BasicFile Source; + Source.Attach(FileRef.FileHandle); + + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + { + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? 
LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; + + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? zen::Min(RawSize, UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); + + if (CompressedBlockCopy.GetSize() < CompressedBlockSize) + { + CompressedBlockCopy = UniqueBuffer::Alloc(CompressedBlockSize); + } + Source.Read(CompressedBlockCopy.GetData(), CompressedBlockSize, FileRef.FileChunkOffset + CompressedOffset); + + MemoryView CompressedBlock = CompressedBlockCopy.GetView().Left(CompressedBlockSize); + + if (IsCompressed) + { + if (RawDataBuffer.IsNull()) + { + RawDataBuffer = UniqueBuffer::Alloc(zen::Min(RawSize, UncompressedBlockSize)); + } + else + { + ZEN_ASSERT(RawDataBuffer.GetSize() >= UncompressedBlockSize); + } + MutableMemoryView UncompressedBlock = RawDataBuffer.GetMutableView().Left(UncompressedBlockSize); + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + Source.Detach(); + return false; + } + if (!Callback(BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress)))) + { + Source.Detach(); + return false; + } + } + else + { + if (!Callback( + BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer( + IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress)))) + { + Source.Detach(); + return false; + } + } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + } + Source.Detach(); + } + else + { + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + { + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? 
LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; + + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? zen::Min(RawSize, UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); + + MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); + + if (IsCompressed) + { + if (RawDataBuffer.IsNull()) + { + RawDataBuffer = UniqueBuffer::Alloc(zen::Min(RawSize, UncompressedBlockSize)); + } + else + { + ZEN_ASSERT(RawDataBuffer.GetSize() >= UncompressedBlockSize); + } + MutableMemoryView UncompressedBlock = RawDataBuffer.GetMutableView().Left(UncompressedBlockSize); + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + return false; + } + if (!Callback(BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress)))) + { + return false; + } + } + else + { + if (!Callback( + BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer( + IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress)))) + { + return false; + } + } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + } + } + return true; +} + +bool BlockDecoder::TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, MutableMemoryView RawView, @@ -560,51 +928,118 @@ BlockDecoder::TryDecompressTo(const BufferHeader& Header, CompressedOffset += CompressedBlockSize; } - for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((CompressedData.GetSegments().size() == 1) && CompressedData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) { - const uint64_t UncompressedBlockSize = 
BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; - const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); - const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; + ZEN_ASSERT(FileRef.FileHandle != nullptr); + BasicFile Source; + Source.Attach(FileRef.FileHandle); + + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + { + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; - const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock) - : zen::Min(RemainingRawSize, BlockSize); + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 + ? zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); - MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); + if (CompressedBlockCopy.GetSize() < CompressedBlockSize) + { + CompressedBlockCopy = UniqueBuffer::Alloc(CompressedBlockSize); + } + Source.Read(CompressedBlockCopy.GetData(), CompressedBlockSize, FileRef.FileChunkOffset + CompressedOffset); - if (IsCompressed) - { - MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress); + MemoryView CompressedBlock = CompressedBlockCopy.GetView().Left(CompressedBlockSize); - const bool IsAligned = BytesToUncompress == UncompressedBlockSize; - if (!IsAligned) + if (IsCompressed) { - // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries. 
- if (UncompressedBlockCopy.IsNull()) + MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress); + + const bool IsAligned = BytesToUncompress == UncompressedBlockSize; + if (!IsAligned) { - UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize); + // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries. + if (UncompressedBlockCopy.IsNull()) + { + UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize); + } + UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize); } - UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize); - } - if (!DecompressBlock(UncompressedBlock, CompressedBlock)) - { - return false; - } + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + Source.Detach(); + return false; + } - if (!IsAligned) + if (!IsAligned) + { + RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + } + } + else { - RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + RawView += BytesToUncompress; } - else + Source.Detach(); + } + else + { + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) { - RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); - } + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; - OffsetInFirstBlock = 0; - RemainingRawSize -= BytesToUncompress; - CompressedOffset += CompressedBlockSize; - RawView += BytesToUncompress; + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 + ? 
zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); + + MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); + + if (IsCompressed) + { + MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress); + + const bool IsAligned = BytesToUncompress == UncompressedBlockSize; + if (!IsAligned) + { + // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries. + if (UncompressedBlockCopy.IsNull()) + { + UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize); + } + UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize); + } + + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + return false; + } + + if (!IsAligned) + { + RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + } + } + else + { + RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + RawView += BytesToUncompress; + } } return RemainingRawSize == 0; @@ -1342,6 +1777,30 @@ CompressedBuffer::Compress(const SharedBuffer& RawData, return Compress(CompositeBuffer(RawData), Compressor, CompressionLevel, BlockSize); } +bool +CompressedBuffer::CompressToStream(const CompositeBuffer& RawData, + std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + OodleCompressor Compressor, + OodleCompressionLevel CompressionLevel, + uint64_t BlockSize) +{ + using namespace detail; + + if (BlockSize == 0) + { + BlockSize = DefaultBlockSize; + } + + if (CompressionLevel == OodleCompressionLevel::None) + { + return NoneEncoder().CompressToStream(RawData, std::move(Callback), BlockSize); + } + else + { + return OodleEncoder(Compressor, CompressionLevel).CompressToStream(RawData, std::move(Callback), 
BlockSize); + } +} + CompressedBuffer CompressedBuffer::FromCompressed(const CompositeBuffer& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) { @@ -1536,6 +1995,27 @@ CompressedBuffer::DecompressToComposite() const } bool +CompressedBuffer::DecompressToStream(uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const +{ + using namespace detail; + if (CompressedData) + { + const BufferHeader Header = BufferHeader::Read(CompressedData); + if (Header.Magic == BufferHeader::ExpectedMagic) + { + if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) + { + const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset; + return Decoder->DecompressToStream(Header, CompressedData, RawOffset, TotalRawSize, std::move(Callback)); + } + } + } + return false; +} + +bool CompressedBuffer::TryGetCompressParameters(OodleCompressor& OutCompressor, OodleCompressionLevel& OutCompressionLevel, uint64_t& OutBlockSize) const diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp index b8c35212f..05e2bf049 100644 --- a/src/zencore/filesystem.cpp +++ b/src/zencore/filesystem.cpp @@ -531,7 +531,10 @@ CloneFile(std::filesystem::path FromPath, std::filesystem::path ToPath) } void -CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options, std::error_code& OutErrorCode) +CopyFile(const std::filesystem::path& FromPath, + const std::filesystem::path& ToPath, + const CopyFileOptions& Options, + std::error_code& OutErrorCode) { OutErrorCode.clear(); @@ -544,7 +547,7 @@ CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop } bool -CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options) +CopyFile(const std::filesystem::path& FromPath, const std::filesystem::path& ToPath, const CopyFileOptions& Options) { bool Success = false; @@ -587,7 +590,7 @@ 
CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop ScopedFd $From = {FromFd}; // To file - int ToFd = open(ToPath.c_str(), O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0666); + int ToFd = open(ToPath.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0666); if (ToFd < 0) { ThrowLastError(fmt::format("failed to create file {}", ToPath)); @@ -595,9 +598,14 @@ CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop fchmod(ToFd, 0666); ScopedFd $To = {ToFd}; + struct stat Stat; + fstat(FromFd, &Stat); + + size_t FileSizeBytes = Stat.st_size; + // Copy impl - static const size_t BufferSize = 64 << 10; - void* Buffer = malloc(BufferSize); + const size_t BufferSize = Min(FileSizeBytes, 64u << 10); + void* Buffer = malloc(BufferSize); while (true) { int BytesRead = read(FromFd, Buffer, BufferSize); @@ -607,7 +615,7 @@ CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop break; } - if (write(ToFd, Buffer, BytesRead) != BufferSize) + if (write(ToFd, Buffer, BytesRead) != BytesRead) { Success = false; break; @@ -618,7 +626,7 @@ CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop if (!Success) { - ThrowLastError("file copy failed"sv); + ThrowLastError(fmt::format("file copy from {} to {} failed", FromPath, ToPath)); } return true; @@ -683,7 +691,7 @@ CopyTree(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop { } - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { std::error_code Ec; const std::filesystem::path Relative = std::filesystem::relative(Parent, BasePath, Ec); @@ -1236,7 +1244,11 @@ FileSystemTraversal::TraverseFileSystem(const std::filesystem::path& RootDir, Tr } else { - Visitor.VisitFile(RootDir, FileName, DirInfo->EndOfFile.QuadPart, 
gsl::narrow<uint32_t>(DirInfo->FileAttributes)); + Visitor.VisitFile(RootDir, + FileName, + DirInfo->EndOfFile.QuadPart, + gsl::narrow<uint32_t>(DirInfo->FileAttributes), + (uint64_t)DirInfo->LastWriteTime.QuadPart); } const uint64_t NextOffset = DirInfo->NextEntryOffset; @@ -1285,7 +1297,7 @@ FileSystemTraversal::TraverseFileSystem(const std::filesystem::path& RootDir, Tr } else if (S_ISREG(Stat.st_mode)) { - Visitor.VisitFile(RootDir, FileName, Stat.st_size, gsl::narrow<uint32_t>(Stat.st_mode)); + Visitor.VisitFile(RootDir, FileName, Stat.st_size, gsl::narrow<uint32_t>(Stat.st_mode), gsl::narrow<uint64_t>(Stat.st_mtime)); } else { @@ -1465,6 +1477,36 @@ GetModificationTickFromHandle(void* NativeHandle, std::error_code& Ec) return 0; } +uint64_t +GetModificationTickFromPath(const std::filesystem::path& Filename) +{ + // PathFromHandle + void* Handle; +#if ZEN_PLATFORM_WINDOWS + Handle = CreateFileW(Filename.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, 0, nullptr); + if (Handle == INVALID_HANDLE_VALUE) + { + ThrowLastError(fmt::format("Failed to open file {} to check modification tick.", Filename)); + } + auto _ = MakeGuard([Handle]() { CloseHandle(Handle); }); + std::error_code Ec; + uint64_t ModificatonTick = GetModificationTickFromHandle(Handle, Ec); + if (Ec) + { + ThrowSystemError(Ec.value(), Ec.message()); + } + return ModificatonTick; +#else + struct stat Stat; + int err = stat(Filename.native().c_str(), &Stat); + if (err) + { + ThrowLastError(fmt::format("Failed to get mode of file {}", Filename)); + } + return gsl::narrow<uint64_t>(Stat.st_mtime); +#endif +} + std::filesystem::path GetRunningExecutablePath() { @@ -1544,7 +1586,8 @@ GetDirectoryContent(const std::filesystem::path& RootDir, DirectoryContentFlags virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, - uint32_t NativeModeOrAttributes) override + uint32_t NativeModeOrAttributes, + uint64_t NativeModificationTick) override { if 
(EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeFiles)) { @@ -1557,6 +1600,10 @@ GetDirectoryContent(const std::filesystem::path& RootDir, DirectoryContentFlags { Content.FileAttributes.push_back(NativeModeOrAttributes); } + if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeModificationTick)) + { + Content.FileModificationTicks.push_back(NativeModificationTick); + } } } @@ -1612,7 +1659,8 @@ GetDirectoryContent(const std::filesystem::path& RootDir, virtual void VisitFile(const std::filesystem::path&, const path_view& File, uint64_t FileSize, - uint32_t NativeModeOrAttributes) override + uint32_t NativeModeOrAttributes, + uint64_t NativeModificationTick) override { if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeFiles)) { @@ -1625,6 +1673,10 @@ GetDirectoryContent(const std::filesystem::path& RootDir, { Content.FileAttributes.push_back(NativeModeOrAttributes); } + if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeModificationTick)) + { + Content.FileModificationTicks.push_back(NativeModificationTick); + } } } @@ -1881,6 +1933,129 @@ PickDefaultSystemRootDirectory() #endif // ZEN_PLATFORM_WINDOWS } +#if ZEN_PLATFORM_WINDOWS + +uint32_t +GetFileAttributes(const std::filesystem::path& Filename) +{ + DWORD Attributes = ::GetFileAttributes(Filename.native().c_str()); + if (Attributes == INVALID_FILE_ATTRIBUTES) + { + ThrowLastError(fmt::format("failed to get attributes of file {}", Filename)); + } + return (uint32_t)Attributes; +} + +void +SetFileAttributes(const std::filesystem::path& Filename, uint32_t Attributes) +{ + if (::SetFileAttributes(Filename.native().c_str(), Attributes) == 0) + { + ThrowLastError(fmt::format("failed to set attributes of file {}", Filename)); + } +} + +#endif // ZEN_PLATFORM_WINDOWS + +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + +uint32_t +GetFileMode(const std::filesystem::path& Filename) +{ + struct stat Stat; + int err = stat(Filename.native().c_str(), &Stat); + if (err) + { + 
ThrowLastError(fmt::format("Failed to get mode of file {}", Filename)); + } + return (uint32_t)Stat.st_mode; +} + +void +SetFileMode(const std::filesystem::path& Filename, uint32_t Attributes) +{ + int err = chmod(Filename.native().c_str(), (mode_t)Attributes); + if (err) + { + ThrowLastError(fmt::format("Failed to set mode of file {}", Filename)); + } +} + +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + +#if ZEN_PLATFORM_WINDOWS +const uint32_t FileAttributesSystemReadOnlyFlag = FILE_ATTRIBUTE_READONLY; +#else +const uint32_t FileAttributesSystemReadOnlyFlag = 0x00000001; +#endif // ZEN_PLATFORM_WINDOWS + +const uint32_t FileModeWriteEnableFlags = 0222; + +bool +IsFileAttributeReadOnly(uint32_t FileAttributes) +{ +#if ZEN_PLATFORM_WINDOWS + return (FileAttributes & FileAttributesSystemReadOnlyFlag) != 0; +#else + return (FileAttributes & 0x00000001) != 0; +#endif // ZEN_PLATFORM_WINDOWS +} + +bool +IsFileModeReadOnly(uint32_t FileMode) +{ + return (FileMode & FileModeWriteEnableFlags) == 0; +} + +uint32_t +MakeFileAttributeReadOnly(uint32_t FileAttributes, bool ReadOnly) +{ + return ReadOnly ? (FileAttributes | FileAttributesSystemReadOnlyFlag) : (FileAttributes & ~FileAttributesSystemReadOnlyFlag); +} + +uint32_t +MakeFileModeReadOnly(uint32_t FileMode, bool ReadOnly) +{ + return ReadOnly ? 
(FileMode & ~FileModeWriteEnableFlags) : (FileMode | FileModeWriteEnableFlags); +} + +bool +SetFileReadOnly(const std::filesystem::path& Filename, bool ReadOnly) +{ +#if ZEN_PLATFORM_WINDOWS + uint32_t CurrentAttributes = GetFileAttributes(Filename); + uint32_t NewAttributes = MakeFileAttributeReadOnly(CurrentAttributes, ReadOnly); + if (CurrentAttributes != NewAttributes) + { + SetFileAttributes(Filename, NewAttributes); + return true; + } +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + uint32_t CurrentMode = GetFileMode(Filename); + uint32_t NewMode = MakeFileModeReadOnly(CurrentMode, ReadOnly); + if (CurrentMode != NewMode) + { + SetFileMode(Filename, NewMode); + return true; + } +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + return false; +} + +std::filesystem::path +StringToPath(const std::string_view& Path) +{ + if (Path.length() > 2 && Path.front() == '\"' && Path.back() == '\"') + { + return std::filesystem::path(Path.substr(1, Path.length() - 2)).make_preferred(); + } + else + { + return std::filesystem::path(Path).make_preferred(); + } +} + ////////////////////////////////////////////////////////////////////////// // // Testing related code follows... 
@@ -1928,7 +2103,7 @@ TEST_CASE("filesystem") // Traversal struct : public FileSystemTraversal::TreeVisitor { - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t, uint32_t, uint64_t) override { bFoundExpected |= std::filesystem::equivalent(Parent / File, Expected); } diff --git a/src/zencore/include/zencore/basicfile.h b/src/zencore/include/zencore/basicfile.h index 03c5605df..57798b6f4 100644 --- a/src/zencore/include/zencore/basicfile.h +++ b/src/zencore/include/zencore/basicfile.h @@ -46,6 +46,10 @@ public: kPreventWrite = 0x2000'0000, // Do not open with write sharing mode (prevent other processes from writing to file while open) }; + BasicFile(const std::filesystem::path& FileName, Mode Mode); + BasicFile(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec); + BasicFile(const std::filesystem::path& FileName, Mode Mode, std::function<bool(std::error_code& Ec)>&& RetryCallback); + void Open(const std::filesystem::path& FileName, Mode Mode); void Open(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec); void Open(const std::filesystem::path& FileName, Mode Mode, std::function<bool(std::error_code& Ec)>&& RetryCallback); @@ -56,7 +60,8 @@ public: void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun); void Write(MemoryView Data, uint64_t FileOffset); void Write(MemoryView Data, uint64_t FileOffset, std::error_code& Ec); - uint64_t Write(CompositeBuffer Data, uint64_t FileOffset, std::error_code& Ec); + uint64_t Write(const CompositeBuffer& Data, uint64_t FileOffset); + uint64_t Write(const CompositeBuffer& Data, uint64_t FileOffset, std::error_code& Ec); void Write(const void* Data, uint64_t Size, uint64_t FileOffset); void Write(const void* Data, uint64_t Size, uint64_t FileOffset, std::error_code& Ec); void 
Flush(); @@ -170,6 +175,7 @@ public: ~BasicFileWriter(); void Write(const void* Data, uint64_t Size, uint64_t FileOffset); + void Write(const CompositeBuffer& Data, uint64_t FileOffset); void Flush(); private: @@ -180,6 +186,8 @@ private: uint64_t m_BufferEnd; }; +IoBuffer WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path); + ZENCORE_API void basicfile_forcelink(); } // namespace zen diff --git a/src/zencore/include/zencore/compactbinarybuilder.h b/src/zencore/include/zencore/compactbinarybuilder.h index 1c625cacc..f11717453 100644 --- a/src/zencore/include/zencore/compactbinarybuilder.h +++ b/src/zencore/include/zencore/compactbinarybuilder.h @@ -18,6 +18,8 @@ #include <type_traits> #include <vector> +#include <EASTL/fixed_vector.h> + #include <gsl/gsl-lite.hpp> namespace zen { @@ -367,6 +369,8 @@ public: /** Private flags that are public to work with ENUM_CLASS_FLAGS. */ enum class StateFlags : uint8_t; + typedef eastl::fixed_vector<uint8_t, 2048> CbWriterData_t; + protected: /** Reserve the specified size up front until the format is optimized. */ ZENCORE_API explicit CbWriter(int64_t InitialSize); @@ -409,8 +413,8 @@ private: // provided externally, such as on the stack. That format will store the offsets that require // object or array sizes to be inserted and field types to be removed, and will perform those // operations only when saving to a buffer. - std::vector<uint8_t> Data; - std::vector<WriterState> States; + eastl::fixed_vector<WriterState, 4> States; + CbWriterData_t Data; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/zencore/include/zencore/compactbinaryfmt.h b/src/zencore/include/zencore/compactbinaryfmt.h new file mode 100644 index 000000000..ae0c3eb42 --- /dev/null +++ b/src/zencore/include/zencore/compactbinaryfmt.h @@ -0,0 +1,23 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <zencore/compactbinary.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <fmt/format.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <string_view> + +template<typename T> +requires DerivedFrom<T, zen::CbObjectView> +struct fmt::formatter<T> : fmt::formatter<std::string_view> +{ + auto format(const zen::CbObject& a, format_context& ctx) const + { + zen::ExtendableStringBuilder<1024> ObjStr; + zen::CompactBinaryToJson(a, ObjStr); + return fmt::formatter<std::string_view>::format(ObjStr.ToView(), ctx); + } +}; diff --git a/src/zencore/include/zencore/compactbinarypackage.h b/src/zencore/include/zencore/compactbinarypackage.h index 12fcc41b7..9ec12cb0f 100644 --- a/src/zencore/include/zencore/compactbinarypackage.h +++ b/src/zencore/include/zencore/compactbinarypackage.h @@ -12,6 +12,8 @@ #include <span> #include <variant> +#include <EASTL/fixed_vector.h> + #ifdef GetObject # error "windows.h pollution" # undef GetObject @@ -265,7 +267,10 @@ public: } /** Returns the attachments in this package. */ - inline std::span<const CbAttachment> GetAttachments() const { return Attachments; } + inline std::span<const CbAttachment> GetAttachments() const + { + return std::span<const CbAttachment>(begin(Attachments), end(Attachments)); + } /** * Find an attachment by its hash. @@ -286,6 +291,8 @@ public: void AddAttachments(std::span<const CbAttachment> Attachments); + void ReserveAttachments(size_t Count); + /** * Remove an attachment by hash. * @@ -324,9 +331,9 @@ private: void GatherAttachments(const CbObject& Object, AttachmentResolver Resolver); /** Attachments ordered by their hash. 
*/ - std::vector<CbAttachment> Attachments; - CbObject Object; - IoHash ObjectHash; + eastl::fixed_vector<CbAttachment, 32> Attachments; + CbObject Object; + IoHash ObjectHash; }; namespace legacy { diff --git a/src/zencore/include/zencore/compositebuffer.h b/src/zencore/include/zencore/compositebuffer.h index b435c5e74..1e1611de9 100644 --- a/src/zencore/include/zencore/compositebuffer.h +++ b/src/zencore/include/zencore/compositebuffer.h @@ -2,6 +2,7 @@ #pragma once +#include <zencore/eastlutil.h> #include <zencore/sharedbuffer.h> #include <zencore/zencore.h> @@ -9,6 +10,8 @@ #include <span> #include <vector> +#include <EASTL/fixed_vector.h> + namespace zen { /** @@ -35,7 +38,7 @@ public: { m_Segments.reserve((GetBufferCount(std::forward<BufferTypes>(Buffers)) + ...)); (AppendBuffers(std::forward<BufferTypes>(Buffers)), ...); - std::erase_if(m_Segments, [](const SharedBuffer& It) { return It.IsNull(); }); + erase_if(m_Segments, [](const SharedBuffer& It) { return It.IsNull(); }); } } @@ -46,7 +49,10 @@ public: [[nodiscard]] ZENCORE_API uint64_t GetSize() const; /** Returns the segments that the buffer is composed from. */ - [[nodiscard]] inline std::span<const SharedBuffer> GetSegments() const { return std::span<const SharedBuffer>{m_Segments}; } + [[nodiscard]] inline std::span<const SharedBuffer> GetSegments() const + { + return std::span<const SharedBuffer>{begin(m_Segments), end(m_Segments)}; + } /** Returns true if the composite buffer is not null. 
*/ [[nodiscard]] inline explicit operator bool() const { return !IsNull(); } @@ -120,6 +126,8 @@ public: static const CompositeBuffer Null; private: + typedef eastl::fixed_vector<SharedBuffer, 4> SharedBufferVector_t; + static inline size_t GetBufferCount(const CompositeBuffer& Buffer) { return Buffer.m_Segments.size(); } inline void AppendBuffers(const CompositeBuffer& Buffer) { @@ -134,12 +142,25 @@ private: inline void AppendBuffers(SharedBuffer&& Buffer) { m_Segments.push_back(std::move(Buffer)); } inline void AppendBuffers(IoBuffer&& Buffer) { m_Segments.push_back(SharedBuffer(std::move(Buffer))); } + static inline size_t GetBufferCount(std::span<IoBuffer>&& Container) { return Container.size(); } + inline void AppendBuffers(std::span<IoBuffer>&& Container) + { + m_Segments.reserve(m_Segments.size() + Container.size()); + for (IoBuffer& Buffer : Container) + { + m_Segments.emplace_back(SharedBuffer(std::move(Buffer))); + } + } + static inline size_t GetBufferCount(std::vector<SharedBuffer>&& Container) { return Container.size(); } static inline size_t GetBufferCount(std::vector<IoBuffer>&& Container) { return Container.size(); } inline void AppendBuffers(std::vector<SharedBuffer>&& Container) { m_Segments.reserve(m_Segments.size() + Container.size()); - m_Segments.insert(m_Segments.end(), std::make_move_iterator(Container.begin()), std::make_move_iterator(Container.end())); + for (SharedBuffer& Buffer : Container) + { + m_Segments.emplace_back(std::move(Buffer)); + } } inline void AppendBuffers(std::vector<IoBuffer>&& Container) { @@ -150,8 +171,17 @@ private: } } + inline void AppendBuffers(SharedBufferVector_t&& Container) + { + m_Segments.reserve(m_Segments.size() + Container.size()); + for (SharedBuffer& Buffer : Container) + { + m_Segments.emplace_back(std::move(Buffer)); + } + } + private: - std::vector<SharedBuffer> m_Segments; + SharedBufferVector_t m_Segments; }; void compositebuffer_forcelink(); // internal diff --git 
a/src/zencore/include/zencore/compress.h b/src/zencore/include/zencore/compress.h index 5e761ceef..74fd5f767 100644 --- a/src/zencore/include/zencore/compress.h +++ b/src/zencore/include/zencore/compress.h @@ -74,6 +74,11 @@ public: OodleCompressor Compressor = OodleCompressor::Mermaid, OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, uint64_t BlockSize = 0); + [[nodiscard]] ZENCORE_API static bool CompressToStream(const CompositeBuffer& RawData, + std::function<void(uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + OodleCompressor Compressor = OodleCompressor::Mermaid, + OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, + uint64_t BlockSize = 0); /** * Construct from a compressed buffer previously created by Compress(). @@ -196,6 +201,16 @@ public: */ [[nodiscard]] ZENCORE_API CompositeBuffer DecompressToComposite() const; + /** + * Decompress into and call callback for ranges of decompressed data. + * The buffer in the callback will be overwritten when the callback returns. + * + * @return True if the buffer is valid and can be decompressed. + */ + [[nodiscard]] ZENCORE_API bool DecompressToStream(uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const; + /** A null compressed buffer. */ static const CompressedBuffer Null; diff --git a/src/zencore/include/zencore/eastlutil.h b/src/zencore/include/zencore/eastlutil.h new file mode 100644 index 000000000..642321dae --- /dev/null +++ b/src/zencore/include/zencore/eastlutil.h @@ -0,0 +1,20 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <algorithm> + +namespace zen { + +size_t +erase_if(auto& _Cont, auto Predicate) +{ + auto _First = _Cont.begin(); + const auto _Last = _Cont.end(); + const auto _Old_size = _Cont.size(); + _First = std::remove_if(_First, _Last, Predicate); + _Cont.erase(_First, _Last); + return _Old_size - _Cont.size(); +} + +} // namespace zen diff --git a/src/zencore/include/zencore/filesystem.h b/src/zencore/include/zencore/filesystem.h index ca8682cd7..9a2b15d1d 100644 --- a/src/zencore/include/zencore/filesystem.h +++ b/src/zencore/include/zencore/filesystem.h @@ -52,6 +52,10 @@ ZENCORE_API uint64_t FileSizeFromHandle(void* NativeHandle); */ ZENCORE_API uint64_t GetModificationTickFromHandle(void* NativeHandle, std::error_code& Ec); +/** Get a native time tick of last modification time + */ +ZENCORE_API uint64_t GetModificationTickFromPath(const std::filesystem::path& Filename); + ZENCORE_API std::filesystem::path GetRunningExecutablePath(); /** Set the max open file handle count to max allowed for the current process on Linux and MacOS @@ -93,11 +97,11 @@ struct CopyFileOptions bool MustClone = false; }; -ZENCORE_API bool CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options); -ZENCORE_API void CopyFile(std::filesystem::path FromPath, - std::filesystem::path ToPath, - const CopyFileOptions& Options, - std::error_code& OutError); +ZENCORE_API bool CopyFile(const std::filesystem::path& FromPath, const std::filesystem::path& ToPath, const CopyFileOptions& Options); +ZENCORE_API void CopyFile(const std::filesystem::path& FromPath, + const std::filesystem::path& ToPath, + const CopyFileOptions& Options, + std::error_code& OutError); ZENCORE_API void CopyTree(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options); ZENCORE_API bool SupportsBlockRefCounting(std::filesystem::path Path); @@ -203,7 +207,8 @@ public: virtual void VisitFile(const std::filesystem::path& 
Parent, const path_view& File, uint64_t FileSize, - uint32_t NativeModeOrAttributes) = 0; + uint32_t NativeModeOrAttributes, + uint64_t NativeModificationTick) = 0; // This should return true if we should recurse into the directory virtual bool VisitDirectory(const std::filesystem::path& Parent, @@ -216,13 +221,14 @@ public: enum class DirectoryContentFlags : uint8_t { - None = 0, - IncludeDirs = 1u << 0, - IncludeFiles = 1u << 1, - Recursive = 1u << 2, - IncludeFileSizes = 1u << 3, - IncludeAttributes = 1u << 4, - IncludeAllEntries = IncludeDirs | IncludeFiles | Recursive + None = 0, + IncludeDirs = 1u << 0, + IncludeFiles = 1u << 1, + Recursive = 1u << 2, + IncludeFileSizes = 1u << 3, + IncludeAttributes = 1u << 4, + IncludeModificationTick = 1u << 5, + IncludeAllEntries = IncludeDirs | IncludeFiles | Recursive }; ENUM_CLASS_FLAGS(DirectoryContentFlags) @@ -232,6 +238,7 @@ struct DirectoryContent std::vector<std::filesystem::path> Files; std::vector<uint64_t> FileSizes; std::vector<uint32_t> FileAttributes; + std::vector<uint64_t> FileModificationTicks; std::vector<std::filesystem::path> Directories; std::vector<uint32_t> DirectoryAttributes; }; @@ -246,6 +253,7 @@ public: std::vector<std::filesystem::path> FileNames; std::vector<uint64_t> FileSizes; std::vector<uint32_t> FileAttributes; + std::vector<uint64_t> FileModificationTicks; std::vector<std::filesystem::path> DirectoryNames; std::vector<uint32_t> DirectoryAttributes; }; @@ -267,6 +275,25 @@ std::error_code RotateDirectories(const std::filesystem::path& DirectoryName, st std::filesystem::path PickDefaultSystemRootDirectory(); +#if ZEN_PLATFORM_WINDOWS +uint32_t GetFileAttributes(const std::filesystem::path& Filename); +void SetFileAttributes(const std::filesystem::path& Filename, uint32_t Attributes); +#endif // ZEN_PLATFORM_WINDOWS + +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC +uint32_t GetFileMode(const std::filesystem::path& Filename); +void SetFileMode(const std::filesystem::path& Filename, uint32_t 
Attributes); +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + +bool IsFileAttributeReadOnly(uint32_t FileAttributes); +bool IsFileModeReadOnly(uint32_t FileMode); +uint32_t MakeFileAttributeReadOnly(uint32_t FileAttributes, bool ReadOnly); +uint32_t MakeFileModeReadOnly(uint32_t FileMode, bool ReadOnly); + +bool SetFileReadOnly(const std::filesystem::path& Filename, bool ReadOnly); + +std::filesystem::path StringToPath(const std::string_view& Path); + ////////////////////////////////////////////////////////////////////////// void filesystem_forcelink(); // internal diff --git a/src/zencore/include/zencore/iohash.h b/src/zencore/include/zencore/iohash.h index 8871a5895..7443e17b7 100644 --- a/src/zencore/include/zencore/iohash.h +++ b/src/zencore/include/zencore/iohash.h @@ -47,8 +47,8 @@ struct IoHash static IoHash HashBuffer(const void* data, size_t byteCount); static IoHash HashBuffer(MemoryView Data) { return HashBuffer(Data.GetData(), Data.GetSize()); } - static IoHash HashBuffer(const CompositeBuffer& Buffer); - static IoHash HashBuffer(const IoBuffer& Buffer); + static IoHash HashBuffer(const CompositeBuffer& Buffer, std::atomic<uint64_t>* ProcessedBytes = nullptr); + static IoHash HashBuffer(const IoBuffer& Buffer, std::atomic<uint64_t>* ProcessedBytes = nullptr); static IoHash FromHexString(const char* string); static IoHash FromHexString(const std::string_view string); static bool TryParse(std::string_view Str, IoHash& Hash); diff --git a/src/zencore/include/zencore/memory/newdelete.h b/src/zencore/include/zencore/memory/newdelete.h index d22c8604f..059f1d5ea 100644 --- a/src/zencore/include/zencore/memory/newdelete.h +++ b/src/zencore/include/zencore/memory/newdelete.h @@ -153,3 +153,29 @@ operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexce return zen_new_aligned_nothrow(n, static_cast<size_t>(al)); } #endif + +// EASTL operator new + +void* +operator new[](size_t size, const char* pName, int flags, unsigned debugFlags, 
const char* file, int line) +{ + ZEN_UNUSED(pName, flags, debugFlags, file, line); + return zen_new(size); +} + +void* +operator new[](size_t size, + size_t alignment, + size_t alignmentOffset, + const char* pName, + int flags, + unsigned debugFlags, + const char* file, + int line) +{ + ZEN_UNUSED(alignmentOffset, pName, flags, debugFlags, file, line); + + ZEN_ASSERT_SLOW(alignmentOffset == 0); // currently not supported + + return zen_new_aligned(size, alignment); +} diff --git a/src/zencore/include/zencore/process.h b/src/zencore/include/zencore/process.h index 36c2a2481..335e3d909 100644 --- a/src/zencore/include/zencore/process.h +++ b/src/zencore/include/zencore/process.h @@ -101,6 +101,9 @@ int GetProcessId(CreateProcResult ProcId); std::filesystem::path GetProcessExecutablePath(int Pid, std::error_code& OutEc); std::error_code FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHandle); +std::vector<std::string> ParseCommandLine(std::string_view CommandLine); +std::vector<char*> StripCommandlineQuotes(std::vector<std::string>& InOutArgs); + #if ZEN_PLATFORM_LINUX void IgnoreChildSignals(); #endif diff --git a/src/zencore/include/zencore/string.h b/src/zencore/include/zencore/string.h index e2ef1c1a0..68129b691 100644 --- a/src/zencore/include/zencore/string.h +++ b/src/zencore/include/zencore/string.h @@ -522,6 +522,9 @@ public: ////////////////////////////////////////////////////////////////////////// +bool IsValidUtf8(const std::string_view& str); +std::string_view::const_iterator FindFirstInvalidUtf8Byte(const std::string_view& str); + void Utf8ToWide(const char8_t* str, WideStringBuilderBase& out); void Utf8ToWide(const std::u8string_view& wstr, WideStringBuilderBase& out); void Utf8ToWide(const std::string_view& wstr, WideStringBuilderBase& out); diff --git a/src/zencore/include/zencore/timer.h b/src/zencore/include/zencore/timer.h index e4ddc3505..767dc4314 100644 --- a/src/zencore/include/zencore/timer.h +++ 
b/src/zencore/include/zencore/timer.h @@ -21,6 +21,10 @@ ZENCORE_API uint64_t GetHifreqTimerFrequency(); ZENCORE_API double GetHifreqTimerToSeconds(); ZENCORE_API uint64_t GetHifreqTimerFrequencySafe(); // May be used during static init +// Query time since process was spawned (returns time in ms) + +uint64_t GetTimeSinceProcessStart(); + class Stopwatch { public: diff --git a/src/zencore/iohash.cpp b/src/zencore/iohash.cpp index 7200e6e3f..3b2af0db4 100644 --- a/src/zencore/iohash.cpp +++ b/src/zencore/iohash.cpp @@ -30,7 +30,7 @@ IoHash::HashBuffer(const void* data, size_t byteCount) } IoHash -IoHash::HashBuffer(const CompositeBuffer& Buffer) +IoHash::HashBuffer(const CompositeBuffer& Buffer, std::atomic<uint64_t>* ProcessedBytes) { IoHashStream Hasher; @@ -46,11 +46,21 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, - [&Hasher](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + [&Hasher, ProcessedBytes](const void* Data, size_t Size) { + Hasher.Append(Data, Size); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(Size); + } + }); } else { Hasher.Append(Segment.GetData(), SegmentSize); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(SegmentSize); + } } } @@ -58,7 +68,7 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) } IoHash -IoHash::HashBuffer(const IoBuffer& Buffer) +IoHash::HashBuffer(const IoBuffer& Buffer, std::atomic<uint64_t>* ProcessedBytes) { IoHashStream Hasher; @@ -71,11 +81,21 @@ IoHash::HashBuffer(const IoBuffer& Buffer) FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, - [&Hasher](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + [&Hasher, ProcessedBytes](const void* Data, size_t Size) { + Hasher.Append(Data, Size); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(Size); + } + }); } else { Hasher.Append(Buffer.GetData(), BufferSize); + if (ProcessedBytes != nullptr) + { + 
ProcessedBytes->fetch_add(BufferSize); + } } return Hasher.GetHash(); diff --git a/src/zencore/process.cpp b/src/zencore/process.cpp index 079e2db3f..147b00966 100644 --- a/src/zencore/process.cpp +++ b/src/zencore/process.cpp @@ -1064,6 +1064,118 @@ FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHand #endif // ZEN_PLATFORM_LINUX } +std::vector<std::string> +ParseCommandLine(std::string_view CommandLine) +{ + auto IsWhitespaceOrEnd = [](std::string_view CommandLine, std::string::size_type Pos) { + if (Pos == CommandLine.length()) + { + return true; + } + if (CommandLine[Pos] == ' ') + { + return true; + } + return false; + }; + + bool IsParsingArg = false; + bool IsInQuote = false; + + std::string::size_type Pos = 0; + std::string::size_type ArgStart = 0; + std::vector<std::string> Args; + while (Pos < CommandLine.length()) + { + if (IsInQuote) + { + if (CommandLine[Pos] == '"' && IsWhitespaceOrEnd(CommandLine, Pos + 1)) + { + Args.push_back(std::string(CommandLine.substr(ArgStart, Pos - ArgStart + 1))); + Pos++; + IsInQuote = false; + IsParsingArg = false; + } + else + { + Pos++; + } + } + else if (IsParsingArg) + { + ZEN_ASSERT(Pos > ArgStart); + if (CommandLine[Pos] == ' ') + { + Args.push_back(std::string(CommandLine.substr(ArgStart, Pos - ArgStart))); + Pos++; + IsParsingArg = false; + } + else if (CommandLine[Pos] == '"') + { + IsInQuote = true; + Pos++; + } + else + { + Pos++; + } + } + else if (CommandLine[Pos] == '"') + { + IsInQuote = true; + IsParsingArg = true; + ArgStart = Pos; + Pos++; + } + else if (CommandLine[Pos] != ' ') + { + IsParsingArg = true; + ArgStart = Pos; + Pos++; + } + else + { + Pos++; + } + } + if (IsParsingArg) + { + ZEN_ASSERT(Pos > ArgStart); + Args.push_back(std::string(CommandLine.substr(ArgStart))); + } + + return Args; +} + +std::vector<char*> +StripCommandlineQuotes(std::vector<std::string>& InOutArgs) +{ + std::vector<char*> RawArgs; + RawArgs.reserve(InOutArgs.size()); + for (std::string& Arg : 
InOutArgs) + { + std::string::size_type EscapedQuotePos = Arg.find("\\\"", 1); + while (EscapedQuotePos != std::string::npos && Arg.rfind('\"', EscapedQuotePos - 1) != std::string::npos) + { + Arg.erase(EscapedQuotePos, 1); + EscapedQuotePos = Arg.find("\\\"", EscapedQuotePos); + } + + if (Arg.starts_with("\"")) + { + if (Arg.find('"', 1) == Arg.length() - 1) + { + if (Arg.find(' ', 1) == std::string::npos) + { + Arg = Arg.substr(1, Arg.length() - 2); + } + } + } + RawArgs.push_back(const_cast<char*>(Arg.c_str())); + } + return RawArgs; +} + #if ZEN_WITH_TESTS void @@ -1140,6 +1252,36 @@ TEST_CASE("BuildArgV") } } +TEST_CASE("CommandLine") +{ + std::vector<std::string> v1 = ParseCommandLine("c:\\my\\exe.exe \"quoted arg\" \"one\",two,\"three\\\""); + CHECK_EQ(v1[0], "c:\\my\\exe.exe"); + CHECK_EQ(v1[1], "\"quoted arg\""); + CHECK_EQ(v1[2], "\"one\",two,\"three\\\""); + + std::vector<std::string> v2 = ParseCommandLine( + "--tracehost 127.0.0.1 builds download --url=https://jupiter.devtools.epicgames.com --namespace=ue.oplog " + "--bucket=citysample.packaged-build.fortnite-main.windows \"c:\\just\\a\\path\" " + "--access-token-path=\"C:\\Users\\dan.engelbrecht\\jupiter-token.json\" \"D:\\Dev\\Spaced Folder\\Target\\\" " + "--alt-path=\"D:\\Dev\\Spaced Folder2\\Target\\\" 07dn23ifiwesnvoasjncasab --build-part-name win64,linux,ps5"); + + std::vector<char*> v2Stripped = StripCommandlineQuotes(v2); + CHECK_EQ(v2Stripped[0], std::string("--tracehost")); + CHECK_EQ(v2Stripped[1], std::string("127.0.0.1")); + CHECK_EQ(v2Stripped[2], std::string("builds")); + CHECK_EQ(v2Stripped[3], std::string("download")); + CHECK_EQ(v2Stripped[4], std::string("--url=https://jupiter.devtools.epicgames.com")); + CHECK_EQ(v2Stripped[5], std::string("--namespace=ue.oplog")); + CHECK_EQ(v2Stripped[6], std::string("--bucket=citysample.packaged-build.fortnite-main.windows")); + CHECK_EQ(v2Stripped[7], std::string("c:\\just\\a\\path")); + CHECK_EQ(v2Stripped[8], 
std::string("--access-token-path=\"C:\\Users\\dan.engelbrecht\\jupiter-token.json\"")); + CHECK_EQ(v2Stripped[9], std::string("\"D:\\Dev\\Spaced Folder\\Target\"")); + CHECK_EQ(v2Stripped[10], std::string("--alt-path=\"D:\\Dev\\Spaced Folder2\\Target\"")); + CHECK_EQ(v2Stripped[11], std::string("07dn23ifiwesnvoasjncasab")); + CHECK_EQ(v2Stripped[12], std::string("--build-part-name")); + CHECK_EQ(v2Stripped[13], std::string("win64,linux,ps5")); +} + TEST_SUITE_END(/* core.process */); #endif diff --git a/src/zencore/string.cpp b/src/zencore/string.cpp index 242d41abe..a0d8c927f 100644 --- a/src/zencore/string.cpp +++ b/src/zencore/string.cpp @@ -99,6 +99,20 @@ FilepathFindExtension(const std::string_view& Path, const char* ExtensionToMatch ////////////////////////////////////////////////////////////////////////// +bool +IsValidUtf8(const std::string_view& str) +{ + return utf8::is_valid(begin(str), end(str)); +} + +std::string_view::const_iterator +FindFirstInvalidUtf8Byte(const std::string_view& str) +{ + return utf8::find_invalid(begin(str), end(str)); +} + +////////////////////////////////////////////////////////////////////////// + void Utf8ToWide(const char8_t* Str8, WideStringBuilderBase& OutString) { diff --git a/src/zencore/timer.cpp b/src/zencore/timer.cpp index 1655e912d..95536cb26 100644 --- a/src/zencore/timer.cpp +++ b/src/zencore/timer.cpp @@ -12,9 +12,20 @@ # include <unistd.h> #endif +#define GTSPS_IMPLEMENTATION +#include "GetTimeSinceProcessStart.h" + namespace zen { uint64_t +GetTimeSinceProcessStart() +{ + double TimeInSeconds = ::GetTimeSinceProcessStart(); + + return uint64_t(TimeInSeconds * 1000); +} + +uint64_t GetHifreqTimerValue() { uint64_t Timestamp; diff --git a/src/zencore/workthreadpool.cpp b/src/zencore/workthreadpool.cpp index d15fb2e83..445fe939e 100644 --- a/src/zencore/workthreadpool.cpp +++ b/src/zencore/workthreadpool.cpp @@ -274,7 +274,7 @@ WorkerThreadPool::ScheduleWork(Ref<IWork> Work) void 
WorkerThreadPool::ScheduleWork(std::function<void()>&& Work) { - ScheduleWork(Ref<IWork>(new detail::LambdaWork(Work))); + ScheduleWork(Ref<IWork>(new detail::LambdaWork(std::move(Work)))); } [[nodiscard]] size_t diff --git a/src/zencore/xmake.lua b/src/zencore/xmake.lua index 2efa3fdb8..13611a2e9 100644 --- a/src/zencore/xmake.lua +++ b/src/zencore/xmake.lua @@ -29,6 +29,7 @@ target('zencore') end add_includedirs("include", {public=true}) + add_includedirs("$(projectdir)/thirdparty/GetTimeSinceProcessStart") add_includedirs("$(projectdir)/thirdparty/utfcpp/source") add_includedirs("$(projectdir)/thirdparty/Oodle/include") add_includedirs("$(projectdir)/thirdparty/trace", {public=true}) @@ -55,6 +56,7 @@ target('zencore') add_packages( "vcpkg::doctest", + "vcpkg::eastl", "vcpkg::fmt", "vcpkg::gsl-lite", "vcpkg::lz4", diff --git a/src/zenhttp-test/zenhttp-test.cpp b/src/zenhttp-test/zenhttp-test.cpp index df395939b..381e0a85e 100644 --- a/src/zenhttp-test/zenhttp-test.cpp +++ b/src/zenhttp-test/zenhttp-test.cpp @@ -17,9 +17,9 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) #if ZEN_WITH_TESTS zen::zenhttp_forcelinktests(); -#if ZEN_PLATFORM_LINUX +# if ZEN_PLATFORM_LINUX zen::IgnoreChildSignals(); -#endif +# endif zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zenhttp/httpclient.cpp b/src/zenhttp/httpclient.cpp index 8052a8fd5..30711a432 100644 --- a/src/zenhttp/httpclient.cpp +++ b/src/zenhttp/httpclient.cpp @@ -282,7 +282,7 @@ AsCprBody(const IoBuffer& Obj) ////////////////////////////////////////////////////////////////////////// static HttpClient::Response -ResponseWithPayload(cpr::Response& HttpResponse, const HttpResponseCode WorkResponseCode, IoBuffer&& Payload) +ResponseWithPayload(std::string_view SessionId, cpr::Response& HttpResponse, const HttpResponseCode WorkResponseCode, IoBuffer&& Payload) { // This ends up doing a memcpy, would be good to get rid of it by streaming results // into buffer directly 
@@ -297,7 +297,7 @@ ResponseWithPayload(cpr::Response& HttpResponse, const HttpResponseCode WorkResp if (!IsHttpSuccessCode(WorkResponseCode) && WorkResponseCode != HttpResponseCode::NotFound) { - ZEN_WARN("HttpClient request failed: {}", HttpResponse); + ZEN_WARN("HttpClient request failed (session: {}): {}", SessionId, HttpResponse); } return HttpClient::Response{.StatusCode = WorkResponseCode, @@ -309,12 +309,12 @@ ResponseWithPayload(cpr::Response& HttpResponse, const HttpResponseCode WorkResp } static HttpClient::Response -CommonResponse(cpr::Response&& HttpResponse, IoBuffer&& Payload = {}) +CommonResponse(std::string_view SessionId, cpr::Response&& HttpResponse, IoBuffer&& Payload = {}) { const HttpResponseCode WorkResponseCode = HttpResponseCode(HttpResponse.status_code); if (HttpResponse.error) { - ZEN_WARN("HttpClient client error: {}", HttpResponse); + ZEN_WARN("HttpClient client error (session: {}): {}", SessionId, HttpResponse); // Client side failure code return HttpClient::Response{ @@ -339,6 +339,7 @@ CommonResponse(cpr::Response&& HttpResponse, IoBuffer&& Payload = {}) else { return ResponseWithPayload( + SessionId, HttpResponse, WorkResponseCode, Payload ? 
std::move(Payload) : IoBufferBuilder::MakeCloneFromMemory(HttpResponse.text.data(), HttpResponse.text.size())); @@ -413,6 +414,11 @@ ValidatePayload(cpr::Response& Response, std::unique_ptr<detail::TempPayloadFile } } + if (Response.status_code == (long)HttpResponseCode::PartialContent) + { + return true; + } + if (auto ContentType = Response.header.find("Content-Type"); ContentType != Response.header.end()) { if (ContentType->second == "application/x-ue-comp") @@ -448,22 +454,40 @@ ValidatePayload(cpr::Response& Response, std::unique_ptr<detail::TempPayloadFile } static cpr::Response -DoWithRetry(std::function<cpr::Response()>&& Func, uint8_t RetryCount) +DoWithRetry( + std::string_view SessionId, + std::function<cpr::Response()>&& Func, + uint8_t RetryCount, + std::function<bool(cpr::Response& Result)>&& Validate = [](cpr::Response&) { return true; }) { uint8_t Attempt = 0; cpr::Response Result = Func(); - while (Attempt < RetryCount && ShouldRetry(Result)) + while (Attempt < RetryCount) { + if (!ShouldRetry(Result)) + { + if (Result.error || !IsHttpSuccessCode(Result.status_code)) + { + break; + } + if (Validate(Result)) + { + break; + } + } Sleep(100 * (Attempt + 1)); Attempt++; - ZEN_INFO("{} Attempt {}/{}", CommonResponse(std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); + ZEN_INFO("{} Attempt {}/{}", CommonResponse(SessionId, std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); Result = Func(); } return Result; } static cpr::Response -DoWithRetry(std::function<cpr::Response()>&& Func, std::unique_ptr<detail::TempPayloadFile>& PayloadFile, uint8_t RetryCount) +DoWithRetry(std::string_view SessionId, + std::function<cpr::Response()>&& Func, + std::unique_ptr<detail::TempPayloadFile>& PayloadFile, + uint8_t RetryCount) { uint8_t Attempt = 0; cpr::Response Result = Func(); @@ -482,7 +506,7 @@ DoWithRetry(std::function<cpr::Response()>&& Func, std::unique_ptr<detail::TempP } Sleep(100 * (Attempt + 1)); Attempt++; - ZEN_INFO("{} 
Attempt {}/{}", CommonResponse(std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); + ZEN_INFO("{} Attempt {}/{}", CommonResponse(SessionId, std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); Result = Func(); } return Result; @@ -829,15 +853,18 @@ HttpClient::Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap { ZEN_TRACE_CPU("HttpClient::Put"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); - return Sess.Put(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->SetBody(AsCprBody(Payload)); + Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); + return Sess.Put(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -845,31 +872,40 @@ HttpClient::Put(std::string_view Url, const KeyValueMap& Parameters) { ZEN_TRACE_CPU("HttpClient::Put"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, - Url, - m_ConnectionSettings, - {{"Content-Length", "0"}}, - Parameters, - m_SessionId, - GetAccessToken()); - return Sess.Put(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse(m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, + Url, + m_ConnectionSettings, + {{"Content-Length", "0"}}, + Parameters, + m_SessionId, + GetAccessToken()); + return Sess.Put(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response HttpClient::Get(std::string_view Url, const KeyValueMap& AdditionalHeader, const KeyValueMap& Parameters) { 
ZEN_TRACE_CPU("HttpClient::Get"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); - return Sess.Get(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); + return Sess.Get(); + }, + m_ConnectionSettings.RetryCount, + [](cpr::Response& Result) { + std::unique_ptr<detail::TempPayloadFile> NoTempFile; + return ValidatePayload(Result, NoTempFile); + })); } HttpClient::Response @@ -877,13 +913,16 @@ HttpClient::Head(std::string_view Url, const KeyValueMap& AdditionalHeader) { ZEN_TRACE_CPU("HttpClient::Head"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - return Sess.Head(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + return Sess.Head(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -891,13 +930,16 @@ HttpClient::Delete(std::string_view Url, const KeyValueMap& AdditionalHeader) { ZEN_TRACE_CPU("HttpClient::Delete"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - return Sess.Delete(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, 
{}, m_SessionId, GetAccessToken()); + return Sess.Delete(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -905,13 +947,16 @@ HttpClient::Post(std::string_view Url, const KeyValueMap& AdditionalHeader, cons { ZEN_TRACE_CPU("HttpClient::PostNoPayload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); - return Sess.Post(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); + return Sess.Post(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -925,16 +970,19 @@ HttpClient::Post(std::string_view Url, const IoBuffer& Payload, ZenContentType C { ZEN_TRACE_CPU("HttpClient::PostWithPayload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - return Sess.Post(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + + Sess->SetBody(AsCprBody(Payload)); + Sess->UpdateHeader({HeaderContentType(ContentType)}); + return Sess.Post(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -942,16 +990,19 @@ HttpClient::Post(std::string_view Url, CbObject Payload, const KeyValueMap& Addi { ZEN_TRACE_CPU("HttpClient::PostObjectPayload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, 
Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(ZenContentType::kCbObject)}); - return Sess.Post(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + + Sess->SetBody(AsCprBody(Payload)); + Sess->UpdateHeader({HeaderContentType(ZenContentType::kCbObject)}); + return Sess.Post(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -965,24 +1016,27 @@ HttpClient::Post(std::string_view Url, const CompositeBuffer& Payload, ZenConten { ZEN_TRACE_CPU("HttpClient::Post"); - return CommonResponse(DoWithRetry( - [&]() { - uint64_t SizeLeft = Payload.GetSize(); - CompositeBuffer::Iterator BufferIt = Payload.GetIterator(0); - auto ReadCallback = [&Payload, &BufferIt, &SizeLeft](char* buffer, size_t& size, intptr_t) { - size = Min<size_t>(size, SizeLeft); - MutableMemoryView Data(buffer, size); - Payload.CopyTo(Data, BufferIt); - SizeLeft -= size; - return true; - }; - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - - return Sess.Post(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + uint64_t SizeLeft = Payload.GetSize(); + CompositeBuffer::Iterator BufferIt = Payload.GetIterator(0); + auto ReadCallback = [&Payload, &BufferIt, &SizeLeft](char* buffer, size_t& size, intptr_t) { + size = Min<size_t>(size, SizeLeft); + MutableMemoryView Data(buffer, size); + Payload.CopyTo(Data, BufferIt); + SizeLeft -= size; + return true; + }; + Impl::Session Sess = 
+ m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->UpdateHeader({HeaderContentType(ContentType)}); + + return Sess.Post(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -990,29 +1044,32 @@ HttpClient::Upload(std::string_view Url, const IoBuffer& Payload, const KeyValue { ZEN_TRACE_CPU("HttpClient::Upload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); - - uint64_t Offset = 0; - if (Payload.IsWholeFile()) - { - auto ReadCallback = [&Payload, &Offset](char* buffer, size_t& size, intptr_t) { - size = Min<size_t>(size, Payload.GetSize() - Offset); - IoBuffer PayloadRange = IoBuffer(Payload, Offset, size); - MutableMemoryView Data(buffer, size); - Data.CopyFrom(PayloadRange.GetView()); - Offset += size; - return true; - }; - return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); - } - Sess->SetBody(AsCprBody(Payload)); - return Sess.Put(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); + + uint64_t Offset = 0; + if (Payload.IsWholeFile()) + { + auto ReadCallback = [&Payload, &Offset](char* buffer, size_t& size, intptr_t) { + size = Min<size_t>(size, Payload.GetSize() - Offset); + IoBuffer PayloadRange = IoBuffer(Payload, Offset, size); + MutableMemoryView Data(buffer, size); + Data.CopyFrom(PayloadRange.GetView()); + Offset += size; + return true; + }; + return 
Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); + } + Sess->SetBody(AsCprBody(Payload)); + return Sess.Put(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -1020,24 +1077,27 @@ HttpClient::Upload(std::string_view Url, const CompositeBuffer& Payload, ZenCont { ZEN_TRACE_CPU("HttpClient::Upload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - - uint64_t SizeLeft = Payload.GetSize(); - CompositeBuffer::Iterator BufferIt = Payload.GetIterator(0); - auto ReadCallback = [&Payload, &BufferIt, &SizeLeft](char* buffer, size_t& size, intptr_t) { - size = Min<size_t>(size, SizeLeft); - MutableMemoryView Data(buffer, size); - Payload.CopyTo(Data, BufferIt); - SizeLeft -= size; - return true; - }; - return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->UpdateHeader({HeaderContentType(ContentType)}); + + uint64_t SizeLeft = Payload.GetSize(); + CompositeBuffer::Iterator BufferIt = Payload.GetIterator(0); + auto ReadCallback = [&Payload, &BufferIt, &SizeLeft](char* buffer, size_t& size, intptr_t) { + size = Min<size_t>(size, SizeLeft); + MutableMemoryView Data(buffer, size); + Payload.CopyTo(Data, BufferIt); + SizeLeft -= size; + return true; + }; + return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -1048,6 +1108,7 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold 
std::string PayloadString; std::unique_ptr<detail::TempPayloadFile> PayloadFile; cpr::Response Response = DoWithRetry( + m_SessionId, [&]() { auto GetHeader = [&](std::string header) -> std::pair<std::string, std::string> { size_t DelimiterPos = header.find(':'); @@ -1087,6 +1148,30 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold return true; }; + uint64_t RequestedContentLength = (uint64_t)-1; + if (auto RangeIt = AdditionalHeader.Entries.find("Range"); RangeIt != AdditionalHeader.Entries.end()) + { + if (RangeIt->second.starts_with("bytes")) + { + size_t RangeStartPos = RangeIt->second.find('=', 5); + if (RangeStartPos != std::string::npos) + { + RangeStartPos++; + size_t RangeSplitPos = RangeIt->second.find('-', RangeStartPos); + if (RangeSplitPos != std::string::npos) + { + std::optional<size_t> RequestedRangeStart = + ParseInt<size_t>(RangeIt->second.substr(RangeStartPos, RangeSplitPos - RangeStartPos)); + std::optional<size_t> RequestedRangeEnd = ParseInt<size_t>(RangeIt->second.substr(RangeStartPos + 1)); + if (RequestedRangeStart.has_value() && RequestedRangeEnd.has_value()) + { + RequestedContentLength = RequestedRangeEnd.value() - 1; + } + } + } + } + } + cpr::Response Response; { std::vector<std::pair<std::string, std::string>> ReceivedHeaders; @@ -1094,10 +1179,10 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold std::pair<std::string, std::string> Header = GetHeader(header); if (Header.first == "Content-Length"sv) { - std::optional<size_t> ContentSize = ParseInt<size_t>(Header.second); - if (ContentSize.has_value()) + std::optional<size_t> ContentLength = ParseInt<size_t>(Header.second); + if (ContentLength.has_value()) { - if (ContentSize.value() > 1024 * 1024) + if (ContentLength.value() > 1024 * 1024) { PayloadFile = std::make_unique<detail::TempPayloadFile>(); std::error_code Ec = PayloadFile->Open(TempFolderPath); @@ -1111,7 +1196,7 @@ HttpClient::Download(std::string_view Url, 
const std::filesystem::path& TempFold } else { - PayloadString.reserve(ContentSize.value()); + PayloadString.reserve(ContentLength.value()); } } } @@ -1157,85 +1242,90 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold auto It = Response.header.find("Content-Length"); if (It != Response.header.end()) { - std::optional<int64_t> ContentLength = ParseInt<int64_t>(It->second); - if (ContentLength) - { - std::vector<std::pair<std::string, std::string>> ReceivedHeaders; + std::vector<std::pair<std::string, std::string>> ReceivedHeaders; - auto HeaderCallback = [&](std::string header, intptr_t) { - std::pair<std::string, std::string> Header = GetHeader(header); - if (!Header.first.empty()) - { - ReceivedHeaders.emplace_back(std::move(Header)); - } + auto HeaderCallback = [&](std::string header, intptr_t) { + std::pair<std::string, std::string> Header = GetHeader(header); + if (!Header.first.empty()) + { + ReceivedHeaders.emplace_back(std::move(Header)); + } - if (Header.first == "Content-Range"sv) + if (Header.first == "Content-Range"sv) + { + if (Header.second.starts_with("bytes "sv)) { - if (Header.second.starts_with("bytes "sv)) + size_t RangeStartEnd = Header.second.find('-', 6); + if (RangeStartEnd != std::string::npos) { - size_t RangeStartEnd = Header.second.find('-', 6); - if (RangeStartEnd != std::string::npos) + const auto Start = ParseInt<uint64_t>(Header.second.substr(6, RangeStartEnd - 6)); + if (Start) { - const auto Start = ParseInt<uint64_t>(Header.second.substr(6, RangeStartEnd - 6)); - if (Start) + uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); + if (Start.value() == DownloadedSize) { - uint64_t DownloadedSize = PayloadFile ? 
PayloadFile->GetSize() : PayloadString.length(); - if (Start.value() == DownloadedSize) - { - return 1; - } - else if (Start.value() > DownloadedSize) - { - return 0; - } - if (PayloadFile) - { - PayloadFile->ResetWritePos(Start.value()); - } - else - { - PayloadString = PayloadString.substr(0, Start.value()); - } return 1; } + else if (Start.value() > DownloadedSize) + { + return 0; + } + if (PayloadFile) + { + PayloadFile->ResetWritePos(Start.value()); + } + else + { + PayloadString = PayloadString.substr(0, Start.value()); + } + return 1; } } - return 0; } - return 1; - }; + return 0; + } + return 1; + }; - KeyValueMap HeadersWithRange(AdditionalHeader); - do - { - uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); + KeyValueMap HeadersWithRange(AdditionalHeader); + do + { + uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); - std::string Range = fmt::format("bytes={}-{}", DownloadedSize, ContentLength.value()); - if (auto RangeIt = HeadersWithRange.Entries.find("Range"); RangeIt != HeadersWithRange.Entries.end()) + uint64_t ContentLength = RequestedContentLength; + if (ContentLength == uint64_t(-1)) + { + if (auto ParsedContentLength = ParseInt<int64_t>(It->second); ParsedContentLength.has_value()) { - if (RangeIt->second == Range) - { - // If we didn't make any progress, abort - break; - } + ContentLength = ParsedContentLength.value(); } - HeadersWithRange.Entries.insert_or_assign("Range", Range); - - Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, - Url, - m_ConnectionSettings, - HeadersWithRange, - {}, - m_SessionId, - GetAccessToken()); - Response = Sess.Download(cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback}); - for (const std::pair<std::string, std::string>& H : ReceivedHeaders) + } + + std::string Range = fmt::format("bytes={}-{}", DownloadedSize, DownloadedSize + ContentLength - 1); + if (auto RangeIt = HeadersWithRange.Entries.find("Range"); 
RangeIt != HeadersWithRange.Entries.end()) + { + if (RangeIt->second == Range) { - Response.header.insert_or_assign(H.first, H.second); + // If we didn't make any progress, abort + break; } - ReceivedHeaders.clear(); - } while (ShouldResume(Response)); - } + } + HeadersWithRange.Entries.insert_or_assign("Range", Range); + + Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, + Url, + m_ConnectionSettings, + HeadersWithRange, + {}, + m_SessionId, + GetAccessToken()); + Response = Sess.Download(cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback}); + for (const std::pair<std::string, std::string>& H : ReceivedHeaders) + { + Response.header.insert_or_assign(H.first, H.second); + } + ReceivedHeaders.clear(); + } while (ShouldResume(Response)); } } } @@ -1249,7 +1339,7 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold PayloadFile, m_ConnectionSettings.RetryCount); - return CommonResponse(std::move(Response), PayloadFile ? PayloadFile->DetachToIoBuffer() : IoBuffer{}); + return CommonResponse(m_SessionId, std::move(Response), PayloadFile ? 
PayloadFile->DetachToIoBuffer() : IoBuffer{}); } ////////////////////////////////////////////////////////////////////////// @@ -1361,6 +1451,52 @@ HttpClient::Response::ThrowError(std::string_view ErrorPrefix) #if ZEN_WITH_TESTS +TEST_CASE("responseformat") +{ + using namespace std::literals; + + SUBCASE("identity") + { + BodyLogFormatter _{"abcd"}; + CHECK_EQ(_.GetText(), "abcd"sv); + } + + SUBCASE("very long") + { + std::string_view LongView = + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + 
"abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; + + BodyLogFormatter _{LongView}; + + CHECK(_.GetText().size() < LongView.size()); + CHECK(_.GetText().starts_with("[truncated"sv)); + } + + SUBCASE("invalid text") + { + std::string_view BadText = "totobaba\xff\xfe"; + + BodyLogFormatter _{BadText}; + + CHECK_EQ(_.GetText(), "totobaba"); + } +} + TEST_CASE("httpclient") { using namespace std::literals; diff --git a/src/zenhttp/httpclientauth.cpp b/src/zenhttp/httpclientauth.cpp index 04ac2ad3f..7fb3224f1 100644 --- a/src/zenhttp/httpclientauth.cpp +++ b/src/zenhttp/httpclientauth.cpp @@ -2,6 +2,7 @@ #include <zenhttp/httpclientauth.h> +#include <zencore/logging.h> #include <zenhttp/auth/authmgr.h> ZEN_THIRD_PARTY_INCLUDES_START @@ -41,6 +42,7 @@ namespace zen { namespace httpclientauth { if (Response.error || Response.status_code != 200) { + ZEN_WARN("Failed fetching OAuth access token {}. Reason: '{}'", OAuthParams.Url, Response.reason); return HttpClientAccessToken{}; } @@ -49,6 +51,7 @@ namespace zen { namespace httpclientauth { if (JsonError.empty() == false) { + ZEN_WARN("Unable to parse OAuth json response from {}. 
Reason: '{}'", OAuthParams.Url, JsonError); return HttpClientAccessToken{}; } diff --git a/src/zenhttp/httpserver.cpp b/src/zenhttp/httpserver.cpp index 1fbe22628..27a09f339 100644 --- a/src/zenhttp/httpserver.cpp +++ b/src/zenhttp/httpserver.cpp @@ -31,6 +31,8 @@ #include <span> #include <string_view> +#include <EASTL/fixed_vector.h> + namespace zen { using namespace std::literals; @@ -529,7 +531,7 @@ HttpServerRequest::WriteResponse(HttpResponseCode ResponseCode, HttpContentType { std::span<const SharedBuffer> Segments = Payload.GetSegments(); - std::vector<IoBuffer> Buffers; + eastl::fixed_vector<IoBuffer, 64> Buffers; Buffers.reserve(Segments.size()); for (auto& Segment : Segments) @@ -537,7 +539,7 @@ HttpServerRequest::WriteResponse(HttpResponseCode ResponseCode, HttpContentType Buffers.push_back(Segment.AsIoBuffer()); } - WriteResponse(ResponseCode, ContentType, Buffers); + WriteResponse(ResponseCode, ContentType, std::span<IoBuffer>(begin(Buffers), end(Buffers))); } std::string diff --git a/src/zenhttp/include/zenhttp/formatters.h b/src/zenhttp/include/zenhttp/formatters.h index 538136238..74da9ab05 100644 --- a/src/zenhttp/include/zenhttp/formatters.h +++ b/src/zenhttp/include/zenhttp/formatters.h @@ -7,12 +7,57 @@ #include <zencore/iobuffer.h> #include <zencore/string.h> #include <zenhttp/httpclient.h> +#include <zenhttp/httpcommon.h> ZEN_THIRD_PARTY_INCLUDES_START #include <cpr/cpr.h> #include <fmt/format.h> ZEN_THIRD_PARTY_INCLUDES_END +namespace zen { + +struct BodyLogFormatter +{ +private: + std::string_view ResponseText; + zen::ExtendableStringBuilder<128> ModifiedResponse; + +public: + explicit BodyLogFormatter(std::string_view InResponseText) : ResponseText(InResponseText) + { + using namespace std::literals; + + const int TextSizeLimit = 1024; + + // Trim invalid UTF8 + + auto InvalidIt = zen::FindFirstInvalidUtf8Byte(ResponseText); + + if (InvalidIt != end(ResponseText)) + { + ResponseText = ResponseText.substr(0, InvalidIt - begin(ResponseText)); 
+ } + + if (ResponseText.empty()) + { + ResponseText = "<suppressed non-text response>"sv; + } + + if (ResponseText.size() > TextSizeLimit) + { + const auto TruncatedString = "[truncated response] "sv; + ModifiedResponse.Append(TruncatedString); + ModifiedResponse.Append(ResponseText.data(), TextSizeLimit - TruncatedString.size()); + + ResponseText = ModifiedResponse; + } + } + + inline std::string_view GetText() const { return ResponseText; } +}; + +} // namespace zen + template<> struct fmt::formatter<cpr::Response> { @@ -23,15 +68,17 @@ struct fmt::formatter<cpr::Response> { using namespace std::literals; - if (Response.status_code == 200 || Response.status_code == 201) + zen::NiceTimeSpanMs NiceResponseTime(uint64_t(Response.elapsed * 1000)); + + if (zen::IsHttpSuccessCode(Response.status_code)) { return fmt::format_to(Ctx.out(), - "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}s", + "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}", Response.url.str(), Response.status_code, Response.uploaded_bytes, Response.downloaded_bytes, - Response.elapsed); + NiceResponseTime.c_str()); } else { @@ -46,25 +93,27 @@ struct fmt::formatter<cpr::Response> std::string_view Json = Obj.ToJson(Sb).ToView(); return fmt::format_to(Ctx.out(), - "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}s, Response: '{}', Reason: '{}'", + "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}, Response: '{}', Reason: '{}'", Response.url.str(), Response.status_code, Response.uploaded_bytes, Response.downloaded_bytes, - Response.elapsed, + NiceResponseTime.c_str(), Json, Response.reason); } else { + zen::BodyLogFormatter Body(Response.text); + return fmt::format_to(Ctx.out(), - "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}s, Reponse: '{}', Reason: '{}'", + "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}, Response: '{}', Reason: '{}'", Response.url.str(), Response.status_code, Response.uploaded_bytes, Response.downloaded_bytes, - 
Response.elapsed, - Response.text, + NiceResponseTime.c_str(), + Body.GetText(), Response.reason); } } diff --git a/src/zenhttp/include/zenhttp/httpclient.h b/src/zenhttp/include/zenhttp/httpclient.h index 1cf77d794..a46b9fd83 100644 --- a/src/zenhttp/include/zenhttp/httpclient.h +++ b/src/zenhttp/include/zenhttp/httpclient.h @@ -60,9 +60,6 @@ struct HttpClientSettings class HttpClient { public: - struct Settings - { - }; HttpClient(std::string_view BaseUri, const HttpClientSettings& Connectionsettings = {}); ~HttpClient(); @@ -180,6 +177,7 @@ public: LoggerRef Logger() { return m_Log; } std::string_view GetBaseUri() const { return m_BaseUri; } bool Authenticate(); + std::string_view GetSessionId() const { return m_SessionId; } private: const std::optional<HttpClientAccessToken> GetAccessToken(); diff --git a/src/zenhttp/include/zenhttp/httpserver.h b/src/zenhttp/include/zenhttp/httpserver.h index 7b87cb84b..217455dba 100644 --- a/src/zenhttp/include/zenhttp/httpserver.h +++ b/src/zenhttp/include/zenhttp/httpserver.h @@ -208,7 +208,7 @@ class HttpRouterRequest public: HttpRouterRequest(HttpServerRequest& Request) : m_HttpRequest(Request) {} - ZENCORE_API std::string GetCapture(uint32_t Index) const; + std::string_view GetCapture(uint32_t Index) const; inline HttpServerRequest& ServerRequest() { return m_HttpRequest; } private: @@ -220,12 +220,14 @@ private: friend class HttpRequestRouter; }; -inline std::string +inline std::string_view HttpRouterRequest::GetCapture(uint32_t Index) const { ZEN_ASSERT(Index < m_Match.size()); - return m_Match[Index]; + const auto& Match = m_Match[Index]; + + return std::string_view(&*Match.first, Match.second - Match.first); } /** HTTP request router helper diff --git a/src/zenhttp/packageformat.cpp b/src/zenhttp/packageformat.cpp index 676fc73fd..ae80851e4 100644 --- a/src/zenhttp/packageformat.cpp +++ b/src/zenhttp/packageformat.cpp @@ -19,6 +19,8 @@ #include <span> #include <vector> +#include <EASTL/fixed_vector.h> + #if 
ZEN_PLATFORM_WINDOWS # include <zencore/windows.h> #endif @@ -31,6 +33,10 @@ namespace zen { const std::string_view HandlePrefix(":?#:"); +typedef eastl::fixed_vector<IoBuffer, 16> IoBufferVec_t; + +IoBufferVec_t FormatPackageMessageInternal(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle); + std::vector<IoBuffer> FormatPackageMessage(const CbPackage& Data, void* TargetProcessHandle) { @@ -42,10 +48,18 @@ FormatPackageMessageBuffer(const CbPackage& Data, void* TargetProcessHandle) return FormatPackageMessageBuffer(Data, FormatFlags::kDefault, TargetProcessHandle); } +std::vector<IoBuffer> +FormatPackageMessage(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) +{ + auto Vec = FormatPackageMessageInternal(Data, Flags, TargetProcessHandle); + return std::vector<IoBuffer>(begin(Vec), end(Vec)); +} + CompositeBuffer FormatPackageMessageBuffer(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) { - return CompositeBuffer(FormatPackageMessage(Data, Flags, TargetProcessHandle)); + auto Vec = FormatPackageMessageInternal(Data, Flags, TargetProcessHandle); + return CompositeBuffer(std::span{begin(Vec), end(Vec)}); } static void @@ -54,7 +68,7 @@ MarshalLocal(CbAttachmentEntry*& AttachmentInfo, CbAttachmentReferenceHeader& LocalRef, const IoHash& AttachmentHash, bool IsCompressed, - std::vector<IoBuffer>& ResponseBuffers) + IoBufferVec_t& ResponseBuffers) { IoBuffer RefBuffer(sizeof(CbAttachmentReferenceHeader) + Path8.size()); @@ -146,8 +160,8 @@ IsLocalRef(tsl::robin_map<void*, std::string>& FileNameMap, return true; }; -std::vector<IoBuffer> -FormatPackageMessage(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) +IoBufferVec_t +FormatPackageMessageInternal(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) { ZEN_TRACE_CPU("FormatPackageMessage"); @@ -177,7 +191,7 @@ FormatPackageMessage(const CbPackage& Data, FormatFlags Flags, void* TargetProce #endif // ZEN_PLATFORM_WINDOWS const 
std::span<const CbAttachment>& Attachments = Data.GetAttachments(); - std::vector<IoBuffer> ResponseBuffers; + IoBufferVec_t ResponseBuffers; ResponseBuffers.reserve(2 + Attachments.size()); // TODO: may want to use an additional fudge factor here to avoid growing since each // attachment is likely to consist of several buffers diff --git a/src/zenhttp/servers/httpsys.cpp b/src/zenhttp/servers/httpsys.cpp index 87128c0c9..3bdcdf098 100644 --- a/src/zenhttp/servers/httpsys.cpp +++ b/src/zenhttp/servers/httpsys.cpp @@ -16,6 +16,8 @@ #include <zencore/trace.h> #include <zenhttp/packageformat.h> +#include <EASTL/fixed_vector.h> + #if ZEN_WITH_HTTPSYS # define _WINSOCKAPI_ # include <zencore/windows.h> @@ -381,14 +383,14 @@ public: void SuppressResponseBody(); // typically used for HEAD requests private: - std::vector<HTTP_DATA_CHUNK> m_HttpDataChunks; - uint64_t m_TotalDataSize = 0; // Sum of all chunk sizes - uint16_t m_ResponseCode = 0; - uint32_t m_NextDataChunkOffset = 0; // Cursor used for very large chunk lists - uint32_t m_RemainingChunkCount = 0; // Backlog for multi-call sends - bool m_IsInitialResponse = true; - HttpContentType m_ContentType = HttpContentType::kBinary; - std::vector<IoBuffer> m_DataBuffers; + eastl::fixed_vector<HTTP_DATA_CHUNK, 16> m_HttpDataChunks; + uint64_t m_TotalDataSize = 0; // Sum of all chunk sizes + uint16_t m_ResponseCode = 0; + uint32_t m_NextDataChunkOffset = 0; // Cursor used for very large chunk lists + uint32_t m_RemainingChunkCount = 0; // Backlog for multi-call sends + bool m_IsInitialResponse = true; + HttpContentType m_ContentType = HttpContentType::kBinary; + eastl::fixed_vector<IoBuffer, 16> m_DataBuffers; void InitializeForPayload(uint16_t ResponseCode, std::span<IoBuffer> Blobs); }; diff --git a/src/zennet-test/zennet-test.cpp b/src/zennet-test/zennet-test.cpp index b45a5f807..03b385085 100644 --- a/src/zennet-test/zennet-test.cpp +++ b/src/zennet-test/zennet-test.cpp @@ -18,9 +18,9 @@ main([[maybe_unused]] int argc, 
[[maybe_unused]] char** argv) #if ZEN_WITH_TESTS zen::zennet_forcelinktests(); -#if ZEN_PLATFORM_LINUX +# if ZEN_PLATFORM_LINUX zen::IgnoreChildSignals(); -#endif +# endif zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zenserver-test/zenserver-test.cpp b/src/zenserver-test/zenserver-test.cpp index 5c245d8bb..b046bbada 100644 --- a/src/zenserver-test/zenserver-test.cpp +++ b/src/zenserver-test/zenserver-test.cpp @@ -100,9 +100,9 @@ main(int argc, char** argv) using namespace std::literals; using namespace zen; -#if ZEN_PLATFORM_LINUX +# if ZEN_PLATFORM_LINUX IgnoreChildSignals(); -#endif +# endif zen::logging::InitializeLogging(); diff --git a/src/zenserver/objectstore/objectstore.cpp b/src/zenserver/objectstore/objectstore.cpp index b0212ab07..e757ef84e 100644 --- a/src/zenserver/objectstore/objectstore.cpp +++ b/src/zenserver/objectstore/objectstore.cpp @@ -269,9 +269,9 @@ HttpObjectStoreService::Inititalize() m_Router.RegisterRoute( "bucket/{path}", [this](zen::HttpRouterRequest& Request) { - const std::string Path = Request.GetCapture(1); - const auto Sep = Path.find_last_of('.'); - const bool IsObject = Sep != std::string::npos && Path.size() - Sep > 0; + const std::string_view Path = Request.GetCapture(1); + const auto Sep = Path.find_last_of('.'); + const bool IsObject = Sep != std::string::npos && Path.size() - Sep > 0; if (IsObject) { @@ -337,18 +337,18 @@ HttpObjectStoreService::CreateBucket(zen::HttpRouterRequest& Request) } void -HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::string& Path) +HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::string_view Path) { namespace fs = std::filesystem; - const auto Sep = Path.find_first_of('/'); - const std::string BucketName = Sep == std::string::npos ? Path : Path.substr(0, Sep); + const auto Sep = Path.find_first_of('/'); + const std::string BucketName{Sep == std::string::npos ? 
Path : Path.substr(0, Sep)}; if (BucketName.empty()) { return Request.ServerRequest().WriteResponse(HttpResponseCode::BadRequest); } - std::string BucketPrefix = Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1); + std::string BucketPrefix{Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1)}; if (BucketPrefix.empty()) { const auto QueryParms = Request.ServerRequest().GetQueryParams(); @@ -376,7 +376,7 @@ HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::s Writer.BeginArray("Contents"sv); } - void VisitFile(const fs::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + void VisitFile(const fs::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { const fs::path FullPath = Parent / fs::path(File); fs::path RelativePath = fs::relative(FullPath, BucketPath); @@ -450,14 +450,13 @@ HttpObjectStoreService::DeleteBucket(zen::HttpRouterRequest& Request) } void -HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::string& Path) +HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::string_view Path) { namespace fs = std::filesystem; - const auto Sep = Path.find_first_of('/'); - const std::string BucketName = Sep == std::string::npos ? Path : Path.substr(0, Sep); - const std::string BucketPrefix = - Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1); + const auto Sep = Path.find_first_of('/'); + const std::string BucketName{Sep == std::string::npos ? Path : Path.substr(0, Sep)}; + const std::string BucketPrefix{Sep == std::string::npos || Sep == Path.size() - 1 ? 
std::string() : Path.substr(BucketName.size() + 1)}; const fs::path BucketDir = GetBucketDirectory(BucketName); @@ -554,8 +553,8 @@ HttpObjectStoreService::PutObject(zen::HttpRouterRequest& Request) { namespace fs = std::filesystem; - const std::string& BucketName = Request.GetCapture(1); - const fs::path BucketDir = GetBucketDirectory(BucketName); + const std::string_view BucketName = Request.GetCapture(1); + const fs::path BucketDir = GetBucketDirectory(BucketName); if (BucketDir.empty()) { diff --git a/src/zenserver/objectstore/objectstore.h b/src/zenserver/objectstore/objectstore.h index c905ceab3..dae979c4c 100644 --- a/src/zenserver/objectstore/objectstore.h +++ b/src/zenserver/objectstore/objectstore.h @@ -36,9 +36,9 @@ private: void Inititalize(); std::filesystem::path GetBucketDirectory(std::string_view BucketName); void CreateBucket(zen::HttpRouterRequest& Request); - void ListBucket(zen::HttpRouterRequest& Request, const std::string& Path); + void ListBucket(zen::HttpRouterRequest& Request, const std::string_view Path); void DeleteBucket(zen::HttpRouterRequest& Request); - void GetObject(zen::HttpRouterRequest& Request, const std::string& Path); + void GetObject(zen::HttpRouterRequest& Request, const std::string_view Path); void PutObject(zen::HttpRouterRequest& Request); ObjectStoreConfig m_Cfg; diff --git a/src/zenserver/projectstore/buildsremoteprojectstore.cpp b/src/zenserver/projectstore/buildsremoteprojectstore.cpp index 302b81729..fbb9bc344 100644 --- a/src/zenserver/projectstore/buildsremoteprojectstore.cpp +++ b/src/zenserver/projectstore/buildsremoteprojectstore.cpp @@ -3,6 +3,7 @@ #include "buildsremoteprojectstore.h" #include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryvalidation.h> #include <zencore/compress.h> #include <zencore/fmtutils.h> @@ -114,24 +115,25 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&& Block) override + virtual 
SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, + const IoHash& RawHash, + ChunkBlockDescription&& Block) override { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); JupiterResult PutResult = - Session.PutBuildBlob(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash, ZenContentType::kCompressedBinary, Payload); + Session.PutBuildBlob(m_Namespace, m_Bucket, m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); AddStats(PutResult); SaveAttachmentResult Result{ConvertResult(PutResult)}; if (Result.ErrorCode) { - Result.Reason = fmt::format("Failed saving oplog attachment to {}/{}/{}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed saving oplog attachment to {}/{}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, RawHash, Result.Reason); return Result; @@ -139,57 +141,21 @@ public: if (Block.BlockHash == RawHash) { - ZEN_ASSERT(Block.ChunkLengths.size() == Block.ChunkHashes.size()); - CbObjectWriter Writer; - Writer.AddHash("rawHash"sv, RawHash); - Writer.BeginArray("rawHashes"sv); - { - for (const IoHash& ChunkHash : Block.ChunkHashes) - { - Writer.AddHash(ChunkHash); - } - } - Writer.EndArray(); - Writer.BeginArray("chunkLengths"); - { - for (uint32_t ChunkSize : Block.ChunkLengths) - { - Writer.AddInteger(ChunkSize); - } - } - Writer.EndArray(); - Writer.BeginArray("chunkOffsets"); - { - ZEN_ASSERT(Block.FirstChunkOffset != (uint32_t)-1); - uint32_t Offset = Block.FirstChunkOffset; - for (uint32_t ChunkSize : Block.ChunkLengths) - { - Writer.AddInteger(Offset); - Offset += ChunkSize; - } - } - Writer.EndArray(); + CbObjectWriter BlockMetaData; + BlockMetaData.AddString("createdBy", GetRunningExecutablePath().stem().string()); - Writer.BeginObject("metadata"sv); - { - Writer.AddString("createdBy", "zenserver"); - } - Writer.EndObject(); - - IoBuffer MetaPayload = 
Writer.Save().GetBuffer().AsIoBuffer(); + IoBuffer MetaPayload = BuildChunkBlockDescription(Block, BlockMetaData.Save()).GetBuffer().AsIoBuffer(); MetaPayload.SetContentType(ZenContentType::kCbObject); - JupiterResult PutMetaResult = - Session.PutBlockMetadata(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash, MetaPayload); + JupiterResult PutMetaResult = Session.PutBlockMetadata(m_Namespace, m_Bucket, m_BuildId, RawHash, MetaPayload); AddStats(PutMetaResult); RemoteProjectStore::Result MetaDataResult = ConvertResult(PutMetaResult); if (MetaDataResult.ErrorCode) { - ZEN_WARN("Failed saving block attachment meta data to {}/{}/{}/{}/{}/{}. Reason: '{}'", + ZEN_WARN("Failed saving block attachment meta data to {}/{}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, RawHash, MetaDataResult.Reason); } @@ -342,51 +308,47 @@ public: { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); - JupiterResult FindResult = Session.FindBlocks(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId); + JupiterResult FindResult = Session.FindBlocks(m_Namespace, m_Bucket, m_BuildId); AddStats(FindResult); GetKnownBlocksResult Result{ConvertResult(FindResult)}; if (Result.ErrorCode) { Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("Failed listing know blocks for {}/{}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed listing know blocks for {}/{}/{}/{}. 
Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, Result.Reason); return Result; } - CbObject BlocksObject = LoadCompactBinaryObject(FindResult.Response); - if (!BlocksObject) + if (ValidateCompactBinary(FindResult.Response.GetView(), CbValidateMode::Default) != CbValidateError::None) { Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("The block list {}/{}/{}/{} is not formatted as a compact binary object"sv, + Result.Reason = fmt::format("The block list {}/{}/{} is not formatted as a compact binary object"sv, m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, - m_BuildId, - m_OplogBuildPartId); + m_BuildId); return Result; } - - CbArrayView Blocks = BlocksObject["blocks"].AsArrayView(); - Result.Blocks.reserve(Blocks.Num()); - for (CbFieldView BlockView : Blocks) + std::optional<std::vector<ChunkBlockDescription>> Blocks = + ParseChunkBlockDescriptionList(LoadCompactBinaryObject(FindResult.Response)); + if (!Blocks) { - CbObjectView BlockObject = BlockView.AsObjectView(); - IoHash BlockHash = BlockObject["rawHash"sv].AsHash(); - if (BlockHash != IoHash::Zero) - { - CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView(); - std::vector<IoHash> ChunkHashes; - ChunkHashes.reserve(ChunksArray.Num()); - for (CbFieldView ChunkView : ChunksArray) - { - ChunkHashes.push_back(ChunkView.AsHash()); - } - Result.Blocks.emplace_back(Block{.BlockHash = BlockHash, .ChunkHashes = ChunkHashes}); - } + Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); + Result.Reason = fmt::format("The block list {}/{}/{} is not formatted as a list of blocks"sv, + m_JupiterClient->ServiceUrl(), + m_Namespace, + m_Bucket, + m_BuildId); + return Result; + } + Result.Blocks.reserve(Blocks.value().size()); + for (ChunkBlockDescription& BlockDescription : Blocks.value()) + { + Result.Blocks.push_back(ThinChunkBlockDescription{.BlockHash = 
BlockDescription.BlockHash, + .ChunkRawHashes = std::move(BlockDescription.ChunkRawHashes)}); } return Result; } @@ -395,18 +357,17 @@ public: { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); - JupiterResult GetResult = Session.GetBuildBlob(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash, m_TempFilePath); + JupiterResult GetResult = Session.GetBuildBlob(m_Namespace, m_Bucket, m_BuildId, RawHash, m_TempFilePath); AddStats(GetResult); LoadAttachmentResult Result{ConvertResult(GetResult), std::move(GetResult.Response)}; if (GetResult.ErrorCode) { - Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}&{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, RawHash, Result.Reason); } diff --git a/src/zenserver/projectstore/fileremoteprojectstore.cpp b/src/zenserver/projectstore/fileremoteprojectstore.cpp index 0fe739a12..98e292d91 100644 --- a/src/zenserver/projectstore/fileremoteprojectstore.cpp +++ b/src/zenserver/projectstore/fileremoteprojectstore.cpp @@ -106,7 +106,7 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override { Stopwatch Timer; SaveAttachmentResult Result; @@ -192,7 +192,7 @@ public: return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent), .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; } - std::vector<RemoteProjectStore::Block> KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); + std::vector<ThinChunkBlockDescription> KnownBlocks = 
GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; Result.Blocks = std::move(KnownBlocks); return Result; diff --git a/src/zenserver/projectstore/httpprojectstore.cpp b/src/zenserver/projectstore/httpprojectstore.cpp index 0b8e5f13b..47748dd90 100644 --- a/src/zenserver/projectstore/httpprojectstore.cpp +++ b/src/zenserver/projectstore/httpprojectstore.cpp @@ -983,15 +983,19 @@ HttpProjectService::HandleOplogOpPrepRequest(HttpRouterRequest& Req) IoBuffer Payload = HttpReq.ReadPayload(); CbObject RequestObject = LoadCompactBinaryObject(Payload); - std::vector<IoHash> ChunkList; - CbArrayView HaveList = RequestObject["have"sv].AsArrayView(); - ChunkList.reserve(HaveList.Num()); - for (auto& Entry : HaveList) + std::vector<IoHash> NeedList; + { - ChunkList.push_back(Entry.AsHash()); - } + eastl::fixed_vector<IoHash, 16> ChunkList; + CbArrayView HaveList = RequestObject["have"sv].AsArrayView(); + ChunkList.reserve(HaveList.Num()); + for (auto& Entry : HaveList) + { + ChunkList.push_back(Entry.AsHash()); + } - std::vector<IoHash> NeedList = FoundLog->CheckPendingChunkReferences(ChunkList, std::chrono::minutes(2)); + NeedList = FoundLog->CheckPendingChunkReferences(std::span(begin(ChunkList), end(ChunkList)), std::chrono::minutes(2)); + } CbObjectWriter Cbo(1 + 1 + 5 + NeedList.size() * (1 + sizeof(IoHash::Hash)) + 1); Cbo.BeginArray("need"); @@ -1151,7 +1155,7 @@ HttpProjectService::HandleOplogOpNewRequest(HttpRouterRequest& Req) return HttpReq.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "No oplog entry key specified"); } - std::vector<IoHash> ReferencedChunks; + eastl::fixed_vector<IoHash, 16> ReferencedChunks; Core.IterateAttachments([&ReferencedChunks](CbFieldView View) { ReferencedChunks.push_back(View.AsAttachment()); }); // Write core to oplog @@ -1169,7 +1173,7 @@ 
HttpProjectService::HandleOplogOpNewRequest(HttpRouterRequest& Req) // Once we stored the op, we no longer need to retain any chunks this op references if (!ReferencedChunks.empty()) { - FoundLog->RemovePendingChunkReferences(ReferencedChunks); + FoundLog->RemovePendingChunkReferences(std::span(begin(ReferencedChunks), end(ReferencedChunks))); } m_ProjectStats.OpWriteCount++; @@ -1301,9 +1305,9 @@ HttpProjectService::HandleOpLogOpRequest(HttpRouterRequest& Req) HttpServerRequest& HttpReq = Req.ServerRequest(); - const std::string& ProjectId = Req.GetCapture(1); - const std::string& OplogId = Req.GetCapture(2); - const std::string& OpIdString = Req.GetCapture(3); + const std::string_view ProjectId = Req.GetCapture(1); + const std::string_view OplogId = Req.GetCapture(2); + const std::string_view OpIdString = Req.GetCapture(3); Ref<ProjectStore::Project> Project = m_ProjectStore->OpenProject(ProjectId); if (!Project) @@ -1690,8 +1694,8 @@ HttpProjectService::HandleProjectRequest(HttpRouterRequest& Req) using namespace std::literals; - HttpServerRequest& HttpReq = Req.ServerRequest(); - const std::string ProjectId = Req.GetCapture(1); + HttpServerRequest& HttpReq = Req.ServerRequest(); + const std::string_view ProjectId = Req.GetCapture(1); switch (HttpReq.RequestVerb()) { diff --git a/src/zenserver/projectstore/jupiterremoteprojectstore.cpp b/src/zenserver/projectstore/jupiterremoteprojectstore.cpp index e906127ff..e5839ad3b 100644 --- a/src/zenserver/projectstore/jupiterremoteprojectstore.cpp +++ b/src/zenserver/projectstore/jupiterremoteprojectstore.cpp @@ -92,7 +92,7 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override { JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); JupiterResult PutResult = 
Session.PutCompressedBlob(m_Namespace, RawHash, Payload); @@ -193,7 +193,7 @@ public: return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent), .ElapsedSeconds = LoadResult.ElapsedSeconds + ExistsResult.ElapsedSeconds}}; } - std::vector<RemoteProjectStore::Block> KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); + std::vector<ThinChunkBlockDescription> KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); GetKnownBlocksResult Result{ {.ElapsedSeconds = LoadResult.ElapsedSeconds + ExistsResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000.0}}; diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index 46a236af9..86791e29a 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -423,9 +423,13 @@ ComputeOpKey(const CbObjectView& Op) { using namespace std::literals; - BinaryWriter KeyStream; + eastl::fixed_vector<uint8_t, 256> KeyData; - Op["key"sv].WriteToStream([&](const void* Data, size_t Size) { KeyStream.Write(Data, Size); }); + Op["key"sv].WriteToStream([&](const void* Data, size_t Size) { + auto Begin = reinterpret_cast<const uint8_t*>(Data); + auto End = Begin + Size; + KeyData.insert(KeyData.end(), Begin, End); + }); XXH3_128 KeyHash128; @@ -434,15 +438,15 @@ ComputeOpKey(const CbObjectView& Op) // path but longer paths are evaluated properly. In the future all key lengths // should be evaluated using the proper path, this is a temporary workaround to // maintain compatibility with existing disk state. 
- if (KeyStream.GetSize() < 240) + if (KeyData.size() < 240) { XXH3_128Stream_deprecated KeyHasher; - KeyHasher.Append(KeyStream.Data(), KeyStream.Size()); + KeyHasher.Append(KeyData.data(), KeyData.size()); KeyHash128 = KeyHasher.GetHash(); } else { - KeyHash128 = XXH3_128::HashMemory(KeyStream.GetView()); + KeyHash128 = XXH3_128::HashMemory(KeyData.data(), KeyData.size()); } Oid KeyHash; @@ -2735,7 +2739,7 @@ ProjectStore::Oplog::CheckPendingChunkReferences(std::span<const IoHash> ChunkHa MissingChunks.reserve(ChunkHashes.size()); for (const IoHash& FileHash : ChunkHashes) { - if (IoBuffer Payload = m_CidStore.FindChunkByCid(FileHash); !Payload) + if (!m_CidStore.ContainsChunk(FileHash)) { MissingChunks.push_back(FileHash); } @@ -3359,7 +3363,6 @@ ProjectStore::Project::OpenOplog(std::string_view OplogId, bool AllowCompact, bo ZEN_MEMSCOPE(GetProjectstoreTag()); ZEN_TRACE_CPU("Store::OpenOplog"); - std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); { RwLock::SharedLockScope ProjectLock(m_ProjectLock); @@ -3367,21 +3370,35 @@ ProjectStore::Project::OpenOplog(std::string_view OplogId, bool AllowCompact, bo if (OplogIt != m_Oplogs.end()) { - if (!VerifyPathOnDisk || Oplog::ExistsAt(OplogBasePath)) + bool ReOpen = false; + + if (VerifyPathOnDisk) { - return OplogIt->second.get(); + std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); + + if (!Oplog::ExistsAt(OplogBasePath)) + { + // Somebody deleted the oplog on disk behind our back + ProjectLock.ReleaseNow(); + std::filesystem::path DeletePath; + if (!RemoveOplog(OplogId, DeletePath)) + { + ZEN_WARN("Failed to clean up deleted oplog {}/{}", Identifier, OplogId, OplogBasePath); + } + + ReOpen = true; + } } - // Somebody deleted the oplog on disk behind our back - ProjectLock.ReleaseNow(); - std::filesystem::path DeletePath; - if (!RemoveOplog(OplogId, DeletePath)) + if (!ReOpen) { - ZEN_WARN("Failed to clean up deleted oplog {}/{}", Identifier, OplogId, OplogBasePath); + return 
OplogIt->second.get(); } } } + std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); + RwLock::ExclusiveLockScope Lock(m_ProjectLock); if (auto It = m_Oplogs.find(std::string{OplogId}); It != m_Oplogs.end()) { @@ -5347,7 +5364,7 @@ ProjectStore::ReadOplog(const std::string_view ProjectId, /* BuildBlocks */ false, /* IgnoreMissingAttachments */ false, /* AllowChunking*/ false, - [](CompressedBuffer&&, RemoteProjectStore::Block&&) {}, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, [](const IoHash&, TGetAttachmentBufferFunc&&) {}, [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, /* EmbedLooseFiles*/ false); @@ -8621,14 +8638,18 @@ TEST_CASE("project.store.block") Chunks.reserve(AttachmentSizes.size()); for (const auto& It : AttachmentsWithId) { - Chunks.push_back(std::make_pair(It.second.DecodeRawHash(), - [Buffer = It.second.GetCompressed().Flatten().AsIoBuffer()](const IoHash&) -> CompositeBuffer { - return CompositeBuffer(SharedBuffer(Buffer)); - })); - } - RemoteProjectStore::Block Block; - CompressedBuffer BlockBuffer = GenerateBlock(std::move(Chunks), Block); - CHECK(IterateBlock(BlockBuffer.Decompress(), [](CompressedBuffer&&, const IoHash&) {})); + Chunks.push_back( + std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { + return {Buffer.DecodeRawSize(), Buffer}; + })); + } + ChunkBlockDescription Block; + CompressedBuffer BlockBuffer = GenerateChunkBlock(std::move(Chunks), Block); + uint64_t HeaderSize; + CHECK(IterateChunkBlock( + BlockBuffer.Decompress(), + [](CompressedBuffer&&, const IoHash&) {}, + HeaderSize)); } TEST_CASE("project.store.iterateoplog") diff --git a/src/zenserver/projectstore/remoteprojectstore.cpp b/src/zenserver/projectstore/remoteprojectstore.cpp index 0589fdc5f..a7263da83 100644 --- a/src/zenserver/projectstore/remoteprojectstore.cpp +++ b/src/zenserver/projectstore/remoteprojectstore.cpp @@ -12,8 +12,8 @@ #include <zencore/stream.h> #include 
<zencore/timer.h> #include <zencore/workthreadpool.h> -#include <zenstore/chunkedfile.h> #include <zenstore/cidstore.h> +#include <zenutil/chunkedfile.h> #include <zenutil/workerpools.h> #include <unordered_map> @@ -143,7 +143,7 @@ namespace remotestore_impl { NiceBytes(Stats.m_PeakReceivedBytes)); } - size_t AddBlock(RwLock& BlocksLock, std::vector<RemoteProjectStore::Block>& Blocks) + size_t AddBlock(RwLock& BlocksLock, std::vector<ChunkBlockDescription>& Blocks) { size_t BlockIndex; { @@ -154,63 +154,6 @@ namespace remotestore_impl { return BlockIndex; } - IoBuffer WriteToTempFile(CompressedBuffer&& CompressedBuffer, std::filesystem::path Path) - { - if (std::filesystem::is_regular_file(Path)) - { - IoBuffer ExistingTempFile = IoBuffer(IoBufferBuilder::MakeFromFile(Path)); - if (ExistingTempFile && ExistingTempFile.GetSize() == CompressedBuffer.GetCompressedSize()) - { - ExistingTempFile.SetDeleteOnClose(true); - return ExistingTempFile; - } - } - IoBuffer BlockBuffer; - BasicFile BlockFile; - uint32_t RetriesLeft = 3; - BlockFile.Open(Path, BasicFile::Mode::kTruncateDelete, [&](std::error_code& Ec) { - if (RetriesLeft == 0) - { - return false; - } - ZEN_WARN("Failed to create temporary oplog block '{}': '{}', retries left: {}.", Path, Ec.message(), RetriesLeft); - Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms - RetriesLeft--; - return true; - }); - uint64_t Offset = 0; - { - CompositeBuffer Compressed = std::move(CompressedBuffer).GetCompressed(); - for (const SharedBuffer& Segment : Compressed.GetSegments()) - { - size_t SegmentSize = Segment.GetSize(); - static const uint64_t BufferingSize = 256u * 1024u; - - IoBufferFileReference FileRef; - if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) - { - ScanFile(FileRef.FileHandle, - FileRef.FileChunkOffset, - FileRef.FileChunkSize, - BufferingSize, - [&BlockFile, &Offset](const void* Data, size_t Size) { - BlockFile.Write(Data, Size, Offset); - Offset += Size; - 
}); - } - else - { - BlockFile.Write(Segment.GetData(), SegmentSize, Offset); - Offset += SegmentSize; - } - } - } - void* FileHandle = BlockFile.Detach(); - BlockBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, Offset, /*IsWholeFile*/ true); - BlockBuffer.SetDeleteOnClose(true); - return BlockBuffer; - } - RemoteProjectStore::Result WriteOplogSection(ProjectStore::Oplog& Oplog, const CbObjectView& SectionObject, JobContext* OptionalContext) { using namespace std::literals; @@ -573,21 +516,23 @@ namespace remotestore_impl { return; } - bool StoreChunksOK = IterateBlock( - BlockPayload, - [&WantedChunks, &WriteAttachmentBuffers, &WriteRawHashes, &Info](CompressedBuffer&& Chunk, - const IoHash& AttachmentRawHash) { - if (WantedChunks.contains(AttachmentRawHash)) - { - WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer()); - IoHash RawHash; - uint64_t RawSize; - ZEN_ASSERT(CompressedBuffer::ValidateCompressedHeader(WriteAttachmentBuffers.back(), RawHash, RawSize)); - ZEN_ASSERT(RawHash == AttachmentRawHash); - WriteRawHashes.emplace_back(AttachmentRawHash); - WantedChunks.erase(AttachmentRawHash); - } - }); + uint64_t BlockHeaderSize = 0; + bool StoreChunksOK = IterateChunkBlock( + BlockPayload, + [&WantedChunks, &WriteAttachmentBuffers, &WriteRawHashes, &Info](CompressedBuffer&& Chunk, + const IoHash& AttachmentRawHash) { + if (WantedChunks.contains(AttachmentRawHash)) + { + WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer()); + IoHash RawHash; + uint64_t RawSize; + ZEN_ASSERT(CompressedBuffer::ValidateCompressedHeader(WriteAttachmentBuffers.back(), RawHash, RawSize)); + ZEN_ASSERT(RawHash == AttachmentRawHash); + WriteRawHashes.emplace_back(AttachmentRawHash); + WantedChunks.erase(AttachmentRawHash); + } + }, + BlockHeaderSize); if (!StoreChunksOK) { @@ -738,14 +683,14 @@ namespace remotestore_impl { }); }; - void CreateBlock(WorkerThreadPool& WorkerPool, - Latch& OpSectionsLatch, - 
std::vector<std::pair<IoHash, FetchChunkFunc>>&& ChunksInBlock, - RwLock& SectionsLock, - std::vector<RemoteProjectStore::Block>& Blocks, - size_t BlockIndex, - const std::function<void(CompressedBuffer&&, RemoteProjectStore::Block&&)>& AsyncOnBlock, - AsyncRemoteResult& RemoteResult) + void CreateBlock(WorkerThreadPool& WorkerPool, + Latch& OpSectionsLatch, + std::vector<std::pair<IoHash, FetchChunkFunc>>&& ChunksInBlock, + RwLock& SectionsLock, + std::vector<ChunkBlockDescription>& Blocks, + size_t BlockIndex, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, + AsyncRemoteResult& RemoteResult) { OpSectionsLatch.AddCount(1); WorkerPool.ScheduleWork([&Blocks, @@ -764,10 +709,10 @@ namespace remotestore_impl { try { ZEN_ASSERT(ChunkCount > 0); - Stopwatch Timer; - RemoteProjectStore::Block Block; - CompressedBuffer CompressedBlock = GenerateBlock(std::move(Chunks), Block); - IoHash BlockHash = CompressedBlock.DecodeRawHash(); + Stopwatch Timer; + ChunkBlockDescription Block; + CompressedBuffer CompressedBlock = GenerateChunkBlock(std::move(Chunks), Block); + IoHash BlockHash = CompressedBlock.DecodeRawHash(); { // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index RwLock::SharedLockScope __(SectionsLock); @@ -800,8 +745,8 @@ namespace remotestore_impl { struct CreatedBlock { - IoBuffer Payload; - RemoteProjectStore::Block Block; + IoBuffer Payload; + ChunkBlockDescription Block; }; void UploadAttachments(WorkerThreadPool& WorkerPool, @@ -931,8 +876,8 @@ namespace remotestore_impl { } try { - IoBuffer Payload; - RemoteProjectStore::Block Block; + IoBuffer Payload; + ChunkBlockDescription Block; if (auto BlockIt = CreatedBlocks.find(RawHash); BlockIt != CreatedBlocks.end()) { Payload = BlockIt->second.Payload; @@ -1058,7 +1003,7 @@ namespace remotestore_impl { { auto It = BulkBlockAttachmentsToUpload.find(Chunk); ZEN_ASSERT(It != BulkBlockAttachmentsToUpload.end()); - CompositeBuffer 
ChunkPayload = It->second(It->first); + CompressedBuffer ChunkPayload = It->second(It->first).second; if (!ChunkPayload) { RemoteResult.SetError(static_cast<int32_t>(HttpResponseCode::NotFound), @@ -1067,8 +1012,8 @@ namespace remotestore_impl { ChunkBuffers.clear(); break; } - ChunksSize += ChunkPayload.GetSize(); - ChunkBuffers.emplace_back(SharedBuffer(std::move(ChunkPayload).Flatten().AsIoBuffer())); + ChunksSize += ChunkPayload.GetCompressedSize(); + ChunkBuffers.emplace_back(SharedBuffer(std::move(ChunkPayload).GetCompressed().Flatten().AsIoBuffer())); } RemoteProjectStore::SaveAttachmentsResult Result = RemoteStore.SaveAttachments(ChunkBuffers); if (Result.ErrorCode) @@ -1139,54 +1084,13 @@ namespace remotestore_impl { } } // namespace remotestore_impl -bool -IterateBlock(const SharedBuffer& BlockPayload, std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor) -{ - ZEN_ASSERT(BlockPayload); - if (BlockPayload.GetSize() < 1) - { - return false; - } - - MemoryView BlockView = BlockPayload.GetView(); - const uint8_t* ReadPtr = reinterpret_cast<const uint8_t*>(BlockView.GetData()); - uint32_t NumberSize; - uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize); - ReadPtr += NumberSize; - std::vector<uint64_t> ChunkSizes; - ChunkSizes.reserve(ChunkCount); - while (ChunkCount--) - { - ChunkSizes.push_back(ReadVarUInt(ReadPtr, NumberSize)); - ReadPtr += NumberSize; - } - ptrdiff_t TempBufferLength = std::distance(reinterpret_cast<const uint8_t*>(BlockView.GetData()), ReadPtr); - ZEN_ASSERT(TempBufferLength > 0); - for (uint64_t ChunkSize : ChunkSizes) - { - IoBuffer Chunk(IoBuffer::Wrap, ReadPtr, ChunkSize); - IoHash AttachmentRawHash; - uint64_t AttachmentRawSize; - CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(Chunk), AttachmentRawHash, AttachmentRawSize); - - if (!CompressedChunk) - { - ZEN_ERROR("Invalid chunk in block"); - return false; - } - Visitor(std::move(CompressedChunk), AttachmentRawHash); 
- ReadPtr += ChunkSize; - ZEN_ASSERT(ReadPtr <= BlockView.GetDataEnd()); - } - return true; -}; std::vector<IoHash> GetBlockHashesFromOplog(CbObjectView ContainerObject) { using namespace std::literals; - std::vector<RemoteProjectStore::Block> Result; - CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); + std::vector<ChunkBlockDescription> Result; + CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); std::vector<IoHash> BlockHashes; BlockHashes.reserve(BlocksArray.Num()); @@ -1199,11 +1103,11 @@ GetBlockHashesFromOplog(CbObjectView ContainerObject) return BlockHashes; } -std::vector<RemoteProjectStore::Block> +std::vector<ThinChunkBlockDescription> GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> IncludeBlockHashes) { using namespace std::literals; - std::vector<RemoteProjectStore::Block> Result; + std::vector<ThinChunkBlockDescription> Result; CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); tsl::robin_set<IoHash, IoHash::Hasher> IncludeSet; IncludeSet.insert(IncludeBlockHashes.begin(), IncludeBlockHashes.end()); @@ -1226,53 +1130,12 @@ GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> Include { ChunkHashes.push_back(ChunkField.AsHash()); } - Result.push_back({.BlockHash = BlockHash, .ChunkHashes = std::move(ChunkHashes)}); + Result.push_back(ThinChunkBlockDescription{.BlockHash = BlockHash, .ChunkRawHashes = std::move(ChunkHashes)}); } } return Result; } -CompressedBuffer -GenerateBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, RemoteProjectStore::Block& OutBlock) -{ - const size_t ChunkCount = FetchChunks.size(); - - std::vector<SharedBuffer> ChunkSegments; - ChunkSegments.resize(1); - ChunkSegments.reserve(1 + ChunkCount); - OutBlock.ChunkHashes.reserve(ChunkCount); - OutBlock.ChunkLengths.reserve(ChunkCount); - { - IoBuffer TempBuffer(ChunkCount * 9); - MutableMemoryView View = TempBuffer.GetMutableView(); - uint8_t* BufferStartPtr = 
reinterpret_cast<uint8_t*>(View.GetData()); - uint8_t* BufferEndPtr = BufferStartPtr; - BufferEndPtr += WriteVarUInt(gsl::narrow<uint64_t>(ChunkCount), BufferEndPtr); - for (const auto& It : FetchChunks) - { - CompositeBuffer Chunk = It.second(It.first); - uint64_t ChunkSize = 0; - std::span<const SharedBuffer> Segments = Chunk.GetSegments(); - for (const SharedBuffer& Segment : Segments) - { - ChunkSize += Segment.GetSize(); - ChunkSegments.push_back(Segment); - } - BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr); - OutBlock.ChunkHashes.push_back(It.first); - OutBlock.ChunkLengths.push_back(gsl::narrow<uint32_t>(ChunkSize)); - } - ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd()); - ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr); - ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow<size_t>(TempBufferLength))); - } - CompressedBuffer CompressedBlock = - CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None); - OutBlock.BlockHash = CompressedBlock.DecodeRawHash(); - OutBlock.FirstChunkOffset = gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + ChunkSegments[0].GetSize()); - return CompressedBlock; -} - CbObject BuildContainer(CidStore& ChunkStore, ProjectStore::Project& Project, @@ -1283,9 +1146,9 @@ BuildContainer(CidStore& ChunkStore, bool BuildBlocks, bool IgnoreMissingAttachments, bool AllowChunking, - const std::vector<RemoteProjectStore::Block>& KnownBlocks, + const std::vector<ThinChunkBlockDescription>& KnownBlocks, WorkerThreadPool& WorkerPool, - const std::function<void(CompressedBuffer&&, RemoteProjectStore::Block&&)>& AsyncOnBlock, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, bool EmbedLooseFiles, @@ 
-1307,9 +1170,9 @@ BuildContainer(CidStore& ChunkStore, std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher> UploadAttachments; - RwLock BlocksLock; - std::vector<RemoteProjectStore::Block> Blocks; - CompressedBuffer OpsBuffer; + RwLock BlocksLock; + std::vector<ChunkBlockDescription> Blocks; + CompressedBuffer OpsBuffer; std::filesystem::path AttachmentTempPath = Oplog.TempPath(); AttachmentTempPath.append(".pending"); @@ -1525,7 +1388,7 @@ BuildContainer(CidStore& ChunkStore, return {}; } - auto FindReuseBlocks = [](const std::vector<RemoteProjectStore::Block>& KnownBlocks, + auto FindReuseBlocks = [](const std::vector<ThinChunkBlockDescription>& KnownBlocks, const std::unordered_set<IoHash, IoHash::Hasher>& Attachments, JobContext* OptionalContext) -> std::vector<size_t> { std::vector<size_t> ReuseBlockIndexes; @@ -1538,14 +1401,14 @@ BuildContainer(CidStore& ChunkStore, for (size_t KnownBlockIndex = 0; KnownBlockIndex < KnownBlocks.size(); KnownBlockIndex++) { - const RemoteProjectStore::Block& KnownBlock = KnownBlocks[KnownBlockIndex]; - size_t BlockAttachmentCount = KnownBlock.ChunkHashes.size(); + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + size_t BlockAttachmentCount = KnownBlock.ChunkRawHashes.size(); if (BlockAttachmentCount == 0) { continue; } size_t FoundAttachmentCount = 0; - for (const IoHash& KnownHash : KnownBlock.ChunkHashes) + for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { if (Attachments.contains(KnownHash)) { @@ -1586,8 +1449,8 @@ BuildContainer(CidStore& ChunkStore, std::vector<size_t> ReusedBlockIndexes = FindReuseBlocks(KnownBlocks, FoundHashes, OptionalContext); for (size_t KnownBlockIndex : ReusedBlockIndexes) { - const RemoteProjectStore::Block& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (const IoHash& KnownHash : KnownBlock.ChunkHashes) + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { if 
(UploadAttachments.erase(KnownHash) == 1) { @@ -1605,10 +1468,7 @@ BuildContainer(CidStore& ChunkStore, }; std::vector<ChunkedFile> ChunkedFiles; - auto ChunkFile = [AttachmentTempPath](const IoHash& RawHash, - IoBuffer& RawData, - const IoBufferFileReference& FileRef, - JobContext*) -> ChunkedFile { + auto ChunkFile = [](const IoHash& RawHash, IoBuffer& RawData, const IoBufferFileReference& FileRef, JobContext*) -> ChunkedFile { ChunkedFile Chunked; Stopwatch Timer; @@ -1632,12 +1492,12 @@ BuildContainer(CidStore& ChunkStore, return Chunked; }; - RwLock ResolveLock; - std::unordered_set<IoHash, IoHash::Hasher> ChunkedHashes; - std::unordered_set<IoHash, IoHash::Hasher> LargeChunkHashes; - std::unordered_map<IoHash, size_t, IoHash::Hasher> ChunkedUploadAttachments; - std::unordered_map<IoHash, IoBuffer, IoHash::Hasher> LooseUploadAttachments; - std::unordered_set<IoHash, IoHash::Hasher> MissingHashes; + RwLock ResolveLock; + std::unordered_set<IoHash, IoHash::Hasher> ChunkedHashes; + std::unordered_set<IoHash, IoHash::Hasher> LargeChunkHashes; + std::unordered_map<IoHash, size_t, IoHash::Hasher> ChunkedUploadAttachments; + std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher> LooseUploadAttachments; + std::unordered_set<IoHash, IoHash::Hasher> MissingHashes; remotestore_impl::ReportMessage(OptionalContext, fmt::format("Resolving {} attachments from {} ops", UploadAttachments.size(), TotalOpCount)); @@ -1717,9 +1577,7 @@ BuildContainer(CidStore& ChunkStore, std::filesystem::path AttachmentPath = AttachmentTempPath; AttachmentPath.append(RawHash.ToHexString()); - - IoBuffer TempAttachmentBuffer = - remotestore_impl::WriteToTempFile(std::move(Compressed), AttachmentPath); + IoBuffer TempAttachmentBuffer = WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath); ZEN_INFO("Saved temp attachment to '{}', {} ({})", AttachmentPath, NiceBytes(RawSize), @@ -1730,7 +1588,7 @@ BuildContainer(CidStore& ChunkStore, } else { - size_t RawSize = 
RawData.GetSize(); + uint64_t RawSize = RawData.GetSize(); CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer(RawData), OodleCompressor::Mermaid, OodleCompressionLevel::VeryFast); @@ -1738,23 +1596,24 @@ BuildContainer(CidStore& ChunkStore, std::filesystem::path AttachmentPath = AttachmentTempPath; AttachmentPath.append(RawHash.ToHexString()); - IoBuffer TempAttachmentBuffer = remotestore_impl::WriteToTempFile(std::move(Compressed), AttachmentPath); + uint64_t CompressedSize = Compressed.GetCompressedSize(); + IoBuffer TempAttachmentBuffer = WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath); ZEN_INFO("Saved temp attachment to '{}', {} ({})", AttachmentPath, NiceBytes(RawSize), NiceBytes(TempAttachmentBuffer.GetSize())); - if (Compressed.GetCompressedSize() > MaxChunkEmbedSize) + if (CompressedSize > MaxChunkEmbedSize) { OnLargeAttachment(RawHash, [Data = std::move(TempAttachmentBuffer)](const IoHash&) { return Data; }); ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); }); } else { - UploadAttachment->Size = Compressed.GetCompressedSize(); + UploadAttachment->Size = CompressedSize; ResolveLock.WithExclusiveLock( - [RawHash, &LooseUploadAttachments, Data = std::move(TempAttachmentBuffer)]() { - LooseUploadAttachments.insert_or_assign(RawHash, std::move(Data)); + [RawHash, RawSize, &LooseUploadAttachments, Data = std::move(TempAttachmentBuffer)]() { + LooseUploadAttachments.insert_or_assign(RawHash, std::make_pair(RawSize, std::move(Data))); }); } } @@ -1927,8 +1786,8 @@ BuildContainer(CidStore& ChunkStore, std::vector<size_t> ReusedBlockFromChunking = FindReuseBlocks(KnownBlocks, ChunkedHashes, OptionalContext); for (size_t KnownBlockIndex : ReusedBlockIndexes) { - const RemoteProjectStore::Block& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (const IoHash& KnownHash : KnownBlock.ChunkHashes) + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for 
(const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { if (ChunkedHashes.erase(KnownHash) == 1) { @@ -1946,7 +1805,7 @@ BuildContainer(CidStore& ChunkStore, Blocks.reserve(ReuseBlockCount); for (auto It = ReusedBlockIndexes.begin(); It != UniqueKnownBlocksEnd; It++) { - Blocks.push_back(KnownBlocks[*It]); + Blocks.push_back({KnownBlocks[*It]}); } remotestore_impl::ReportMessage(OptionalContext, fmt::format("Reused {} attachments from {} blocks", ReusedAttachmentCount, ReuseBlockCount)); @@ -2062,9 +1921,9 @@ BuildContainer(CidStore& ChunkStore, { // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index RwLock::SharedLockScope _(BlocksLock); - Blocks[BlockIndex].ChunkHashes.insert(Blocks[BlockIndex].ChunkHashes.end(), - BlockAttachmentHashes.begin(), - BlockAttachmentHashes.end()); + Blocks[BlockIndex].ChunkRawHashes.insert(Blocks[BlockIndex].ChunkRawHashes.end(), + BlockAttachmentHashes.begin(), + BlockAttachmentHashes.end()); } uint64_t NowMS = Timer.GetElapsedTimeMs(); ZEN_INFO("Assembled block {} with {} chunks in {} ({})", @@ -2109,16 +1968,25 @@ BuildContainer(CidStore& ChunkStore, { if (auto It = LooseUploadAttachments.find(RawHash); It != LooseUploadAttachments.end()) { - ChunksInBlock.emplace_back(std::make_pair(RawHash, [IoBuffer = SharedBuffer(It->second)](const IoHash&) { - return CompositeBuffer(IoBuffer); - })); + ChunksInBlock.emplace_back(std::make_pair( + RawHash, + [RawSize = It->second.first, + IoBuffer = SharedBuffer(It->second.second)](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { + return std::make_pair(RawSize, CompressedBuffer::FromCompressedNoValidate(IoBuffer.AsIoBuffer())); + })); LooseUploadAttachments.erase(It); } else { - ChunksInBlock.emplace_back(std::make_pair(RawHash, [&ChunkStore](const IoHash& RawHash) { - return CompositeBuffer(SharedBuffer(ChunkStore.FindChunkByCid(RawHash))); - })); + ChunksInBlock.emplace_back( + std::make_pair(RawHash, [&ChunkStore](const IoHash& 
RawHash) -> std::pair<uint64_t, CompressedBuffer> { + IoBuffer Chunk = ChunkStore.FindChunkByCid(RawHash); + IoHash _; + uint64_t RawSize = 0; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), _, RawSize); + ZEN_ASSERT(Compressed); + return {RawSize, Compressed}; + })); } BlockSize += PayloadSize; @@ -2169,14 +2037,15 @@ BuildContainer(CidStore& ChunkStore, if (BlockAttachmentHashes.insert(ChunkHash).second) { const ChunkSource& Source = Chunked.ChunkSources[ChunkIndex]; - ChunksInBlock.emplace_back(std::make_pair( - ChunkHash, - [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size](const IoHash&) { - return CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)), - OodleCompressor::Mermaid, - OodleCompressionLevel::None) - .GetCompressed(); - })); + ChunksInBlock.emplace_back( + std::make_pair(ChunkHash, + [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size]( + const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { + return {Size, + CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)), + OodleCompressor::Mermaid, + OodleCompressionLevel::None)}; + })); BlockSize += CompressedBuffer::GetHeaderSizeForNoneEncoder() + Source.Size; if (BuildBlocks) { @@ -2298,9 +2167,9 @@ BuildContainer(CidStore& ChunkStore, OplogContinerWriter.AddBinary("ops"sv, CompressedOpsSection.GetCompressed().Flatten().AsIoBuffer()); OplogContinerWriter.BeginArray("blocks"sv); { - for (const RemoteProjectStore::Block& B : Blocks) + for (const ChunkBlockDescription& B : Blocks) { - ZEN_ASSERT(!B.ChunkHashes.empty()); + ZEN_ASSERT(!B.ChunkRawHashes.empty()); if (BuildBlocks) { ZEN_ASSERT(B.BlockHash != IoHash::Zero); @@ -2310,7 +2179,7 @@ BuildContainer(CidStore& ChunkStore, OplogContinerWriter.AddBinaryAttachment("rawhash"sv, B.BlockHash); OplogContinerWriter.BeginArray("chunks"sv); { - for (const IoHash& RawHash : B.ChunkHashes) + for (const IoHash& RawHash : 
B.ChunkRawHashes) { OplogContinerWriter.AddHash(RawHash); } @@ -2326,7 +2195,7 @@ BuildContainer(CidStore& ChunkStore, { OplogContinerWriter.BeginArray("chunks"sv); { - for (const IoHash& RawHash : B.ChunkHashes) + for (const IoHash& RawHash : B.ChunkRawHashes) { OplogContinerWriter.AddBinaryAttachment(RawHash); } @@ -2392,7 +2261,7 @@ BuildContainer(CidStore& ChunkStore, bool BuildBlocks, bool IgnoreMissingAttachments, bool AllowChunking, - const std::function<void(CompressedBuffer&&, RemoteProjectStore::Block&&)>& AsyncOnBlock, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, bool EmbedLooseFiles) @@ -2458,13 +2327,13 @@ SaveOplog(CidStore& ChunkStore, std::unordered_map<IoHash, remotestore_impl::CreatedBlock, IoHash::Hasher> CreatedBlocks; tsl::robin_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher> LooseLargeFiles; - auto MakeTempBlock = [AttachmentTempPath, &RemoteResult, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock, - RemoteProjectStore::Block&& Block) { + auto MakeTempBlock = [AttachmentTempPath, &RemoteResult, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock, + ChunkBlockDescription&& Block) { std::filesystem::path BlockPath = AttachmentTempPath; BlockPath.append(Block.BlockHash.ToHexString()); try { - IoBuffer BlockBuffer = remotestore_impl::WriteToTempFile(std::move(CompressedBlock), BlockPath); + IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath); RwLock::ExclusiveLockScope __(AttachmentsLock); CreatedBlocks.insert({Block.BlockHash, {.Payload = std::move(BlockBuffer), .Block = std::move(Block)}}); ZEN_DEBUG("Saved temp block to '{}', {}", AttachmentTempPath, NiceBytes(BlockBuffer.GetSize())); @@ -2478,8 +2347,8 @@ SaveOplog(CidStore& ChunkStore, } 
}; - auto UploadBlock = [&RemoteStore, &RemoteResult, &Info, OptionalContext](CompressedBuffer&& CompressedBlock, - RemoteProjectStore::Block&& Block) { + auto UploadBlock = [&RemoteStore, &RemoteResult, &Info, OptionalContext](CompressedBuffer&& CompressedBlock, + ChunkBlockDescription&& Block) { IoHash BlockHash = Block.BlockHash; RemoteProjectStore::SaveAttachmentResult Result = RemoteStore.SaveAttachment(CompressedBlock.GetCompressed(), BlockHash, std::move(Block)); @@ -2512,7 +2381,7 @@ SaveOplog(CidStore& ChunkStore, ZEN_DEBUG("Found attachment {}", AttachmentHash); }; - std::function<void(CompressedBuffer&&, RemoteProjectStore::Block &&)> OnBlock; + std::function<void(CompressedBuffer&&, ChunkBlockDescription &&)> OnBlock; if (RemoteStoreInfo.UseTempBlockFiles) { OnBlock = MakeTempBlock; @@ -2522,7 +2391,7 @@ SaveOplog(CidStore& ChunkStore, OnBlock = UploadBlock; } - std::vector<RemoteProjectStore::Block> KnownBlocks; + std::vector<ThinChunkBlockDescription> KnownBlocks; uint64_t TransferWallTimeMS = 0; diff --git a/src/zenserver/projectstore/remoteprojectstore.h b/src/zenserver/projectstore/remoteprojectstore.h index e05cb9923..1210afc7c 100644 --- a/src/zenserver/projectstore/remoteprojectstore.h +++ b/src/zenserver/projectstore/remoteprojectstore.h @@ -5,6 +5,8 @@ #include <zencore/jobqueue.h> #include "projectstore.h" +#include <zenutil/chunkblock.h> + #include <unordered_set> namespace zen { @@ -16,14 +18,6 @@ struct ChunkedInfo; class RemoteProjectStore { public: - struct Block - { - IoHash BlockHash; - std::vector<IoHash> ChunkHashes; - std::vector<uint32_t> ChunkLengths; - uint32_t FirstChunkOffset = (uint32_t)-1; - }; - struct Result { int32_t ErrorCode{}; @@ -72,7 +66,7 @@ public: struct GetKnownBlocksResult : public Result { - std::vector<Block> Blocks; + std::vector<ThinChunkBlockDescription> Blocks; }; struct RemoteStoreInfo @@ -101,11 +95,11 @@ public: virtual RemoteStoreInfo GetInfo() const = 0; virtual Stats GetStats() const = 0; - virtual 
CreateContainerResult CreateContainer() = 0; - virtual SaveResult SaveContainer(const IoBuffer& Payload) = 0; - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&& Block) = 0; - virtual FinalizeResult FinalizeContainer(const IoHash& RawHash) = 0; - virtual SaveAttachmentsResult SaveAttachments(const std::vector<SharedBuffer>& Payloads) = 0; + virtual CreateContainerResult CreateContainer() = 0; + virtual SaveResult SaveContainer(const IoBuffer& Payload) = 0; + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&& Block) = 0; + virtual FinalizeResult FinalizeContainer(const IoHash& RawHash) = 0; + virtual SaveAttachmentsResult SaveAttachments(const std::vector<SharedBuffer>& Payloads) = 0; virtual LoadContainerResult LoadContainer() = 0; virtual GetKnownBlocksResult GetKnownBlocks() = 0; @@ -125,7 +119,6 @@ struct RemoteStoreOptions }; typedef std::function<IoBuffer(const IoHash& AttachmentHash)> TGetAttachmentBufferFunc; -typedef std::function<CompositeBuffer(const IoHash& RawHash)> FetchChunkFunc; RemoteProjectStore::LoadContainerResult BuildContainer( CidStore& ChunkStore, @@ -137,7 +130,7 @@ RemoteProjectStore::LoadContainerResult BuildContainer( bool BuildBlocks, bool IgnoreMissingAttachments, bool AllowChunking, - const std::function<void(CompressedBuffer&&, RemoteProjectStore::Block&&)>& AsyncOnBlock, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, bool EmbedLooseFiles); @@ -173,9 +166,7 @@ RemoteProjectStore::Result LoadOplog(CidStore& ChunkStore, bool CleanOplog, JobContext* OptionalContext); -CompressedBuffer GenerateBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, RemoteProjectStore::Block& 
OutBlock); -bool IterateBlock(const SharedBuffer& BlockPayload, std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor); std::vector<IoHash> GetBlockHashesFromOplog(CbObjectView ContainerObject); -std::vector<RemoteProjectStore::Block> GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> IncludeBlockHashes); +std::vector<ThinChunkBlockDescription> GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> IncludeBlockHashes); } // namespace zen diff --git a/src/zenserver/projectstore/zenremoteprojectstore.cpp b/src/zenserver/projectstore/zenremoteprojectstore.cpp index 42519b108..2ebf58a5d 100644 --- a/src/zenserver/projectstore/zenremoteprojectstore.cpp +++ b/src/zenserver/projectstore/zenremoteprojectstore.cpp @@ -93,7 +93,7 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override { std::string SaveRequest = fmt::format("/{}/oplog/{}/{}"sv, m_Project, m_Oplog, RawHash); HttpClient::Response Response = m_Client.Post(SaveRequest, Payload, ZenContentType::kCompressedBinary); diff --git a/src/zenserver/workspaces/httpworkspaces.cpp b/src/zenserver/workspaces/httpworkspaces.cpp index 2d59c9357..8a4b977ad 100644 --- a/src/zenserver/workspaces/httpworkspaces.cpp +++ b/src/zenserver/workspaces/httpworkspaces.cpp @@ -51,9 +51,9 @@ namespace { WriteWorkspaceConfig(Writer, WorkspaceConfig); if (std::optional<std::vector<Oid>> ShareIds = Workspaces.GetWorkspaceShares(WorkspaceConfig.Id); ShareIds) { - for (const Oid& ShareId : *ShareIds) + Writer.BeginArray("shares"); { - Writer.BeginArray("shares"); + for (const Oid& ShareId : *ShareIds) { if (std::optional<Workspaces::WorkspaceShareConfiguration> WorkspaceShareConfig = Workspaces.GetWorkspaceShareConfiguration(WorkspaceConfig.Id, ShareId); 
@@ -66,8 +66,8 @@ namespace { Writer.EndObject(); } } - Writer.EndArray(); } + Writer.EndArray(); } } @@ -589,7 +589,7 @@ void HttpWorkspacesService::ShareAliasFilesRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -608,7 +608,7 @@ void HttpWorkspacesService::ShareAliasChunkInfoRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -635,7 +635,7 @@ void HttpWorkspacesService::ShareAliasBatchRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -654,7 +654,7 @@ void HttpWorkspacesService::ShareAliasEntriesRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -673,7 +673,7 @@ void HttpWorkspacesService::ShareAliasChunkRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -700,7 +700,7 @@ void HttpWorkspacesService::ShareAliasRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { 
return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, diff --git a/src/zenstore-test/zenstore-test.cpp b/src/zenstore-test/zenstore-test.cpp index b86f6be15..32fc69783 100644 --- a/src/zenstore-test/zenstore-test.cpp +++ b/src/zenstore-test/zenstore-test.cpp @@ -18,9 +18,9 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) #if ZEN_WITH_TESTS zen::zenstore_forcelinktests(); -#if ZEN_PLATFORM_LINUX +# if ZEN_PLATFORM_LINUX zen::IgnoreChildSignals(); -#endif +# endif zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index 25f68330a..61552fafc 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -708,11 +708,11 @@ namespace zen { ZenCacheDiskLayer::CacheBucket::CacheBucket(GcManager& Gc, std::atomic_uint64_t& OuterCacheMemoryUsage, - std::string BucketName, + std::string_view BucketName, const BucketConfiguration& Config) : m_Gc(Gc) , m_OuterCacheMemoryUsage(OuterCacheMemoryUsage) -, m_BucketName(std::move(BucketName)) +, m_BucketName(BucketName) , m_Configuration(Config) , m_BucketId(Oid::Zero) { @@ -1329,7 +1329,7 @@ ZenCacheDiskLayer::CacheBucket::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle { - GetBatchHandle(std::vector<ZenCacheValue>& OutResults) : OutResults(OutResults) + GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults) { Keys.reserve(OutResults.capacity()); ResultIndexes.reserve(OutResults.capacity()); @@ -1340,11 +1340,11 @@ struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle std::vector<IoHash> Keys; std::vector<size_t> ResultIndexes; - std::vector<ZenCacheValue>& OutResults; + ZenCacheValueVec_t& OutResults; }; ZenCacheDiskLayer::CacheBucket::GetBatchHandle* -ZenCacheDiskLayer::CacheBucket::BeginGetBatch(std::vector<ZenCacheValue>& OutResult) +ZenCacheDiskLayer::CacheBucket::BeginGetBatch(ZenCacheValueVec_t& 
OutResult) { ZEN_TRACE_CPU("Z$::Bucket::BeginGetBatch"); return new GetBatchHandle(OutResult); @@ -1364,13 +1364,13 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept if (!Batch->ResultIndexes.empty()) { - std::vector<DiskLocation> StandaloneDiskLocations; - std::vector<size_t> StandaloneKeyIndexes; - std::vector<size_t> MemCachedKeyIndexes; - std::vector<DiskLocation> InlineDiskLocations; - std::vector<BlockStoreLocation> InlineBlockLocations; - std::vector<size_t> InlineKeyIndexes; - std::vector<bool> FillRawHashAndRawSize(Batch->Keys.size(), false); + eastl::fixed_vector<DiskLocation, 16> StandaloneDiskLocations; + eastl::fixed_vector<size_t, 16> StandaloneKeyIndexes; + eastl::fixed_vector<size_t, 16> MemCachedKeyIndexes; + eastl::fixed_vector<DiskLocation, 16> InlineDiskLocations; + eastl::fixed_vector<BlockStoreLocation, 16> InlineBlockLocations; + eastl::fixed_vector<size_t, 16> InlineKeyIndexes; + eastl::fixed_vector<bool, 16> FillRawHashAndRawSize(Batch->Keys.size(), false); { RwLock::SharedLockScope IndexLock(m_IndexLock); for (size_t KeyIndex = 0; KeyIndex < Batch->Keys.size(); KeyIndex++) @@ -1526,33 +1526,35 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept if (!InlineDiskLocations.empty()) { ZEN_TRACE_CPU("Z$::Bucket::EndGetBatch::ReadInline"); - m_BlockStore.IterateChunks(InlineBlockLocations, [&](uint32_t, std::span<const size_t> ChunkIndexes) -> bool { - // Only read into memory the IoBuffers we could potentially add to memcache - const uint64_t LargeChunkSizeLimit = Max(m_Configuration.MemCacheSizeThreshold, 1u * 1024u); - m_BlockStore.IterateBlock( - InlineBlockLocations, - ChunkIndexes, - [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, - const void* Data, - uint64_t Size) -> bool { - if (Data != nullptr) - { - FillOne(InlineDiskLocations[ChunkIndex], - InlineKeyIndexes[ChunkIndex], - IoBufferBuilder::MakeCloneFromMemory(Data, Size)); - } - return true; - }, - 
[this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, - BlockStoreFile& File, - uint64_t Offset, - uint64_t Size) -> bool { - FillOne(InlineDiskLocations[ChunkIndex], InlineKeyIndexes[ChunkIndex], File.GetChunk(Offset, Size)); - return true; - }, - LargeChunkSizeLimit); - return true; - }); + m_BlockStore.IterateChunks( + std::span{begin(InlineBlockLocations), end(InlineBlockLocations)}, + [&](uint32_t, std::span<const size_t> ChunkIndexes) -> bool { + // Only read into memory the IoBuffers we could potentially add to memcache + const uint64_t LargeChunkSizeLimit = Max(m_Configuration.MemCacheSizeThreshold, 1u * 1024u); + m_BlockStore.IterateBlock( + std::span{begin(InlineBlockLocations), end(InlineBlockLocations)}, + ChunkIndexes, + [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, + const void* Data, + uint64_t Size) -> bool { + if (Data != nullptr) + { + FillOne(InlineDiskLocations[ChunkIndex], + InlineKeyIndexes[ChunkIndex], + IoBufferBuilder::MakeCloneFromMemory(Data, Size)); + } + return true; + }, + [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, + BlockStoreFile& File, + uint64_t Offset, + uint64_t Size) -> bool { + FillOne(InlineDiskLocations[ChunkIndex], InlineKeyIndexes[ChunkIndex], File.GetChunk(Offset, Size)); + return true; + }, + LargeChunkSizeLimit); + return true; + }); } if (!StandaloneDiskLocations.empty()) @@ -3581,15 +3583,29 @@ ZenCacheDiskLayer::~ZenCacheDiskLayer() } } +template<typename T, typename U> +struct equal_to_2 : public eastl::binary_function<T, U, bool> +{ + constexpr bool operator()(const T& a, const U& b) const { return a == b; } + + template<typename T_ = T, + typename U_ = U, + typename = eastl::enable_if_t<!eastl::is_same_v<eastl::remove_const_t<T_>, eastl::remove_const_t<U_>>>> + constexpr bool operator()(const U& b, const T& a) const + { + return b == a; + } +}; + ZenCacheDiskLayer::CacheBucket* 
ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket) { ZEN_TRACE_CPU("Z$::GetOrCreateBucket"); - const auto BucketName = std::string(InBucket); { RwLock::SharedLockScope SharedLock(m_Lock); - if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end()) + if (auto It = m_Buckets.find_as(InBucket, std::hash<std::string_view>(), equal_to_2<std::string, std::string_view>()); + It != m_Buckets.end()) { return It->second.get(); } @@ -3597,31 +3613,32 @@ ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket) // We create the bucket without holding a lock since contructor calls GcManager::AddGcReferencer which takes an exclusive lock. // This can cause a deadlock, if GC is running we would block while holding ZenCacheDiskLayer::m_Lock - std::unique_ptr<CacheBucket> Bucket( - std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, BucketName, m_Configuration.BucketConfig)); + std::unique_ptr<CacheBucket> Bucket(std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, InBucket, m_Configuration.BucketConfig)); RwLock::ExclusiveLockScope Lock(m_Lock); - if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end()) + if (auto It = m_Buckets.find_as(InBucket, std::hash<std::string_view>(), equal_to_2<std::string, std::string_view>()); + It != m_Buckets.end()) { return It->second.get(); } std::filesystem::path BucketPath = m_RootDir; - BucketPath /= BucketName; + BucketPath /= InBucket; try { if (!Bucket->OpenOrCreate(BucketPath)) { - ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir); + ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", InBucket, m_RootDir); return nullptr; } } catch (const std::exception& Err) { - ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what()); + ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", InBucket, BucketPath, Err.what()); throw; } + std::string 
BucketName{InBucket}; CacheBucket* Result = Bucket.get(); m_Buckets.emplace(BucketName, std::move(Bucket)); if (m_CapturedBuckets) @@ -3720,7 +3737,7 @@ ZenCacheDiskLayer::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheDiskLayer::GetBatchHandle { - GetBatchHandle(std::vector<ZenCacheValue>& OutResults) : OutResults(OutResults) {} + GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults) {} struct BucketHandle { CacheBucket* Bucket; @@ -3780,13 +3797,13 @@ struct ZenCacheDiskLayer::GetBatchHandle return NewBucketHandle; } - RwLock Lock; - std::vector<BucketHandle> BucketHandles; - std::vector<ZenCacheValue>& OutResults; + RwLock Lock; + eastl::fixed_vector<BucketHandle, 4> BucketHandles; + ZenCacheValueVec_t& OutResults; }; ZenCacheDiskLayer::GetBatchHandle* -ZenCacheDiskLayer::BeginGetBatch(std::vector<ZenCacheValue>& OutResults) +ZenCacheDiskLayer::BeginGetBatch(ZenCacheValueVec_t& OutResults) { return new GetBatchHandle(OutResults); } diff --git a/src/zenstore/cache/cacherpc.cpp b/src/zenstore/cache/cacherpc.cpp index cca51e63e..97e26a38d 100644 --- a/src/zenstore/cache/cacherpc.cpp +++ b/src/zenstore/cache/cacherpc.cpp @@ -20,6 +20,8 @@ #include <zencore/memory/llm.h> +#include <EASTL/fixed_vector.h> + ////////////////////////////////////////////////////////////////////////// namespace zen { @@ -89,7 +91,7 @@ GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key) return false; } IoHash Hash = HashField.AsHash(); - Key = CacheKey::Create(*Bucket, Hash); + Key = CacheKey::CreateValidated(std::move(*Bucket), Hash); return true; } @@ -305,7 +307,7 @@ CacheRpcHandler::HandleRpcPutCacheRecords(const CacheRequestContext& Context, co } DefaultPolicy = !PolicyText.empty() ? 
ParseCachePolicy(PolicyText) : CachePolicy::Default; - std::vector<bool> Results; + eastl::fixed_vector<bool, 32> Results; CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); for (CbFieldView RequestField : RequestsArray) @@ -481,16 +483,15 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb bool Exists = false; bool ReadFromUpstream = false; }; - struct RecordRequestData + struct RecordRequestData : public CacheKeyRequest { - CacheKeyRequest Upstream; - CbObjectView RecordObject; - IoBuffer RecordCacheValue; - CacheRecordPolicy DownstreamPolicy; - std::vector<ValueRequestData> Values; - bool Complete = false; - const UpstreamEndpointInfo* Source = nullptr; - uint64_t ElapsedTimeUs; + CbObjectView RecordObject; + IoBuffer RecordCacheValue; + CacheRecordPolicy DownstreamPolicy; + eastl::fixed_vector<ValueRequestData, 4> Values; + bool Complete = false; + const UpstreamEndpointInfo* Source = nullptr; + uint64_t ElapsedTimeUs; }; std::string_view PolicyText = Params["DefaultPolicy"sv].AsString(); @@ -503,8 +504,8 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb const bool HasUpstream = m_UpstreamCache.IsActive(); - std::vector<RecordRequestData> Requests; - std::vector<size_t> UpstreamIndexes; + eastl::fixed_vector<RecordRequestData, 16> Requests; + eastl::fixed_vector<size_t, 16> UpstreamIndexes; auto ParseValues = [](RecordRequestData& Request) { CbArrayView ValuesArray = Request.RecordObject["Values"sv].AsArrayView(); @@ -535,7 +536,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb CbObjectView RequestObject = RequestField.AsObjectView(); CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); - CacheKey& Key = Request.Upstream.Key; + CacheKey& Key = Request.Key; if (!GetRpcRequestCacheKey(KeyObject, Key)) { return CbPackage{}; @@ -707,7 +708,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb for (size_t Index 
: UpstreamIndexes) { RecordRequestData& Request = Requests[Index]; - UpstreamRequests.push_back(&Request.Upstream); + UpstreamRequests.push_back(&Request); if (Request.Values.size()) { @@ -721,13 +722,13 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb UpstreamPolicy |= !Value.ReadFromUpstream ? CachePolicy::SkipData : CachePolicy::None; Builder.AddValuePolicy(Value.ValueId, UpstreamPolicy); } - Request.Upstream.Policy = Builder.Build(); + Request.Policy = Builder.Build(); } else { // We don't know which Values exist in the Record; ask the upstrem for all values that the client wants, // and convert the CacheRecordPolicy to an upstream policy - Request.Upstream.Policy = Request.DownstreamPolicy.ConvertToUpstream(); + Request.Policy = Request.DownstreamPolicy.ConvertToUpstream(); } } @@ -737,10 +738,9 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb return; } - RecordRequestData& Request = - *reinterpret_cast<RecordRequestData*>(reinterpret_cast<char*>(&Params.Request) - offsetof(RecordRequestData, Upstream)); + RecordRequestData& Request = *static_cast<RecordRequestData*>(&Params.Request); Request.ElapsedTimeUs += static_cast<uint64_t>(Params.ElapsedSeconds * 1000000.0); - const CacheKey& Key = Request.Upstream.Key; + const CacheKey& Key = Request.Key; Stopwatch Timer; auto TimeGuard = MakeGuard([&Timer, &Request]() { Request.ElapsedTimeUs += Timer.GetElapsedTimeUs(); }); if (!Request.RecordObject) @@ -832,10 +832,12 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb CbPackage ResponsePackage; CbObjectWriter ResponseObject{2048}; + ResponsePackage.ReserveAttachments(Requests.size()); + ResponseObject.BeginArray("Result"sv); for (RecordRequestData& Request : Requests) { - const CacheKey& Key = Request.Upstream.Key; + const CacheKey& Key = Request.Key; if (Request.Complete || (Request.RecordObject && EnumHasAllFlags(Request.DownstreamPolicy.GetRecordPolicy(), 
CachePolicy::PartialRecord))) { @@ -910,11 +912,12 @@ CacheRpcHandler::HandleRpcPutCacheValues(const CacheRequestContext& Context, con const bool HasUpstream = m_UpstreamCache.IsActive(); CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); - std::vector<bool> BatchResults; - std::vector<size_t> BatchResultIndexes; - std::vector<bool> Results; - std::vector<CacheKey> UpstreamCacheKeys; - uint64_t RequestCount = RequestsArray.Num(); + std::vector<bool> BatchResults; + eastl::fixed_vector<size_t, 32> BatchResultIndexes; + eastl::fixed_vector<bool, 32> Results; + eastl::fixed_vector<CacheKey, 32> UpstreamCacheKeys; + + uint64_t RequestCount = RequestsArray.Num(); { Results.reserve(RequestCount); std::unique_ptr<ZenCacheStore::PutBatch> Batch; @@ -1099,15 +1102,15 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO uint64_t RawSize = 0; CompressedBuffer Result; }; - std::vector<RequestData> Requests; + eastl::fixed_vector<RequestData, 16> Requests; - std::vector<size_t> RemoteRequestIndexes; + eastl::fixed_vector<size_t, 16> RemoteRequestIndexes; const bool HasUpstream = m_UpstreamCache.IsActive(); - CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); - std::vector<ZenCacheValue> CacheValues; - const uint64_t RequestCount = RequestsArray.Num(); + CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); + ZenCacheValueVec_t CacheValues; + const uint64_t RequestCount = RequestsArray.Num(); CacheValues.reserve(RequestCount); { std::unique_ptr<ZenCacheStore::GetBatch> Batch; @@ -1136,7 +1139,6 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO CacheKey& Key = Request.Key; CachePolicy Policy = Request.Policy; - ZenCacheValue CacheValue; if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { if (Batch) @@ -1276,6 +1278,9 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO ZEN_TRACE_CPU("Z$::RpcGetCacheValues::Response"); CbPackage RpcResponse; 
CbObjectWriter ResponseObject{1024}; + + RpcResponse.ReserveAttachments(Requests.size()); + ResponseObject.BeginArray("Result"sv); for (const RequestData& Request : Requests) { @@ -1642,7 +1647,7 @@ CacheRpcHandler::GetLocalCacheValues(const CacheRequestContext& Context, using namespace cache::detail; const bool HasUpstream = m_UpstreamCache.IsActive(); - std::vector<ZenCacheValue> Chunks; + ZenCacheValueVec_t Chunks; Chunks.reserve(ValueRequests.size()); { std::unique_ptr<ZenCacheStore::GetBatch> Batch; @@ -1796,6 +1801,8 @@ CacheRpcHandler::WriteGetCacheChunksResponse([[maybe_unused]] const CacheRequest CbPackage RpcResponse; CbObjectWriter Writer{1024}; + RpcResponse.ReserveAttachments(Requests.size()); + Writer.BeginArray("Result"sv); for (ChunkRequest& Request : Requests) { diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index 133cb42d7..7d277329e 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -178,13 +178,13 @@ ZenCacheNamespace::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheNamespace::GetBatchHandle { - GetBatchHandle(std::vector<ZenCacheValue>& OutResult) : Results(OutResult) {} - std::vector<ZenCacheValue>& Results; + GetBatchHandle(ZenCacheValueVec_t& OutResult) : Results(OutResult) {} + ZenCacheValueVec_t& Results; ZenCacheDiskLayer::GetBatchHandle* DiskLayerHandle = nullptr; }; ZenCacheNamespace::GetBatchHandle* -ZenCacheNamespace::BeginGetBatch(std::vector<ZenCacheValue>& OutResult) +ZenCacheNamespace::BeginGetBatch(ZenCacheValueVec_t& OutResult) { ZenCacheNamespace::GetBatchHandle* Handle = new ZenCacheNamespace::GetBatchHandle(OutResult); Handle->DiskLayerHandle = m_DiskLayer.BeginGetBatch(OutResult); @@ -580,7 +580,7 @@ ZenCacheStore::PutBatch::~PutBatch() } } -ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, std::vector<ZenCacheValue>& OutResult) 
+ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, ZenCacheValueVec_t& OutResult) : m_CacheStore(CacheStore) , Results(OutResult) { diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index 14123528c..34db51aa9 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -185,7 +185,7 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN // in this folder as well struct Visitor : public FileSystemTraversal::TreeVisitor { - virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t) override + virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t, uint64_t) override { // We don't care about files } @@ -1174,7 +1174,7 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir) struct Visitor : public FileSystemTraversal::TreeVisitor { Visitor(const std::filesystem::path& RootDir, std::vector<FileCasIndexEntry>& Entries) : RootDirectory(RootDir), Entries(Entries) {} - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { std::filesystem::path RelPath = std::filesystem::relative(Parent, RootDirectory); diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index b0b4f22cb..05400c784 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -12,8 +12,9 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_map.h> ZEN_THIRD_PARTY_INCLUDES_END +#include <EASTL/string.h> +#include <EASTL/unordered_map.h> #include <filesystem> -#include <unordered_map> namespace zen { @@ -169,7 +170,7 @@ public: ~ZenCacheDiskLayer(); struct GetBatchHandle; - GetBatchHandle* 
BeginGetBatch(std::vector<ZenCacheValue>& OutResult); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); void Get(std::string_view Bucket, const IoHash& HashKey, GetBatchHandle& BatchHandle); @@ -216,13 +217,16 @@ public: */ struct CacheBucket : public GcReferencer { - CacheBucket(GcManager& Gc, std::atomic_uint64_t& OuterCacheMemoryUsage, std::string BucketName, const BucketConfiguration& Config); + CacheBucket(GcManager& Gc, + std::atomic_uint64_t& OuterCacheMemoryUsage, + std::string_view BucketName, + const BucketConfiguration& Config); ~CacheBucket(); bool OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true); struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResult); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(const IoHash& HashKey, ZenCacheValue& OutValue); void Get(const IoHash& HashKey, GetBatchHandle& BatchHandle); @@ -486,18 +490,20 @@ private: bool StartAsyncMemCacheTrim(); void MemCacheTrim(); - GcManager& m_Gc; - JobQueue& m_JobQueue; - std::filesystem::path m_RootDir; - Configuration m_Configuration; - std::atomic_uint64_t m_TotalMemCachedSize{}; - std::atomic_bool m_IsMemCacheTrimming = false; - std::atomic<GcClock::Tick> m_NextAllowedTrimTick; - mutable RwLock m_Lock; - std::unordered_map<std::string, std::unique_ptr<CacheBucket>> m_Buckets; - std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets; - uint32_t m_UpdateCaptureRefCounter = 0; - std::unique_ptr<std::vector<std::string>> m_CapturedBuckets; + typedef eastl::unordered_map<std::string, std::unique_ptr<CacheBucket>, std::hash<std::string>, std::equal_to<std::string>> BucketMap_t; + + GcManager& m_Gc; + JobQueue& m_JobQueue; + std::filesystem::path m_RootDir; + Configuration m_Configuration; + std::atomic_uint64_t 
m_TotalMemCachedSize{}; + std::atomic_bool m_IsMemCacheTrimming = false; + std::atomic<GcClock::Tick> m_NextAllowedTrimTick; + mutable RwLock m_Lock; + BucketMap_t m_Buckets; + std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets; + uint32_t m_UpdateCaptureRefCounter = 0; + std::unique_ptr<std::vector<std::string>> m_CapturedBuckets; ZenCacheDiskLayer(const ZenCacheDiskLayer&) = delete; ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete; diff --git a/src/zenstore/include/zenstore/cache/cacheshared.h b/src/zenstore/include/zenstore/cache/cacheshared.h index 9b45c7b21..521c78bb1 100644 --- a/src/zenstore/include/zenstore/cache/cacheshared.h +++ b/src/zenstore/include/zenstore/cache/cacheshared.h @@ -6,6 +6,8 @@ #include <zencore/iohash.h> #include <zenstore/gc.h> +#include <EASTL/fixed_vector.h> + #include <gsl/gsl-lite.hpp> #include <unordered_map> @@ -32,6 +34,8 @@ struct ZenCacheValue IoHash RawHash = IoHash::Zero; }; +typedef eastl::fixed_vector<ZenCacheValue, 16> ZenCacheValueVec_t; + struct CacheValueDetails { struct ValueDetails diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h index 82fec9b0e..5e056cf2d 100644 --- a/src/zenstore/include/zenstore/cache/structuredcachestore.h +++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h @@ -86,7 +86,7 @@ public: void EndPutBatch(PutBatchHandle* Batch) noexcept; struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResults); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResults); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); @@ -220,14 +220,14 @@ public: class GetBatch { public: - GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, std::vector<ZenCacheValue>& OutResult); + GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, ZenCacheValueVec_t& OutResult); 
~GetBatch(); private: ZenCacheStore& m_CacheStore; ZenCacheNamespace* m_Store = nullptr; ZenCacheNamespace::GetBatchHandle* m_NamespaceBatchHandle = nullptr; - std::vector<ZenCacheValue>& Results; + ZenCacheValueVec_t& Results; friend class ZenCacheStore; }; diff --git a/src/zenstore/xmake.lua b/src/zenstore/xmake.lua index f0bd64d2e..031a66829 100644 --- a/src/zenstore/xmake.lua +++ b/src/zenstore/xmake.lua @@ -8,3 +8,4 @@ target('zenstore') add_includedirs("include", {public=true}) add_deps("zencore", "zenutil") add_packages("vcpkg::robin-map") + add_packages("vcpkg::eastl", {public=true}); diff --git a/src/zentest-appstub/zentest-appstub.cpp b/src/zentest-appstub/zentest-appstub.cpp index 66e6e03fd..24cf21e97 100644 --- a/src/zentest-appstub/zentest-appstub.cpp +++ b/src/zentest-appstub/zentest-appstub.cpp @@ -1,6 +1,7 @@ // Copyright Epic Games, Inc. All Rights Reserved. #include <stdio.h> +#include <chrono> #include <cstdlib> #include <cstring> #include <thread> diff --git a/src/zenutil-test/zenutil-test.cpp b/src/zenutil-test/zenutil-test.cpp index a392ab058..cca88b984 100644 --- a/src/zenutil-test/zenutil-test.cpp +++ b/src/zenutil-test/zenutil-test.cpp @@ -18,9 +18,9 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) #if ZEN_WITH_TESTS zen::zenutil_forcelinktests(); -#if ZEN_PLATFORM_LINUX +# if ZEN_PLATFORM_LINUX zen::IgnoreChildSignals(); -#endif +# endif zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zenutil/chunkblock.cpp b/src/zenutil/chunkblock.cpp new file mode 100644 index 000000000..f3c14edc4 --- /dev/null +++ b/src/zenutil/chunkblock.cpp @@ -0,0 +1,257 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zenutil/chunkblock.h> + +#include <zencore/compactbinarybuilder.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> + +#include <vector> + +namespace zen { + +using namespace std::literals; + +ChunkBlockDescription +ParseChunkBlockDescription(const CbObjectView& BlockObject) +{ + ChunkBlockDescription Result; + Result.BlockHash = BlockObject["rawHash"sv].AsHash(); + if (Result.BlockHash != IoHash::Zero) + { + Result.HeaderSize = BlockObject["headerSize"sv].AsUInt64(); + CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView(); + Result.ChunkRawHashes.reserve(ChunksArray.Num()); + for (CbFieldView ChunkView : ChunksArray) + { + Result.ChunkRawHashes.push_back(ChunkView.AsHash()); + } + + CbArrayView ChunkRawLengthsArray = BlockObject["chunkRawLengths"sv].AsArrayView(); + Result.ChunkRawLengths.reserve(ChunkRawLengthsArray.Num()); + for (CbFieldView ChunkView : ChunkRawLengthsArray) + { + Result.ChunkRawLengths.push_back(ChunkView.AsUInt32()); + } + + CbArrayView ChunkCompressedLengthsArray = BlockObject["chunkCompressedLengths"sv].AsArrayView(); + Result.ChunkCompressedLengths.reserve(ChunkCompressedLengthsArray.Num()); + for (CbFieldView ChunkView : ChunkCompressedLengthsArray) + { + Result.ChunkCompressedLengths.push_back(ChunkView.AsUInt32()); + } + } + return Result; +} + +std::vector<ChunkBlockDescription> +ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject) +{ + if (!BlocksObject) + { + return {}; + } + std::vector<ChunkBlockDescription> Result; + CbArrayView Blocks = BlocksObject["blocks"].AsArrayView(); + Result.reserve(Blocks.Num()); + for (CbFieldView BlockView : Blocks) + { + CbObjectView BlockObject = BlockView.AsObjectView(); + Result.emplace_back(ParseChunkBlockDescription(BlockObject)); + } + return Result; +} + +CbObject +BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData) +{ + ZEN_ASSERT(Block.BlockHash != IoHash::Zero); + ZEN_ASSERT(Block.HeaderSize > 0); + 
ZEN_ASSERT(Block.ChunkRawLengths.size() == Block.ChunkRawHashes.size()); + ZEN_ASSERT(Block.ChunkCompressedLengths.size() == Block.ChunkRawHashes.size()); + + CbObjectWriter Writer; + Writer.AddHash("rawHash"sv, Block.BlockHash); + Writer.AddInteger("headerSize"sv, Block.HeaderSize); + Writer.BeginArray("rawHashes"sv); + { + for (const IoHash& ChunkHash : Block.ChunkRawHashes) + { + Writer.AddHash(ChunkHash); + } + } + Writer.EndArray(); + + Writer.BeginArray("chunkRawLengths"); + { + for (uint32_t ChunkSize : Block.ChunkRawLengths) + { + Writer.AddInteger(ChunkSize); + } + } + Writer.EndArray(); + + Writer.BeginArray("chunkCompressedLengths"); + { + for (uint32_t ChunkSize : Block.ChunkCompressedLengths) + { + Writer.AddInteger(ChunkSize); + } + } + Writer.EndArray(); + + Writer.AddObject("metadata", MetaData); + + return Writer.Save(); +} + +ChunkBlockDescription +GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash) +{ + ChunkBlockDescription BlockDescription = {{.BlockHash = IoHash::HashBuffer(BlockPayload)}}; + if (BlockDescription.BlockHash != RawHash) + { + throw std::runtime_error(fmt::format("Block {} content hash {} does not match block hash", RawHash, BlockDescription.BlockHash)); + } + if (IterateChunkBlock( + BlockPayload, + [&BlockDescription, RawHash](CompressedBuffer&& Chunk, const IoHash& AttachmentHash) { + if (CompositeBuffer Decompressed = Chunk.DecompressToComposite(); Decompressed) + { + IoHash ChunkHash = IoHash::HashBuffer(Decompressed.Flatten()); + if (ChunkHash != AttachmentHash) + { + throw std::runtime_error( + fmt::format("Chunk {} in block {} content hash {} does not match chunk", AttachmentHash, RawHash, ChunkHash)); + } + BlockDescription.ChunkRawHashes.push_back(AttachmentHash); + BlockDescription.ChunkRawLengths.push_back(gsl::narrow<uint32_t>(Decompressed.GetSize())); + BlockDescription.ChunkCompressedLengths.push_back(gsl::narrow<uint32_t>(Chunk.GetCompressedSize())); + } + else + { + throw 
std::runtime_error(fmt::format("Chunk {} in block {} is not a compressed buffer", AttachmentHash, RawHash)); + } + }, + BlockDescription.HeaderSize)) + { + return BlockDescription; + } + else + { + throw std::runtime_error(fmt::format("Block {} is malformed", RawHash)); + } +} + +CompressedBuffer +GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock) +{ + const size_t ChunkCount = FetchChunks.size(); + + std::vector<SharedBuffer> ChunkSegments; + ChunkSegments.resize(1); + ChunkSegments.reserve(1 + ChunkCount); + OutBlock.ChunkRawHashes.reserve(ChunkCount); + OutBlock.ChunkRawLengths.reserve(ChunkCount); + OutBlock.ChunkCompressedLengths.reserve(ChunkCount); + { + IoBuffer TempBuffer(ChunkCount * 9); + MutableMemoryView View = TempBuffer.GetMutableView(); + uint8_t* BufferStartPtr = reinterpret_cast<uint8_t*>(View.GetData()); + uint8_t* BufferEndPtr = BufferStartPtr; + BufferEndPtr += WriteVarUInt(gsl::narrow<uint64_t>(ChunkCount), BufferEndPtr); + for (const auto& It : FetchChunks) + { + std::pair<uint64_t, CompressedBuffer> Chunk = It.second(It.first); + uint64_t ChunkSize = 0; + std::span<const SharedBuffer> Segments = Chunk.second.GetCompressed().GetSegments(); + for (const SharedBuffer& Segment : Segments) + { + ZEN_ASSERT(Segment.IsOwned()); + ChunkSize += Segment.GetSize(); + ChunkSegments.push_back(Segment); + } + BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr); + OutBlock.ChunkRawHashes.push_back(It.first); + OutBlock.ChunkRawLengths.push_back(gsl::narrow<uint32_t>(Chunk.first)); + OutBlock.ChunkCompressedLengths.push_back(gsl::narrow<uint32_t>(ChunkSize)); + } + ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd()); + ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr); + ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow<size_t>(TempBufferLength))); + OutBlock.HeaderSize = TempBufferLength; + } + CompressedBuffer CompressedBlock = + 
CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None); + OutBlock.BlockHash = CompressedBlock.DecodeRawHash(); + return CompressedBlock; +} + +std::vector<uint32_t> +ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize) +{ + const uint8_t* ReadPtr = reinterpret_cast<const uint8_t*>(BlockView.GetData()); + uint32_t NumberSize; + uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize); + ReadPtr += NumberSize; + std::vector<uint32_t> ChunkSizes; + ChunkSizes.reserve(ChunkCount); + while (ChunkCount--) + { + if (ReadPtr >= BlockView.GetDataEnd()) + { + throw std::runtime_error("Invalid block header, block data ended unexpectedly"); + } + uint64_t ChunkSize = ReadVarUInt(ReadPtr, NumberSize); + if (ChunkSize > std::numeric_limits<uint32_t>::max()) + { + throw std::runtime_error("Invalid block header, header data is corrupt"); + } + if (ChunkSize < 1) + { + throw std::runtime_error("Invalid block header, header data is corrupt"); + } + ChunkSizes.push_back(gsl::narrow<uint32_t>(ChunkSize)); + ReadPtr += NumberSize; + } + uint64_t Offset = std::distance((const uint8_t*)BlockView.GetData(), ReadPtr); + OutHeaderSize = Offset; + return ChunkSizes; +} + +bool +IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor, + uint64_t& OutHeaderSize) +{ + ZEN_ASSERT(BlockPayload); + if (BlockPayload.GetSize() < 1) + { + return false; + } + + MemoryView BlockView = BlockPayload.GetView(); + + std::vector<uint32_t> ChunkSizes = ReadChunkBlockHeader(BlockView, OutHeaderSize); + uint64_t Offset = OutHeaderSize; + OutHeaderSize = Offset; + for (uint64_t ChunkSize : ChunkSizes) + { + IoBuffer Chunk(BlockPayload.AsIoBuffer(), Offset, ChunkSize); + IoHash AttachmentRawHash; + uint64_t AttachmentRawSize; + CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(Chunk), AttachmentRawHash, 
AttachmentRawSize); + if (!CompressedChunk) + { + ZEN_ERROR("Invalid chunk in block"); + return false; + } + // Only verify the content hash once the chunk is known to be a valid compressed buffer + ZEN_ASSERT_SLOW(IoHash::HashBuffer(CompressedChunk.DecompressToComposite()) == AttachmentRawHash); + Visitor(std::move(CompressedChunk), AttachmentRawHash); + Offset += ChunkSize; + ZEN_ASSERT(Offset <= BlockView.GetSize()); + } + return true; +}; + +} // namespace zen diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp new file mode 100644 index 000000000..bb1ee5183 --- /dev/null +++ b/src/zenutil/chunkedcontent.cpp @@ -0,0 +1,896 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/chunkedcontent.h> + +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/scopeguard.h> +#include <zencore/timer.h> +#include <zencore/trace.h> + +#include <zenutil/chunkedfile.h> +#include <zenutil/chunkingcontroller.h> +#include <zenutil/parallellwork.h> +#include <zenutil/workerpools.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_set.h> +#include <gsl/gsl-lite.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +namespace { + void AddChunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, + const IoHash& RawHash, + std::span<const uint32_t> ChunkSequence, + std::span<const IoHash> ChunkHashes, + std::span<const uint64_t> ChunkRawSizes) + { + ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size()); + InOutChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(ChunkSequence.size())); + InOutChunkedContent.ChunkOrders.reserve(InOutChunkedContent.ChunkOrders.size() + ChunkSequence.size()); + + for (uint32_t ChunkedSequenceIndex : ChunkSequence) + { + const IoHash& ChunkHash = ChunkHashes[ChunkedSequenceIndex]; + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + uint32_t
ChunkIndex = gsl::narrow<uint32_t>(It->second); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + } + else + { + uint32_t ChunkIndex = gsl::narrow<uint32_t>(InOutChunkedContent.ChunkHashes.size()); + ChunkHashToChunkIndex.insert_or_assign(ChunkHash, ChunkIndex); + InOutChunkedContent.ChunkHashes.push_back(ChunkHash); + InOutChunkedContent.ChunkRawSizes.push_back(ChunkRawSizes[ChunkedSequenceIndex]); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + Stats.UniqueChunksFound++; + Stats.UniqueBytesFound += ChunkRawSizes[ChunkedSequenceIndex]; + } + } + InOutChunkedContent.SequenceRawHashes.push_back(RawHash); + Stats.UniqueSequencesFound++; + } + + void AddChunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, + const IoHash& RawHash, + const uint64_t RawSize) + { + InOutChunkedContent.ChunkCounts.push_back(1); + + if (auto It = ChunkHashToChunkIndex.find(RawHash); It != ChunkHashToChunkIndex.end()) + { + uint32_t ChunkIndex = gsl::narrow<uint32_t>(It->second); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + } + else + { + uint32_t ChunkIndex = gsl::narrow<uint32_t>(InOutChunkedContent.ChunkHashes.size()); + ChunkHashToChunkIndex.insert_or_assign(RawHash, ChunkIndex); + InOutChunkedContent.ChunkHashes.push_back(RawHash); + InOutChunkedContent.ChunkRawSizes.push_back(RawSize); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + Stats.UniqueChunksFound++; + Stats.UniqueBytesFound += RawSize; + } + InOutChunkedContent.SequenceRawHashes.push_back(RawHash); + Stats.UniqueSequencesFound++; + } + + IoHash HashOneFile(ChunkingStatistics& Stats, + const ChunkingController& InChunkingController, + ChunkedFolderContent& OutChunkedContent, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex, + RwLock& Lock, + const std::filesystem::path& 
FolderPath, + uint32_t PathIndex, + std::atomic<bool>& AbortFlag) + { + ZEN_TRACE_CPU("ChunkFolderContent"); + + const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; + const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + + if (RawSize == 0) + { + return IoHash::Zero; + } + else + { + ChunkedInfoWithSource Chunked; + const bool DidChunking = + InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag); + if (DidChunking) + { + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + std::vector<uint64_t> ChunkSizes; + ChunkSizes.reserve(Chunked.ChunkSources.size()); + for (const ChunkSource& Source : Chunked.ChunkSources) + { + ChunkSizes.push_back(Source.Size); + } + AddChunkSequence(Stats, + OutChunkedContent.ChunkedContent, + ChunkHashToChunkIndex, + Chunked.Info.RawHash, + Chunked.Info.ChunkSequence, + Chunked.Info.ChunkHashes, + ChunkSizes); + // AddChunkSequence already increments Stats.UniqueSequencesFound; do not count the sequence twice + } + }); + Stats.FilesChunked++; + return Chunked.Info.RawHash; + } + else + { + ZEN_TRACE_CPU("HashOnly"); + + IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); + const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); + + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Hash)) + { + RawHashToSequenceRawHashIndex.insert( + {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); + // AddChunkSequence already increments Stats.UniqueSequencesFound; do not count the sequence twice + } + }); + return Hash; + } + } + } + + std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); } + +} // namespace + +std::string_view
FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv}; + +std::string_view +ToString(SourcePlatform Platform) +{ + return FolderContentSourcePlatformNames[(size_t)Platform]; +} + +SourcePlatform +FromString(std::string_view Platform, SourcePlatform Default) +{ + for (size_t Index = 0; Index < (size_t)SourcePlatform::_Count; Index++) + { + if (Platform == FolderContentSourcePlatformNames[Index]) + { + return (SourcePlatform)Index; + } + } + return Default; +} + +SourcePlatform +GetSourceCurrentPlatform() +{ +#if ZEN_PLATFORM_WINDOWS + return SourcePlatform::Windows; +#endif +#if ZEN_PLATFORM_MAC + return SourcePlatform::MacOS; +#endif +#if ZEN_PLATFORM_LINUX + return SourcePlatform::Linux; +#endif +} + +bool +FolderContent::AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs) +{ +#if ZEN_PLATFORM_WINDOWS + return (Lhs & 0xff) == (Rhs & 0xff); +#endif +#if ZEN_PLATFORM_MAC + return Lhs == Rhs; +#endif +#if ZEN_PLATFORM_LINUX + return Lhs == Rhs; +#endif +} + +// Equal when platform, raw sizes, attributes, modification ticks and every path (compared in generic form) match. +bool +FolderContent::operator==(const FolderContent& Rhs) const +{ + if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) && + (ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size())) + { + const size_t PathCount = Paths.size(); // path counts were verified equal above; a zero bound skipped the path comparison + for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string()) + { + return false; + } + } + return true; + } + return false; +} + +bool +FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const +{ + ZEN_TRACE_CPU("FolderContent::AreKnownFilesEqual"); + tsl::robin_map<std::string, size_t> RhsPathToIndex; + const size_t RhsPathCount = Rhs.Paths.size(); + RhsPathToIndex.reserve(RhsPathCount); + for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + const size_t PathCount =
Paths.size(); + for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const size_t RhsPathIndex = It->second; + if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || + (!AreFileAttributesEqual(Attributes[PathIndex], Rhs.Attributes[RhsPathIndex])) || + (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) + { + return false; + } + } + else + { + return false; + } + } + return true; +} + +void +FolderContent::UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& OutPathIndexesOufOfDate) +{ + ZEN_TRACE_CPU("FolderContent::UpdateState"); + tsl::robin_map<std::string, uint32_t> RhsPathToIndex; + const uint32_t RhsPathCount = gsl::narrow<uint32_t>(Rhs.Paths.size()); + RhsPathToIndex.reserve(RhsPathCount); + for (uint32_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + uint32_t PathCount = gsl::narrow<uint32_t>(Paths.size()); + for (uint32_t PathIndex = 0; PathIndex < PathCount;) + { + if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const uint32_t RhsPathIndex = It->second; + + if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || + (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) + { + RawSizes[PathIndex] = Rhs.RawSizes[RhsPathIndex]; + ModificationTicks[PathIndex] = Rhs.ModificationTicks[RhsPathIndex]; + OutPathIndexesOufOfDate.push_back(PathIndex); + } + Attributes[PathIndex] = Rhs.Attributes[RhsPathIndex]; + PathIndex++; + } + else + { + Paths.erase(Paths.begin() + PathIndex); + RawSizes.erase(RawSizes.begin() + PathIndex); + Attributes.erase(Attributes.begin() + PathIndex); + ModificationTicks.erase(ModificationTicks.begin() + PathIndex); + PathCount--; + } + } +} + +FolderContent +GetUpdatedContent(const FolderContent& Old, const 
FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPathIndexes) +{ + ZEN_TRACE_CPU("FolderContent::GetUpdatedContent"); + FolderContent Result = {.Platform = Old.Platform}; + tsl::robin_map<std::string, uint32_t> NewPathToIndex; + const uint32_t NewPathCount = gsl::narrow<uint32_t>(New.Paths.size()); + NewPathToIndex.reserve(NewPathCount); + for (uint32_t NewPathIndex = 0; NewPathIndex < NewPathCount; NewPathIndex++) + { + NewPathToIndex.insert({New.Paths[NewPathIndex].generic_string(), NewPathIndex}); + } + uint32_t OldPathCount = gsl::narrow<uint32_t>(Old.Paths.size()); + for (uint32_t OldPathIndex = 0; OldPathIndex < OldPathCount; OldPathIndex++) + { + if (auto It = NewPathToIndex.find(Old.Paths[OldPathIndex].generic_string()); It != NewPathToIndex.end()) + { + const uint32_t NewPathIndex = It->second; + + if ((Old.RawSizes[OldPathIndex] != New.RawSizes[NewPathIndex]) || + (Old.ModificationTicks[OldPathIndex] != New.ModificationTicks[NewPathIndex])) + { + Result.Paths.push_back(New.Paths[NewPathIndex]); + Result.RawSizes.push_back(New.RawSizes[NewPathIndex]); + Result.Attributes.push_back(New.Attributes[NewPathIndex]); + Result.ModificationTicks.push_back(New.ModificationTicks[NewPathIndex]); + } + } + else + { + OutDeletedPathIndexes.push_back(Old.Paths[OldPathIndex]); + } + } + return Result; +} + +void +SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output) +{ + ZEN_TRACE_CPU("SaveFolderContentToCompactBinary"); + Output.AddString("platform"sv, ToString(Content.Platform)); + compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); + compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); + compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output); + compactbinary_helpers::WriteArray(Content.ModificationTicks, "modificationTimes"sv, Output); +} + +FolderContent +LoadFolderContentToCompactBinary(CbObjectView Input) +{ + ZEN_TRACE_CPU("LoadFolderContentToCompactBinary"); 
+ FolderContent Content; + Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); + compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); + compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes); + compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes); + compactbinary_helpers::ReadArray("modificationTimes"sv, Input, Content.ModificationTicks); + return Content; +} + +FolderContent +GetFolderContent(GetFolderContentStatistics& Stats, + const std::filesystem::path& RootPath, + std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile, + WorkerThreadPool& WorkerPool, + int32_t UpdateInteralMS, + std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, + std::atomic<bool>& AbortFlag) +{ + ZEN_TRACE_CPU("GetFolderContent"); + + Stopwatch Timer; + auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + FolderContent Content; + struct AsyncVisitor : public GetDirectoryContentVisitor + { + AsyncVisitor(GetFolderContentStatistics& Stats, + std::atomic<bool>& AbortFlag, + FolderContent& Content, + std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile) + : m_Stats(Stats) + , m_AbortFlag(AbortFlag) + , m_FoundContent(Content) + , m_AcceptDirectory(std::move(AcceptDirectory)) + , m_AcceptFile(std::move(AcceptFile)) + { + } + virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override + { + if (!m_AbortFlag) + { + m_Stats.FoundFileCount += Content.FileNames.size(); + for (uint64_t FileSize : Content.FileSizes) + { + m_Stats.FoundFileByteCount += FileSize; + } + std::string RelativeDirectoryPath = RelativeRoot.generic_string(); + if 
(m_AcceptDirectory(RelativeDirectoryPath)) + { + std::vector<std::filesystem::path> Paths; + std::vector<uint64_t> RawSizes; + std::vector<uint32_t> Attributes; + std::vector<uint64_t> ModificatonTicks; + Paths.reserve(Content.FileNames.size()); + RawSizes.reserve(Content.FileNames.size()); + Attributes.reserve(Content.FileNames.size()); + ModificatonTicks.reserve(Content.FileModificationTicks.size()); + + for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++) + { + const std::filesystem::path& FileName = Content.FileNames[FileIndex]; + std::string RelativePath = (RelativeRoot / FileName).generic_string(); + std::replace(RelativePath.begin(), RelativePath.end(), '\\', '/'); + if (m_AcceptFile(RelativePath, Content.FileSizes[FileIndex], Content.FileAttributes[FileIndex])) + { + Paths.emplace_back(std::move(RelativePath)); + RawSizes.emplace_back(Content.FileSizes[FileIndex]); + Attributes.emplace_back(Content.FileAttributes[FileIndex]); + ModificatonTicks.emplace_back(Content.FileModificationTicks[FileIndex]); + + m_Stats.AcceptedFileCount++; + m_Stats.AcceptedFileByteCount += Content.FileSizes[FileIndex]; + } + } + m_Lock.WithExclusiveLock([&]() { + m_FoundContent.Paths.insert(m_FoundContent.Paths.end(), Paths.begin(), Paths.end()); + m_FoundContent.RawSizes.insert(m_FoundContent.RawSizes.end(), RawSizes.begin(), RawSizes.end()); + m_FoundContent.Attributes.insert(m_FoundContent.Attributes.end(), Attributes.begin(), Attributes.end()); + m_FoundContent.ModificationTicks.insert(m_FoundContent.ModificationTicks.end(), + ModificatonTicks.begin(), + ModificatonTicks.end()); + }); + } + } + } + + GetFolderContentStatistics& m_Stats; + std::atomic<bool>& m_AbortFlag; + RwLock m_Lock; + FolderContent& m_FoundContent; + std::function<bool(const std::string_view& RelativePath)> m_AcceptDirectory; + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)> m_AcceptFile; + } Visitor(Stats, AbortFlag, Content, 
std::move(AcceptDirectory), std::move(AcceptFile)); + + Latch PendingWork(1); + GetDirectoryContent(RootPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes | + DirectoryContentFlags::IncludeAttributes | DirectoryContentFlags::IncludeModificationTick, + Visitor, + WorkerPool, + PendingWork); + PendingWork.CountDown(); + while (!PendingWork.Wait(UpdateInteralMS)) + { + UpdateCallback(AbortFlag.load(), PendingWork.Remaining()); + } + std::vector<size_t> Order; + size_t PathCount = Content.Paths.size(); + Order.resize(Content.Paths.size()); + std::vector<std::string> Parents; + Parents.reserve(PathCount); + std::vector<std::string> Filenames; + Filenames.reserve(PathCount); + for (size_t OrderIndex = 0; OrderIndex < PathCount; OrderIndex++) + { + Order[OrderIndex] = OrderIndex; + Parents.emplace_back(Content.Paths[OrderIndex].parent_path().generic_string()); + Filenames.emplace_back(Content.Paths[OrderIndex].filename().generic_string()); + } + std::sort(Order.begin(), Order.end(), [&Parents, &Filenames](size_t Lhs, size_t Rhs) { + const std::string& LhsParent = Parents[Lhs]; + const std::string& RhsParent = Parents[Rhs]; + if (LhsParent < RhsParent) + { + return true; + } + else if (LhsParent > RhsParent) + { + return false; + } + return Filenames[Lhs] < Filenames[Rhs]; + }); + FolderContent OrderedContent; + OrderedContent.Paths.reserve(PathCount); + OrderedContent.RawSizes.reserve(PathCount); + OrderedContent.Attributes.reserve(PathCount); + OrderedContent.ModificationTicks.reserve(PathCount); + for (size_t OrderIndex : Order) + { + OrderedContent.Paths.emplace_back(std::move(Content.Paths[OrderIndex])); + OrderedContent.RawSizes.emplace_back(Content.RawSizes[OrderIndex]); + OrderedContent.Attributes.emplace_back(Content.Attributes[OrderIndex]); + OrderedContent.ModificationTicks.emplace_back(Content.ModificationTicks[OrderIndex]); + } + return OrderedContent; +} + +void 
+SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output) +{ + ZEN_TRACE_CPU("SaveChunkedFolderContentToCompactBinary"); + Output.AddString("platform"sv, ToString(Content.Platform)); + compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); + compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); + compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output); + compactbinary_helpers::WriteArray(Content.RawHashes, "rawHashes"sv, Output); + + Output.BeginObject("chunkedContent"); + compactbinary_helpers::WriteArray(Content.ChunkedContent.SequenceRawHashes, "sequenceRawHashes"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkCounts, "chunkCounts"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkOrders, "chunkOrders"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkHashes, "chunkHashes"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkRawSizes, "chunkRawSizes"sv, Output); + Output.EndObject(); // chunkedContent +} + +ChunkedFolderContent +LoadChunkedFolderContentToCompactBinary(CbObjectView Input) +{ + ZEN_TRACE_CPU("LoadChunkedFolderContentToCompactBinary"); + ChunkedFolderContent Content; + Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); + compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); + compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes); + compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes); + compactbinary_helpers::ReadArray("rawHashes"sv, Input, Content.RawHashes); + + CbObjectView ChunkedContentView = Input["chunkedContent"sv].AsObjectView(); + compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkedContentView, Content.ChunkedContent.SequenceRawHashes); + compactbinary_helpers::ReadArray("chunkCounts"sv, ChunkedContentView, 
Content.ChunkedContent.ChunkCounts); + compactbinary_helpers::ReadArray("chunkOrders"sv, ChunkedContentView, Content.ChunkedContent.ChunkOrders); + compactbinary_helpers::ReadArray("chunkHashes"sv, ChunkedContentView, Content.ChunkedContent.ChunkHashes); + compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkedContentView, Content.ChunkedContent.ChunkRawSizes); + return Content; +} + +ChunkedFolderContent +MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays) +{ + ZEN_TRACE_CPU("MergeChunkedFolderContents"); + + ZEN_ASSERT(!Overlays.empty()); + + ChunkedFolderContent Result; + const size_t BasePathCount = Base.Paths.size(); + Result.Paths.reserve(BasePathCount); + Result.RawSizes.reserve(BasePathCount); + Result.Attributes.reserve(BasePathCount); + Result.RawHashes.reserve(BasePathCount); + + const size_t BaseChunkCount = Base.ChunkedContent.ChunkHashes.size(); + Result.ChunkedContent.SequenceRawHashes.reserve(Base.ChunkedContent.SequenceRawHashes.size()); + Result.ChunkedContent.ChunkCounts.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkOrders.reserve(Base.ChunkedContent.ChunkOrders.size()); + + tsl::robin_map<std::string, std::filesystem::path> GenericPathToActualPath; + for (const std::filesystem::path& Path : Base.Paths) + { + GenericPathToActualPath.insert({PathCompareString(Path), Path}); + } + for (const ChunkedFolderContent& Overlay : Overlays) + { + for (const std::filesystem::path& Path : Overlay.Paths) + { + GenericPathToActualPath.insert({PathCompareString(Path), Path}); + } + } + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; + + auto BuildOverlayPaths = [](std::span<const ChunkedFolderContent> Overlays) -> tsl::robin_set<std::string> { + tsl::robin_set<std::string> Result; + for (const ChunkedFolderContent& OverlayContent : 
Overlays) + { + for (const std::filesystem::path& Path : OverlayContent.Paths) + { + Result.insert(PathCompareString(Path)); + } + } + return Result; + }; + + auto AddContent = [&BuildOverlayPaths](ChunkedFolderContent& Result, + const ChunkedFolderContent& OverlayContent, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex, + const tsl::robin_map<std::string, std::filesystem::path>& GenericPathToActualPath, + std::span<const ChunkedFolderContent> Overlays) { + const ChunkedContentLookup OverlayLookup = BuildChunkedContentLookup(OverlayContent); + tsl::robin_set<std::string> BaseOverlayPaths = BuildOverlayPaths(Overlays); + for (uint32_t PathIndex = 0; PathIndex < OverlayContent.Paths.size(); PathIndex++) + { + std::string GenericPath = PathCompareString(OverlayContent.Paths[PathIndex]); + if (!BaseOverlayPaths.contains(GenericPath)) + { + // This asset will not be overridden by a later layer - add it + + const std::filesystem::path OriginalPath = GenericPathToActualPath.at(GenericPath); + Result.Paths.push_back(OriginalPath); + const IoHash& RawHash = OverlayContent.RawHashes[PathIndex]; + Result.RawSizes.push_back(OverlayContent.RawSizes[PathIndex]); + Result.Attributes.push_back(OverlayContent.Attributes[PathIndex]); + Result.RawHashes.push_back(RawHash); + + if (OverlayContent.RawSizes[PathIndex] > 0) + { + if (!RawHashToSequenceRawHashIndex.contains(RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); + const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + ChunkingStatistics Stats; + std::span<const uint32_t> OriginalChunkOrder = + 
std::span<const uint32_t>(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); + AddChunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + OverlayContent.ChunkedContent.ChunkHashes, + OverlayContent.ChunkedContent.ChunkRawSizes); + Stats.UniqueSequencesFound++; + } + } + } + } + }; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> MergedChunkHashToChunkIndex; + AddContent(Result, Base, MergedChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, GenericPathToActualPath, Overlays); + for (uint32_t OverlayIndex = 0; OverlayIndex < Overlays.size(); OverlayIndex++) + { + AddContent(Result, + Overlays[OverlayIndex], + MergedChunkHashToChunkIndex, + RawHashToSequenceRawHashIndex, + GenericPathToActualPath, + Overlays.subspan(OverlayIndex + 1)); + } + return Result; +} + +ChunkedFolderContent +DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths) +{ + ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); + + ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); + ChunkedFolderContent Result = {.Platform = BaseContent.Platform}; + if (DeletedPaths.size() < BaseContent.Paths.size()) + { + tsl::robin_set<std::string> DeletedPathSet; + DeletedPathSet.reserve(DeletedPaths.size()); + for (const std::filesystem::path& DeletedPath : DeletedPaths) + { + DeletedPathSet.insert(PathCompareString(DeletedPath)); + } + const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent); + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; + for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++) + { + const std::filesystem::path& Path = BaseContent.Paths[PathIndex]; + if (!DeletedPathSet.contains(PathCompareString(Path))) + { + const IoHash& RawHash = BaseContent.RawHashes[PathIndex]; + const 
uint64_t RawSize = BaseContent.RawSizes[PathIndex]; + Result.Paths.push_back(Path); + Result.RawSizes.push_back(RawSize); + Result.Attributes.push_back(BaseContent.Attributes[PathIndex]); + Result.RawHashes.push_back(RawHash); + if (RawSize > 0) + { + if (!RawHashToSequenceRawHashIndex.contains(RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); + const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = BaseLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + ChunkingStatistics Stats; + std::span<const uint32_t> OriginalChunkOrder = + std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); + AddChunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + BaseContent.ChunkedContent.ChunkHashes, + BaseContent.ChunkedContent.ChunkRawSizes); + Stats.UniqueSequencesFound++; + } + } + } + } + } + return Result; +} + +ChunkedFolderContent +ChunkFolderContent(ChunkingStatistics& Stats, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& RootPath, + const FolderContent& Content, + const ChunkingController& InChunkingController, + int32_t UpdateInteralMS, + std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, + std::atomic<bool>& AbortFlag) +{ + ZEN_TRACE_CPU("ChunkFolderContent"); + + Stopwatch Timer; + auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + ChunkedFolderContent Result = {.Platform = Content.Platform, + .Paths = Content.Paths, + .RawSizes = Content.RawSizes, + .Attributes = Content.Attributes}; + const size_t ItemCount = Result.Paths.size(); + Result.RawHashes.resize(ItemCount, IoHash::Zero); + 
Result.ChunkedContent.SequenceRawHashes.reserve(ItemCount); // Up to 1 per file, maybe less + Result.ChunkedContent.ChunkCounts.reserve(ItemCount); // Up to one per file + Result.ChunkedContent.ChunkOrders.reserve(ItemCount); // At least 1 per file, maybe more + Result.ChunkedContent.ChunkHashes.reserve(ItemCount); // At least 1 per file, maybe more + Result.ChunkedContent.ChunkRawSizes.reserve(ItemCount); // At least 1 per file, maybe more + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToChunkSequenceIndex; + RawHashToChunkSequenceIndex.reserve(ItemCount); + ChunkHashToChunkIndex.reserve(ItemCount); + { + std::vector<uint32_t> Order; + Order.resize(ItemCount); + for (uint32_t I = 0; I < ItemCount; I++) + { + Order[I] = I; + } + + // Handle the biggest files first so we don't end up with one straggling large file at the end + // std::sort(Order.begin(), Order.end(), [&](uint32_t Lhs, uint32_t Rhs) { return Result.RawSizes[Lhs] > Result.RawSizes[Rhs]; + //}); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; + RawHashToSequenceRawHashIndex.reserve(ItemCount); + + RwLock Lock; + + ParallellWork Work(AbortFlag); + + for (uint32_t PathIndex : Order) + { + if (Work.IsAborted()) + { + break; + } + Work.ScheduleWork( + WorkerPool, // GetSyncWorkerPool() + [&, PathIndex](std::atomic<bool>& AbortFlag) { + if (!AbortFlag) + { + IoHash RawHash = HashOneFile(Stats, + InChunkingController, + Result, + ChunkHashToChunkIndex, + RawHashToSequenceRawHashIndex, + Lock, + RootPath, + PathIndex, + AbortFlag); + Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; }); + Stats.FilesProcessed++; + } + }, + Work.DefaultErrorFunction()); + } + + Work.Wait(UpdateInteralMS, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted); + ZEN_UNUSED(PendingWork); + UpdateCallback(Work.IsAborted(), Work.PendingWork().Remaining()); + }); + } + 
return Result; +} + +ChunkedContentLookup +BuildChunkedContentLookup(const ChunkedFolderContent& Content) +{ + ZEN_TRACE_CPU("BuildChunkedContentLookup"); + + struct ChunkLocationReference + { + uint32_t ChunkIndex = (uint32_t)-1; + ChunkedContentLookup::ChunkSequenceLocation Location; + }; + + ChunkedContentLookup Result; + { + const uint32_t SequenceRawHashesCount = gsl::narrow<uint32_t>(Content.ChunkedContent.SequenceRawHashes.size()); + Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount); + Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount); + uint32_t OrderOffset = 0; + for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size(); + SequenceRawHashIndex++) + { + Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); + Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset); + OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + } + } + + std::vector<ChunkLocationReference> Locations; + Locations.reserve(Content.ChunkedContent.ChunkOrders.size()); + for (uint32_t SequenceIndex = 0; SequenceIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceIndex++) + { + const uint32_t OrderOffset = Result.SequenceIndexChunkOrderOffset[SequenceIndex]; + const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceIndex]; + uint64_t LocationOffset = 0; + for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) + { + uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; + + Locations.push_back( + ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkSequenceLocation{SequenceIndex, LocationOffset}}); + + LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + } + + std::sort(Locations.begin(), Locations.end(), [](const ChunkLocationReference& Lhs, const ChunkLocationReference& Rhs) { + if (Lhs.ChunkIndex < 
Rhs.ChunkIndex) + { + return true; + } + if (Lhs.ChunkIndex > Rhs.ChunkIndex) + { + return false; + } + if (Lhs.Location.SequenceIndex < Rhs.Location.SequenceIndex) + { + return true; + } + if (Lhs.Location.SequenceIndex > Rhs.Location.SequenceIndex) + { + return false; + } + return Lhs.Location.Offset < Rhs.Location.Offset; + }); + + Result.ChunkSequenceLocations.reserve(Locations.size()); + const uint32_t ChunkCount = gsl::narrow<uint32_t>(Content.ChunkedContent.ChunkHashes.size()); + Result.ChunkHashToChunkIndex.reserve(ChunkCount); + size_t RangeOffset = 0; + for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) + { + Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex}); + uint32_t Count = 0; + while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex)) + { + Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location); + Count++; + } + Result.ChunkSequenceLocationOffset.push_back(RangeOffset); + Result.ChunkSequenceLocationCounts.push_back(Count); + RangeOffset += Count; + } + + Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1); + for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + { + if (Content.RawSizes[PathIndex] > 0) + { + const IoHash& RawHash = Content.RawHashes[PathIndex]; + auto SequenceIndexIt = Result.RawHashToSequenceIndex.find(RawHash); + ZEN_ASSERT(SequenceIndexIt != Result.RawHashToSequenceIndex.end()); + const uint32_t SequenceIndex = SequenceIndexIt->second; + if (Result.SequenceIndexFirstPathIndex[SequenceIndex] == (uint32_t)-1) + { + Result.SequenceIndexFirstPathIndex[SequenceIndex] = PathIndex; + } + } + } + + return Result; +} + +} // namespace zen diff --git a/src/zenstore/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp index f200bc1ec..a2c041ffd 100644 --- a/src/zenstore/chunkedfile.cpp +++ b/src/zenutil/chunkedfile.cpp @@ -1,7 +1,9 
@@ // Copyright Epic Games, Inc. All Rights Reserved. +#include <zenutil/chunkedfile.h> + #include <zencore/basicfile.h> -#include <zenstore/chunkedfile.h> +#include <zencore/trace.h> #include "chunking.h" @@ -32,6 +34,7 @@ namespace { IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info) { + ZEN_TRACE_CPU("SerializeChunkedInfo"); size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) + RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16); IoBuffer HeaderData(HeaderSize); @@ -64,6 +67,7 @@ SerializeChunkedInfo(const ChunkedInfo& Info) ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer) { + ZEN_TRACE_CPU("DeserializeChunkedInfo"); MemoryView View = Buffer.GetView(); ChunkedHeader Header; { @@ -98,6 +102,7 @@ DeserializeChunkedInfo(IoBuffer& Buffer) void Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk) { + ZEN_TRACE_CPU("Reconstruct"); BasicFile Reconstructed; Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate); BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024); @@ -111,8 +116,15 @@ Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, st } ChunkedInfoWithSource -ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params) +ChunkData(BasicFile& RawData, + uint64_t Offset, + uint64_t Size, + ChunkedParams Params, + std::atomic<uint64_t>* BytesProcessed, + std::atomic<bool>* AbortFlag) { + ZEN_TRACE_CPU("ChunkData"); + ChunkedInfoWithSource Result; tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> FoundChunks; @@ -120,7 +132,7 @@ ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Para Chunker.SetUseThreshold(Params.UseThreshold); Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize); size_t End = Offset + Size; - const size_t ScanBufferSize = 1u * 1024 * 1024; // (Params.MaxSize * 9) / 3;//1 * 1024 * 1024; + const 
size_t ScanBufferSize = Max(1u * 1024 * 1024, Params.MaxSize); BasicFileBuffer RawBuffer(RawData, ScanBufferSize); MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); ZEN_ASSERT(!SliceView.IsEmpty()); @@ -128,6 +140,10 @@ ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Para IoHashStream RawHashStream; while (Offset < End) { + if (AbortFlag != nullptr && AbortFlag->load()) + { + return {}; + } size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize); if (ScanLength == ZenChunkHelper::kNoBoundaryFound) { @@ -163,6 +179,10 @@ ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Para SliceSize = SliceView.GetSize(); Offset += ChunkLength; + if (BytesProcessed != nullptr) + { + BytesProcessed->fetch_add(ChunkLength); + } } Result.Info.RawSize = Size; Result.Info.RawHash = RawHashStream.GetHash(); diff --git a/src/zenstore/chunking.cpp b/src/zenutil/chunking.cpp index 71f0a06e4..71f0a06e4 100644 --- a/src/zenstore/chunking.cpp +++ b/src/zenutil/chunking.cpp diff --git a/src/zenstore/chunking.h b/src/zenutil/chunking.h index 09c56454f..09c56454f 100644 --- a/src/zenstore/chunking.h +++ b/src/zenutil/chunking.h diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp new file mode 100644 index 000000000..2a7057a46 --- /dev/null +++ b/src/zenutil/chunkingcontroller.cpp @@ -0,0 +1,275 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zenutil/chunkingcontroller.h> + +#include <zencore/basicfile.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/trace.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { +using namespace std::literals; + +namespace { + std::vector<std::string> ReadStringArray(CbArrayView StringArray) + { + std::vector<std::string> Result; + Result.reserve(StringArray.Num()); + for (CbFieldView FieldView : StringArray) + { + Result.emplace_back(FieldView.AsString()); + } + return Result; + } + + ChunkedParams ReadChunkParams(CbObjectView Params) + { + bool UseThreshold = Params["UseThreshold"sv].AsBool(true); + size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize); + size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize); + size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize); + + return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize}; + } + +} // namespace + +class BasicChunkingController : public ChunkingController +{ +public: + BasicChunkingController(std::span<const std::string_view> ExcludeExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams) + : m_ChunkExcludeExtensions(ExcludeExtensions.begin(), ExcludeExtensions.end()) + , m_ChunkFileSizeLimit(ChunkFileSizeLimit) + , m_ChunkingParams(ChunkingParams) + { + } + + BasicChunkingController(CbObjectView Parameters) + : m_ChunkExcludeExtensions(ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView())) + , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit)) + , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())) + { + } + + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic<uint64_t>& BytesProcessed, + std::atomic<bool>& 
AbortFlag) const override + { + ZEN_TRACE_CPU("BasicChunkingController::ProcessFile"); + const bool ExcludeFromChunking = + std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != + m_ChunkExcludeExtensions.end(); + + if (ExcludeFromChunking || (RawSize < m_ChunkFileSizeLimit)) + { + return false; + } + + BasicFile Buffer(InputPath, BasicFile::Mode::kRead); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed, &AbortFlag); + return true; + } + + virtual std::string_view GetName() const override { return Name; } + + virtual CbObject GetParameters() const override + { + CbObjectWriter Writer; + Writer.BeginArray("ChunkExcludeExtensions"sv); + { + for (const std::string& Extension : m_ChunkExcludeExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit); + Writer.BeginObject("ChunkingParams"sv); + { + Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold); + + Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize); + Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize); + Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize); + } + Writer.EndObject(); // ChunkingParams + return Writer.Save(); + } + static constexpr std::string_view Name = "BasicChunkingController"sv; + +protected: + const std::vector<std::string> m_ChunkExcludeExtensions; + const uint64_t m_ChunkFileSizeLimit; + const ChunkedParams m_ChunkingParams; +}; + +class ChunkingControllerWithFixedChunking : public ChunkingController +{ +public: + ChunkingControllerWithFixedChunking(std::span<const std::string_view> FixedChunkingExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams, + uint32_t FixedChunkingChunkSize) + : m_FixedChunkingExtensions(FixedChunkingExtensions.begin(), FixedChunkingExtensions.end()) + , m_ChunkFileSizeLimit(ChunkFileSizeLimit) + , 
m_ChunkingParams(ChunkingParams) + , m_FixedChunkingChunkSize(FixedChunkingChunkSize) + { + } + + ChunkingControllerWithFixedChunking(CbObjectView Parameters) + : m_FixedChunkingExtensions(ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView())) + , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit)) + , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())) + , m_FixedChunkingChunkSize(Parameters["FixedChunkingChunkSize"sv].AsUInt32(16u * 1024u * 1024u)) + { + } + + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic<uint64_t>& BytesProcessed, + std::atomic<bool>& AbortFlag) const override + { + ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile"); + if (RawSize < m_ChunkFileSizeLimit) + { + return false; + } + const bool FixedChunking = std::find(m_FixedChunkingExtensions.begin(), m_FixedChunkingExtensions.end(), InputPath.extension()) != + m_FixedChunkingExtensions.end(); + + if (FixedChunking) + { + ZEN_TRACE_CPU("FixedChunking"); + IoHashStream FullHash; + IoBuffer Source = IoBufferBuilder::MakeFromFile(InputPath); + uint64_t Offset = 0; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(1 + (RawSize / m_FixedChunkingChunkSize)); + while (Offset < RawSize) + { + if (AbortFlag) + { + return false; + } + uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_FixedChunkingChunkSize); + IoBuffer Chunk(Source, Offset, ChunkSize); + MemoryView ChunkData = Chunk.GetView(); + FullHash.Append(ChunkData); + + IoHash ChunkHash = IoHash::HashBuffer(ChunkData); + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + OutChunked.Info.ChunkSequence.push_back(It->second); + } + else + { + uint32_t ChunkIndex = gsl::narrow<uint32_t>(OutChunked.Info.ChunkHashes.size()); + 
OutChunked.Info.ChunkHashes.push_back(ChunkHash); + OutChunked.Info.ChunkSequence.push_back(ChunkIndex); + OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow<uint32_t>(ChunkSize)}); + } + Offset += ChunkSize; + BytesProcessed.fetch_add(ChunkSize); + } + OutChunked.Info.RawSize = RawSize; + OutChunked.Info.RawHash = FullHash.GetHash(); + return true; + } + else + { + BasicFile Buffer(InputPath, BasicFile::Mode::kRead); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + return true; + } + } + + virtual std::string_view GetName() const override { return Name; } + + virtual CbObject GetParameters() const override + { + CbObjectWriter Writer; + Writer.BeginArray("FixedChunkingExtensions"); + { + for (const std::string& Extension : m_FixedChunkingExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit); + Writer.BeginObject("ChunkingParams"sv); + { + Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold); + + Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize); + Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize); + Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize); + } + Writer.EndObject(); // ChunkingParams + Writer.AddInteger("FixedChunkingChunkSize"sv, m_FixedChunkingChunkSize); + return Writer.Save(); + } + + static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv; + +protected: + const std::vector<std::string> m_FixedChunkingExtensions; + const uint64_t m_ChunkFileSizeLimit; + const ChunkedParams m_ChunkingParams; + const uint32_t m_FixedChunkingChunkSize; +}; + +std::unique_ptr<ChunkingController> +CreateBasicChunkingController(std::span<const std::string_view> ExcludeExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams) +{ + return std::make_unique<BasicChunkingController>(ExcludeExtensions, 
ChunkFileSizeLimit, ChunkingParams); +} +std::unique_ptr<ChunkingController> +CreateBasicChunkingController(CbObjectView Parameters) +{ + return std::make_unique<BasicChunkingController>(Parameters); +} + +std::unique_ptr<ChunkingController> +CreateChunkingControllerWithFixedChunking(std::span<const std::string_view> FixedChunkingExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams, + uint32_t FixedChunkingChunkSize) +{ + return std::make_unique<ChunkingControllerWithFixedChunking>(FixedChunkingExtensions, + ChunkFileSizeLimit, + ChunkingParams, + FixedChunkingChunkSize); +} +std::unique_ptr<ChunkingController> +CreateChunkingControllerWithFixedChunking(CbObjectView Parameters) +{ + return std::make_unique<ChunkingControllerWithFixedChunking>(Parameters); +} + +std::unique_ptr<ChunkingController> +CreateChunkingController(std::string_view Name, CbObjectView Parameters) +{ + if (Name == BasicChunkingController::Name) + { + return CreateBasicChunkingController(Parameters); + } + else if (Name == ChunkingControllerWithFixedChunking::Name) + { + return CreateChunkingControllerWithFixedChunking(Parameters); + } + return {}; +} + +} // namespace zen diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp new file mode 100644 index 000000000..47a4e1cc4 --- /dev/null +++ b/src/zenutil/filebuildstorage.cpp @@ -0,0 +1,644 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zenutil/filebuildstorage.h> + +#include <zencore/basicfile.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryvalidation.h> +#include <zencore/fmtutils.h> +#include <zencore/scopeguard.h> +#include <zencore/timer.h> +#include <zencore/trace.h> + +namespace zen { + +using namespace std::literals; + +class FileBuildStorage : public BuildStorage +{ +public: + explicit FileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec, + double DelayPerKBSec) + : m_StoragePath(StoragePath) + , m_Stats(Stats) + , m_EnableJsonOutput(EnableJsonOutput) + , m_LatencySec(LatencySec) + , m_DelayPerKBSec(DelayPerKBSec) + { + CreateDirectories(GetBuildsFolder()); + CreateDirectories(GetBlobsFolder()); + CreateDirectories(GetBlobsMetadataFolder()); + } + + virtual ~FileBuildStorage() {} + + virtual CbObject ListBuilds(CbObject Query) override + { + ZEN_TRACE_CPU("FileBuildStorage::ListBuilds"); + ZEN_UNUSED(Query); + + SimulateLatency(Query.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildFolder = GetBuildsFolder(); + DirectoryContent Content; + GetDirectoryContent(BuildFolder, DirectoryContentFlags::IncludeDirs, Content); + CbObjectWriter Writer; + Writer.BeginArray("results"); + { + for (const std::filesystem::path& BuildPath : Content.Directories) + { + Oid BuildId = Oid::TryFromHexString(BuildPath.stem().string()); + if (BuildId != Oid::Zero) + { + Writer.BeginObject(); + { + Writer.AddObjectId("buildId", BuildId); + Writer.AddObject("metadata", ReadBuild(BuildId)["metadata"sv].AsObjectView()); + } + Writer.EndObject(); + } + } + } + Writer.EndArray(); // builds + Writer.Save(); + SimulateLatency(Writer.GetSaveSize(), 0); + return Writer.Save(); + } + + virtual CbObject PutBuild(const Oid& BuildId, const 
CbObject& MetaData) override + { + ZEN_TRACE_CPU("FileBuildStorage::PutBuild"); + SimulateLatency(MetaData.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + CbObjectWriter BuildObject; + BuildObject.AddObject("metadata", MetaData); + BuildObject.AddInteger("chunkSize"sv, 32u * 1024u * 1024u); + WriteBuild(BuildId, BuildObject.Save()); + + CbObjectWriter BuildResponse; + BuildResponse.AddInteger("chunkSize"sv, 32u * 1024u * 1024u); + BuildResponse.Save(); + + SimulateLatency(0, BuildResponse.GetSaveSize()); + return BuildResponse.Save(); + } + + virtual CbObject GetBuild(const Oid& BuildId) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetBuild"); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + CbObject Build = ReadBuild(BuildId); + SimulateLatency(0, Build.GetSize()); + return Build; + } + + virtual void FinalizeBuild(const Oid& BuildId) override + { + ZEN_TRACE_CPU("FileBuildStorage::FinalizeBuild"); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + } + + virtual std::pair<IoHash, std::vector<IoHash>> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) override + { + ZEN_TRACE_CPU("FileBuildStorage::PutBuildPart"); + SimulateLatency(MetaData.GetSize(), 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + 
CreateDirectories(BuildPartDataPath.parent_path()); + + TemporaryFile::SafeWriteFile(BuildPartDataPath, MetaData.GetView()); + m_WrittenBytes += MetaData.GetSize(); + WriteAsJson(BuildPartDataPath, MetaData); + + IoHash RawHash = IoHash::HashBuffer(MetaData.GetView()); + + CbObjectWriter Writer; + { + CbObject BuildObject = ReadBuild(BuildId); + CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); + CbObjectView MetaDataView = BuildObject["metadata"sv].AsObjectView(); + + Writer.AddObject("metadata"sv, MetaDataView); + Writer.BeginObject("parts"sv); + { + for (CbFieldView PartView : PartsObject) + { + if (PartView.GetName() != PartName) + { + Writer.AddObjectId(PartView.GetName(), PartView.AsObjectId()); + } + } + Writer.AddObjectId(PartName, BuildPartId); + } + Writer.EndObject(); // parts + } + WriteBuild(BuildId, Writer.Save()); + + std::vector<IoHash> NeededAttachments = GetNeededAttachments(MetaData); + + SimulateLatency(0, sizeof(IoHash) * NeededAttachments.size()); + + return std::make_pair(RawHash, std::move(NeededAttachments)); + } + + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetBuildPart"); + SimulateLatency(0, 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + + IoBuffer Payload = ReadFile(BuildPartDataPath).Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + + ZEN_ASSERT(ValidateCompactBinary(Payload.GetView(), CbValidateMode::Default) == CbValidateError::None); + + CbObject BuildPartObject = CbObject(SharedBuffer(Payload)); + + SimulateLatency(0, BuildPartObject.GetSize()); + + return BuildPartObject; + } + + virtual std::vector<IoHash> FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override + { + 
ZEN_TRACE_CPU("FileBuildStorage::FinalizeBuildPart"); + SimulateLatency(0, 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + IoBuffer Payload = ReadFile(BuildPartDataPath).Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + IoHash RawHash = IoHash::HashBuffer(Payload.GetView()); + if (RawHash != PartHash) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part {}: Expected hash {}, got {}", BuildPartId, PartHash, RawHash)); + } + + CbObject BuildPartObject = CbObject(SharedBuffer(Payload)); + std::vector<IoHash> NeededAttachments(GetNeededAttachments(BuildPartObject)); + + SimulateLatency(0, NeededAttachments.size() * sizeof(IoHash)); + + return NeededAttachments; + } + + virtual void PutBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + const CompositeBuffer& Payload) override + { + ZEN_TRACE_CPU("FileBuildStorage::PutBuildBlob"); + ZEN_UNUSED(BuildId); + ZEN_ASSERT(ContentType == ZenContentType::kCompressedBinary); + SimulateLatency(Payload.GetSize(), 0); + + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, Payload)); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (!std::filesystem::is_regular_file(BlockPath)) + { + CreateDirectories(BlockPath.parent_path()); + TemporaryFile::SafeWriteFile(BlockPath, Payload.Flatten().GetView()); + } + m_Stats.TotalBytesWritten += Payload.GetSize(); + SimulateLatency(0, 0); + } + + virtual std::vector<std::function<void()>> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function<IoBuffer(uint64_t 
Offset, uint64_t Size)>&& Transmitter, + std::function<void(uint64_t, bool)>&& OnSentBytes) override + { + ZEN_TRACE_CPU("FileBuildStorage::PutLargeBuildBlob"); + ZEN_UNUSED(BuildId); + ZEN_UNUSED(ContentType); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (!std::filesystem::is_regular_file(BlockPath)) + { + CreateDirectories(BlockPath.parent_path()); + + struct WorkloadData + { + std::function<IoBuffer(uint64_t Offset, uint64_t Size)> Transmitter; + std::function<void(uint64_t, bool)> OnSentBytes; + TemporaryFile TempFile; + std::atomic<size_t> PartsLeft; + }; + + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + Workload->Transmitter = std::move(Transmitter); + Workload->OnSentBytes = std::move(OnSentBytes); + std::error_code Ec; + Workload->TempFile.CreateTemporary(BlockPath.parent_path(), Ec); + + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed opening temporary file '{}': {} ({})", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); + } + + std::vector<std::function<void()>> WorkItems; + uint64_t Offset = 0; + while (Offset < PayloadSize) + { + uint64_t Size = Min(32u * 1024u * 1024u, PayloadSize - Offset); + + WorkItems.push_back([this, RawHash, BlockPath, Workload, Offset, Size]() { + ZEN_TRACE_CPU("FileBuildStorage::PutLargeBuildBlob_Work"); + IoBuffer PartPayload = Workload->Transmitter(Offset, Size); + SimulateLatency(PartPayload.GetSize(), 0); + + std::error_code Ec; + Workload->TempFile.Write(PartPayload, Offset, Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Failed writing to temporary file '{}': {} ({})", + Workload->TempFile.GetPath(), + Ec.message(), + Ec.value())); + } + uint64_t BytesWritten = PartPayload.GetSize(); + m_Stats.TotalBytesWritten += BytesWritten; + const bool 
IsLastPart = Workload->PartsLeft.fetch_sub(1) == 1; + if (IsLastPart) + { + Workload->TempFile.Flush(); + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(Workload->TempFile.ReadAll()))); + Workload->TempFile.MoveTemporaryIntoPlace(BlockPath, Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Failed moving temporary file '{}' to '{}': {} ({})", + Workload->TempFile.GetPath(), + BlockPath, + Ec.message(), + Ec.value())); + } + } + Workload->OnSentBytes(BytesWritten, IsLastPart); + SimulateLatency(0, 0); + }); + + Offset += Size; + } + Workload->PartsLeft.store(WorkItems.size()); + + SimulateLatency(0, 0); + return WorkItems; + } + SimulateLatency(0, 0); + return {}; + } + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetBuildBlob"); + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (std::filesystem::is_regular_file(BlockPath)) + { + BasicFile File(BlockPath, BasicFile::Mode::kRead); + IoBuffer Payload; + if (RangeOffset != 0 || RangeBytes != (uint64_t)-1) + { + Payload = IoBuffer(RangeBytes); + File.Read(Payload.GetMutableView().GetData(), RangeBytes, RangeOffset); + } + else + { + Payload = File.ReadAll(); + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(SharedBuffer(Payload)))); + } + Payload.SetContentType(ZenContentType::kCompressedBinary); + m_Stats.TotalBytesRead += Payload.GetSize(); + SimulateLatency(0, Payload.GetSize()); + return Payload; + } + SimulateLatency(0, 0); + return IoBuffer{}; + } + + virtual std::vector<std::function<void()>> GetLargeBuildBlob( + const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function<void(uint64_t Offset, 
const IoBuffer& Chunk, uint64_t BytesRemaining)>&& Receiver) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetLargeBuildBlob"); + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (std::filesystem::is_regular_file(BlockPath)) + { + struct WorkloadData + { + std::atomic<uint64_t> BytesRemaining; + BasicFile BlobFile; + std::function<void(uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining)> Receiver; + }; + + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + Workload->BlobFile.Open(BlockPath, BasicFile::Mode::kRead); + const uint64_t BlobSize = Workload->BlobFile.FileSize(); + + Workload->Receiver = std::move(Receiver); + Workload->BytesRemaining = BlobSize; + + std::vector<std::function<void()>> WorkItems; + uint64_t Offset = 0; + while (Offset < BlobSize) + { + uint64_t Size = Min(ChunkSize, BlobSize - Offset); + WorkItems.push_back([this, BlockPath, Workload, Offset, Size]() { + ZEN_TRACE_CPU("FileBuildStorage::GetLargeBuildBlob_Work"); + SimulateLatency(0, 0); + IoBuffer PartPayload(Size); + Workload->BlobFile.Read(PartPayload.GetMutableView().GetData(), Size, Offset); + m_Stats.TotalBytesRead += PartPayload.GetSize(); + uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Size); + Workload->Receiver(Offset, PartPayload, ByteRemaning); + SimulateLatency(Size, PartPayload.GetSize()); + }); + + Offset += Size; + } + SimulateLatency(0, 0); + return WorkItems; + } + return {}; + } + + virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override + { + ZEN_TRACE_CPU("FileBuildStorage::PutBlockMetadata"); + ZEN_UNUSED(BuildId); + + SimulateLatency(MetaData.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { 
m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockMetaDataPath = GetBlobMetadataPath(BlockRawHash); + CreateDirectories(BlockMetaDataPath.parent_path()); + TemporaryFile::SafeWriteFile(BlockMetaDataPath, MetaData.GetView()); + m_Stats.TotalBytesWritten += MetaData.GetSize(); + WriteAsJson(BlockMetaDataPath, MetaData); + SimulateLatency(0, 0); + } + + virtual std::vector<ChunkBlockDescription> FindBlocks(const Oid& BuildId) override + { + ZEN_TRACE_CPU("FileBuildStorage::FindBlocks"); + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + DirectoryContent Content; + GetDirectoryContent(GetBlobsMetadataFolder(), DirectoryContentFlags::IncludeFiles, Content); + std::vector<ChunkBlockDescription> Result; + for (const std::filesystem::path& MetaDataFile : Content.Files) + { + IoHash ChunkHash; + if (IoHash::TryParse(MetaDataFile.stem().string(), ChunkHash)) + { + std::filesystem::path BlockPath = GetBlobPayloadPath(ChunkHash); + if (std::filesystem::is_regular_file(BlockPath)) + { + IoBuffer BlockMetaDataPayload = ReadFile(MetaDataFile).Flatten(); + + m_Stats.TotalBytesRead += BlockMetaDataPayload.GetSize(); + + CbObject BlockObject = CbObject(SharedBuffer(BlockMetaDataPayload)); + Result.emplace_back(ParseChunkBlockDescription(BlockObject)); + } + } + } + SimulateLatency(0, sizeof(IoHash) * Result.size()); + return Result; + } + + virtual std::vector<ChunkBlockDescription> GetBlockMetadata(const Oid& BuildId, std::span<const IoHash> BlockHashes) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetBlockMetadata"); + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + 
std::vector<ChunkBlockDescription> Result; + for (const IoHash& BlockHash : BlockHashes) + { + std::filesystem::path MetaDataFile = GetBlobMetadataPath(BlockHash); + if (std::filesystem::is_regular_file(MetaDataFile)) + { + IoBuffer BlockMetaDataPayload = ReadFile(MetaDataFile).Flatten(); + + m_Stats.TotalBytesRead += BlockMetaDataPayload.GetSize(); + + CbObject BlockObject = CbObject(SharedBuffer(BlockMetaDataPayload)); + Result.emplace_back(ParseChunkBlockDescription(BlockObject)); + } + } + SimulateLatency(sizeof(BlockHashes) * BlockHashes.size(), sizeof(ChunkBlockDescription) * Result.size()); + return Result; + } + +protected: + std::filesystem::path GetBuildsFolder() const { return m_StoragePath / "builds"; } + std::filesystem::path GetBlobsFolder() const { return m_StoragePath / "blobs"; } + std::filesystem::path GetBlobsMetadataFolder() const { return m_StoragePath / "blocks"; } + std::filesystem::path GetBuildFolder(const Oid& BuildId) const { return GetBuildsFolder() / BuildId.ToString(); } + + std::filesystem::path GetBuildPath(const Oid& BuildId) const { return GetBuildFolder(BuildId) / "metadata.cb"; } + + std::filesystem::path GetBuildPartFolder(const Oid& BuildId, const Oid& BuildPartId) const + { + return GetBuildFolder(BuildId) / "parts" / BuildPartId.ToString(); + } + + std::filesystem::path GetBuildPartPath(const Oid& BuildId, const Oid& BuildPartId) const + { + return GetBuildPartFolder(BuildId, BuildPartId) / "metadata.cb"; + } + + std::filesystem::path GetBlobPayloadPath(const IoHash& RawHash) const { return GetBlobsFolder() / fmt::format("{}.cbz", RawHash); } + + std::filesystem::path GetBlobMetadataPath(const IoHash& RawHash) const + { + return GetBlobsMetadataFolder() / fmt::format("{}.cb", RawHash); + } + + void SimulateLatency(uint64_t ReceiveSize, uint64_t SendSize) + { + double SleepSec = m_LatencySec; + if (m_DelayPerKBSec > 0.0) + { + SleepSec += m_DelayPerKBSec * (double(SendSize + ReceiveSize) / 1024u); + } + if (SleepSec > 0) + { + 
Sleep(int(SleepSec * 1000)); + } + } + + void WriteAsJson(const std::filesystem::path& OriginalPath, CbObjectView Data) const + { + if (m_EnableJsonOutput) + { + ExtendableStringBuilder<128> SB; + CompactBinaryToJson(Data, SB); + std::filesystem::path JsonPath = OriginalPath; + JsonPath.replace_extension(".json"); + std::string_view JsonMetaData = SB.ToView(); + TemporaryFile::SafeWriteFile(JsonPath, MemoryView(JsonMetaData.data(), JsonMetaData.length())); + } + } + + void WriteBuild(const Oid& BuildId, CbObjectView Data) + { + const std::filesystem::path BuildDataPath = GetBuildPath(BuildId); + CreateDirectories(BuildDataPath.parent_path()); + TemporaryFile::SafeWriteFile(BuildDataPath, Data.GetView()); + m_Stats.TotalBytesWritten += Data.GetSize(); + WriteAsJson(BuildDataPath, Data); + } + + CbObject ReadBuild(const Oid& BuildId) + { + const std::filesystem::path BuildDataPath = GetBuildPath(BuildId); + FileContents Content = ReadFile(BuildDataPath); + if (Content.ErrorCode) + { + throw std::runtime_error(fmt::format("Failed reading build '{}' from '{}': {} ({})", + BuildId, + BuildDataPath, + Content.ErrorCode.message(), + Content.ErrorCode.value())); + } + IoBuffer Payload = Content.Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + ZEN_ASSERT(ValidateCompactBinary(Payload.GetView(), CbValidateMode::Default) == CbValidateError::None); + CbObject BuildObject = CbObject(SharedBuffer(Payload)); + return BuildObject; + } + + std::vector<IoHash> GetNeededAttachments(CbObjectView BuildPartObject) + { + std::vector<IoHash> NeededAttachments; + BuildPartObject.IterateAttachments([&](CbFieldView FieldView) { + const IoHash AttachmentHash = FieldView.AsBinaryAttachment(); + const std::filesystem::path BlockPath = GetBlobPayloadPath(AttachmentHash); + if (!std::filesystem::is_regular_file(BlockPath)) + { + NeededAttachments.push_back(AttachmentHash); + } + }); + return NeededAttachments; + } + + bool ValidateCompressedBuffer(const IoHash& RawHash, const 
CompositeBuffer& Payload) + { + IoHash VerifyHash; + uint64_t VerifySize; + CompressedBuffer ValidateBuffer = CompressedBuffer::FromCompressed(Payload, VerifyHash, VerifySize); + if (!ValidateBuffer) + { + return false; + } + if (VerifyHash != RawHash) + { + return false; + } + CompositeBuffer Decompressed = ValidateBuffer.DecompressToComposite(); + if (!Decompressed) + { + return false; + } + IoHash Hash = IoHash::HashBuffer(Decompressed); + if (Hash != RawHash) + { + return false; + } + return true; + } + +private: + const std::filesystem::path m_StoragePath; + BuildStorage::Statistics& m_Stats; + const bool m_EnableJsonOutput = false; + std::atomic<uint64_t> m_WrittenBytes; + + const double m_LatencySec = 0.0; + const double m_DelayPerKBSec = 0.0; +}; + +std::unique_ptr<BuildStorage> +CreateFileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec, + double DelayPerKBSec) +{ + return std::make_unique<FileBuildStorage>(StoragePath, Stats, EnableJsonOutput, LatencySec, DelayPerKBSec); +} + +} // namespace zen diff --git a/src/zenutil/include/zenutil/buildstorage.h b/src/zenutil/include/zenutil/buildstorage.h new file mode 100644 index 000000000..9d2bab170 --- /dev/null +++ b/src/zenutil/include/zenutil/buildstorage.h @@ -0,0 +1,58 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <zencore/compactbinary.h> +#include <zenutil/chunkblock.h> + +namespace zen { + +class BuildStorage +{ +public: + struct Statistics + { + std::atomic<uint64_t> TotalBytesRead = 0; + std::atomic<uint64_t> TotalBytesWritten = 0; + std::atomic<uint64_t> TotalRequestCount = 0; + std::atomic<uint64_t> TotalRequestTimeUs = 0; + std::atomic<uint64_t> TotalExecutionTimeUs = 0; + }; + + virtual ~BuildStorage() {} + + virtual CbObject ListBuilds(CbObject Query) = 0; + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) = 0; + virtual CbObject GetBuild(const Oid& BuildId) = 0; + virtual void FinalizeBuild(const Oid& BuildId) = 0; + + virtual std::pair<IoHash, std::vector<IoHash>> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) = 0; + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) = 0; + virtual std::vector<IoHash> FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) = 0; + virtual void PutBuildBlob(const Oid& BuildId, const IoHash& RawHash, ZenContentType ContentType, const CompositeBuffer& Payload) = 0; + virtual std::vector<std::function<void()>> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, + std::function<void(uint64_t, bool)>&& OnSentBytes) = 0; + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + uint64_t RangeOffset = 0, + uint64_t RangeBytes = (uint64_t)-1) = 0; + virtual std::vector<std::function<void()>> GetLargeBuildBlob( + const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function<void(uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining)>&& Receiver) = 0; + + virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) = 0; + virtual 
std::vector<ChunkBlockDescription> FindBlocks(const Oid& BuildId) = 0; + virtual std::vector<ChunkBlockDescription> GetBlockMetadata(const Oid& BuildId, std::span<const IoHash> BlockHashes) = 0; +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/cache/cachekey.h b/src/zenutil/include/zenutil/cache/cachekey.h index 741375946..0ab05f4f1 100644 --- a/src/zenutil/include/zenutil/cache/cachekey.h +++ b/src/zenutil/include/zenutil/cache/cachekey.h @@ -17,6 +17,12 @@ struct CacheKey static CacheKey Create(std::string_view Bucket, const IoHash& Hash) { return {.Bucket = ToLower(Bucket), .Hash = Hash}; } + // This should be used whenever the bucket name has already been validated to avoid redundant ToLower calls + static CacheKey CreateValidated(std::string&& BucketValidated, const IoHash& Hash) + { + return {.Bucket = std::move(BucketValidated), .Hash = Hash}; + } + auto operator<=>(const CacheKey& that) const { if (auto b = caseSensitiveCompareStrings(Bucket, that.Bucket); b != std::strong_ordering::equal) diff --git a/src/zenutil/include/zenutil/chunkblock.h b/src/zenutil/include/zenutil/chunkblock.h new file mode 100644 index 000000000..277580c74 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkblock.h @@ -0,0 +1,40 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <zencore/iohash.h> + +#include <zencore/compactbinary.h> +#include <zencore/compress.h> + +#include <optional> +#include <vector> + +namespace zen { + +struct ThinChunkBlockDescription +{ + IoHash BlockHash; + std::vector<IoHash> ChunkRawHashes; +}; + +struct ChunkBlockDescription : public ThinChunkBlockDescription +{ + uint64_t HeaderSize; + std::vector<uint32_t> ChunkRawLengths; + std::vector<uint32_t> ChunkCompressedLengths; +}; + +std::vector<ChunkBlockDescription> ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject); +ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject); +CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData); +ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash); +typedef std::function<std::pair<uint64_t, CompressedBuffer>(const IoHash& RawHash)> FetchChunkFunc; + +CompressedBuffer GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock); +bool IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor, + uint64_t& OutHeaderSize); +std::vector<uint32_t> ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h new file mode 100644 index 000000000..57b55cb8e --- /dev/null +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -0,0 +1,283 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <zencore/compactbinary.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/iohash.h> + +#include <filesystem> +#include <vector> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class CbWriter; +class ChunkingController; +class WorkerThreadPool; + +enum class SourcePlatform +{ + Windows = 0, + Linux = 1, + MacOS = 2, + _Count +}; + +std::string_view ToString(SourcePlatform Platform); +SourcePlatform FromString(std::string_view Platform, SourcePlatform Default); +SourcePlatform GetSourceCurrentPlatform(); + +struct FolderContent +{ + SourcePlatform Platform = GetSourceCurrentPlatform(); + std::vector<std::filesystem::path> Paths; + std::vector<uint64_t> RawSizes; + std::vector<uint32_t> Attributes; + std::vector<uint64_t> ModificationTicks; + + bool operator==(const FolderContent& Rhs) const; + + bool AreKnownFilesEqual(const FolderContent& Rhs) const; + void UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& PathIndexesOufOfDate); + static bool AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs); +}; + +FolderContent GetUpdatedContent(const FolderContent& Old, + const FolderContent& New, + std::vector<std::filesystem::path>& OutDeletedPathIndexes); + +void SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output); +FolderContent LoadFolderContentToCompactBinary(CbObjectView Input); + +struct GetFolderContentStatistics +{ + std::atomic<uint64_t> FoundFileCount = 0; + std::atomic<uint64_t> FoundFileByteCount = 0; + std::atomic<uint64_t> AcceptedFileCount = 0; + std::atomic<uint64_t> AcceptedFileByteCount = 0; + uint64_t ElapsedWallTimeUS = 0; +}; + +FolderContent GetFolderContent(GetFolderContentStatistics& Stats, + const std::filesystem::path& RootPath, + std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t 
Attributes)>&& AcceptFile, + WorkerThreadPool& WorkerPool, + int32_t UpdateInteralMS, + std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, + std::atomic<bool>& AbortFlag); + +struct ChunkedContentData +{ + // To describe one asset with a particular RawHash, find the index of the hash in SequenceRawHashes + // ChunkCounts for that index will be the number of indexes in ChunkOrders that describe + // the sequence of chunks required to reconstruct the asset. + // Offset into ChunkOrders is based on how many entries in ChunkOrders the previous [n - 1] SequenceRawHashes uses + std::vector<IoHash> SequenceRawHashes; // Raw hash for Chunk sequence + std::vector<uint32_t> ChunkCounts; // Chunk count of ChunkOrder for SequenceRawHashes[n] + std::vector<uint32_t> ChunkOrders; // Chunk sequence indexed into ChunkHashes, ChunkCounts[n] indexes per SequenceRawHashes[n] + std::vector<IoHash> ChunkHashes; // Unique chunk hashes + std::vector<uint64_t> ChunkRawSizes; // Unique chunk raw size for ChunkHash[n] +}; + +struct ChunkedFolderContent +{ + SourcePlatform Platform = GetSourceCurrentPlatform(); + std::vector<std::filesystem::path> Paths; + std::vector<uint64_t> RawSizes; + std::vector<uint32_t> Attributes; + std::vector<IoHash> RawHashes; + ChunkedContentData ChunkedContent; +}; + +void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output); +ChunkedFolderContent LoadChunkedFolderContentToCompactBinary(CbObjectView Input); + +ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays); +ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, std::span<const std::filesystem::path> DeletedPaths); + +struct ChunkingStatistics +{ + std::atomic<uint64_t> FilesProcessed = 0; + std::atomic<uint64_t> FilesChunked = 0; + std::atomic<uint64_t> BytesHashed = 0; + std::atomic<uint64_t> UniqueChunksFound = 0; + 
std::atomic<uint64_t> UniqueSequencesFound = 0; + std::atomic<uint64_t> UniqueBytesFound = 0; + uint64_t ElapsedWallTimeUS = 0; +}; + +ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& RootPath, + const FolderContent& Content, + const ChunkingController& InChunkingController, + int32_t UpdateInteralMS, + std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, + std::atomic<bool>& AbortFlag); + +struct ChunkedContentLookup +{ + struct ChunkSequenceLocation + { + uint32_t SequenceIndex = (uint32_t)-1; + uint64_t Offset = (uint64_t)-1; + }; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceIndex; + std::vector<uint32_t> SequenceIndexChunkOrderOffset; + std::vector<ChunkSequenceLocation> ChunkSequenceLocations; + std::vector<size_t> + ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex + std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex + std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash +}; + +ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); + +inline std::pair<size_t, uint32_t> +GetChunkSequenceLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +{ + return std::make_pair(Lookup.ChunkSequenceLocationOffset[ChunkIndex], Lookup.ChunkSequenceLocationCounts[ChunkIndex]); +} + +inline std::span<const ChunkedContentLookup::ChunkSequenceLocation> +GetChunkSequenceLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +{ + std::pair<size_t, uint32_t> Range = GetChunkSequenceLocationRange(Lookup, ChunkIndex); + return std::span<const 
ChunkedContentLookup::ChunkSequenceLocation>(Lookup.ChunkSequenceLocations).subspan(Range.first, Range.second);
+}
+
+// Returns the sequence index stored for RawHash in Lookup.RawHashToSequenceIndex.
+// The lookup uses at(), so RawHash must be present in the map.
+inline uint32_t
+GetSequenceIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash)
+{
+    return Lookup.RawHashToSequenceIndex.at(RawHash);
+}
+
+// Returns the chunk index stored for RawHash in Lookup.ChunkHashToChunkIndex.
+// The lookup uses at(), so RawHash must be present in the map.
+inline uint32_t
+GetChunkIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash)
+{
+    // Chunk indexes live in ChunkHashToChunkIndex; RawHashToSequenceIndex holds sequence
+    // indexes and is what GetSequenceIndexForRawHash() reads.
+    return Lookup.ChunkHashToChunkIndex.at(RawHash);
+}
+
+// Returns the first path index recorded for SequenceIndex (no bounds check is performed).
+inline uint32_t
+GetFirstPathIndexForSeqeuenceIndex(const ChunkedContentLookup& Lookup, const uint32_t SequenceIndex)
+{
+    return Lookup.SequenceIndexFirstPathIndex[SequenceIndex];
+}
+
+// Resolves RawHash to its sequence index and returns the first path index for that sequence.
+inline uint32_t
+GetFirstPathIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash)
+{
+    const uint32_t SequenceIndex = GetSequenceIndexForRawHash(Lookup, RawHash);
+    return GetFirstPathIndexForSeqeuenceIndex(Lookup, SequenceIndex);
+}
+
+namespace compactbinary_helpers {
+    // Writes Values as a compact binary array named ArrayName, one element per value via operator<<.
+    template<typename Type>
+    void WriteArray(std::span<const Type> Values, std::string_view ArrayName, CbWriter& Output)
+    {
+        Output.BeginArray(ArrayName);
+        // Iterate by reference so non-trivial element types are not copied per element.
+        for (const Type& Value : Values)
+        {
+            Output << Value;
+        }
+        Output.EndArray();
+    }
+
+    // Convenience overload forwarding a vector to the span based WriteArray.
+    template<typename Type>
+    void WriteArray(const std::vector<Type>& Values, std::string_view ArrayName, CbWriter& Output)
+    {
+        WriteArray(std::span<const Type>(Values), ArrayName, Output);
+    }
+
+    // Paths are written as strings holding their generic (forward slash) UTF-8 form.
+    template<>
+    inline void WriteArray(std::span<const std::filesystem::path> Values, std::string_view ArrayName, CbWriter& Output)
+    {
+        Output.BeginArray(ArrayName);
+        for (const std::filesystem::path& Path : Values)
+        {
+            Output.AddString((const char*)Path.generic_u8string().c_str());
+        }
+        Output.EndArray();
+    }
+
+    // Convenience overload forwarding a vector of paths to the span based specialization.
+    template<>
+    inline void WriteArray(const std::vector<std::filesystem::path>& Values, std::string_view ArrayName, CbWriter& Output)
+    {
+        WriteArray(std::span<const std::filesystem::path>(Values), ArrayName, Output);
+    }
+
+    inline void WriteBinaryAttachmentArray(std::span<const IoHash> Values, std::string_view 
ArrayName, CbWriter& Output) + { + Output.BeginArray(ArrayName); + for (const IoHash& Hash : Values) + { + Output.AddBinaryAttachment(Hash); + } + Output.EndArray(); + } + + inline void WriteBinaryAttachmentArray(const std::vector<IoHash>& Values, std::string_view ArrayName, CbWriter& Output) + { + WriteArray(std::span<const IoHash>(Values), ArrayName, Output); + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint32_t>& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsUInt32()); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint64_t>& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsUInt64()); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<std::filesystem::path>& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + std::u8string_view U8Path = ItemView.AsU8String(); + Result.push_back(std::filesystem::path(U8Path)); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<IoHash>& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsHash()); + } + } + + inline void ReadBinaryAttachmentArray(std::string_view ArrayName, CbObjectView Input, std::vector<IoHash>& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsBinaryAttachment()); + } + } + +} // namespace compactbinary_helpers + +} // namespace zen diff --git 
a/src/zenstore/include/zenstore/chunkedfile.h b/src/zenutil/include/zenutil/chunkedfile.h index c6330bdbd..4cec80fdb 100644 --- a/src/zenstore/include/zenstore/chunkedfile.h +++ b/src/zenutil/include/zenutil/chunkedfile.h @@ -43,7 +43,12 @@ struct ChunkedParams static const ChunkedParams UShaderByteCodeParams = {.UseThreshold = true, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}; -ChunkedInfoWithSource ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params = {}); +ChunkedInfoWithSource ChunkData(BasicFile& RawData, + uint64_t Offset, + uint64_t Size, + ChunkedParams Params = {}, + std::atomic<uint64_t>* BytesProcessed = nullptr, + std::atomic<bool>* AbortFlag = nullptr); void Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk); diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h new file mode 100644 index 000000000..246f4498a --- /dev/null +++ b/src/zenutil/include/zenutil/chunkingcontroller.h @@ -0,0 +1,56 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/compactbinary.h> + +#include <zenutil/chunkedfile.h> + +#include <atomic> +#include <filesystem> + +namespace zen { + +const std::vector<std::string_view> DefaultChunkingExcludeExtensions = {".exe", ".dll", ".pdb", ".self", ".mp4"}; + +const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u, + .MaxSize = 128u * 1024u, + .AvgSize = ((8u * 4u) * 1024u) + 128u}; + +const size_t DefaultChunkingFileSizeLimit = DefaultChunkedParams.MaxSize; + +const uint32_t DefaultFixedChunkingChunkSize = 16u * 1024u * 1024u; + +struct ChunkedInfoWithSource; + +class ChunkingController +{ +public: + virtual ~ChunkingController() {} + + // Return true if the input file was processed. 
If true is returned OutChunked will contain the chunked info + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic<uint64_t>& BytesProcessed, + std::atomic<bool>& AbortFlag) const = 0; + virtual std::string_view GetName() const = 0; + virtual CbObject GetParameters() const = 0; +}; + +std::unique_ptr<ChunkingController> CreateBasicChunkingController( + std::span<const std::string_view> ExcludeExtensions = DefaultChunkingExcludeExtensions, + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit, + const ChunkedParams& ChunkingParams = DefaultChunkedParams); +std::unique_ptr<ChunkingController> CreateBasicChunkingController(CbObjectView Parameters); + +std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking( + std::span<const std::string_view> ExcludeExtensions = DefaultChunkingExcludeExtensions, + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit, + const ChunkedParams& ChunkingParams = DefaultChunkedParams, + uint32_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize); +std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(CbObjectView Parameters); + +std::unique_ptr<ChunkingController> CreateChunkingController(std::string_view Name, CbObjectView Parameters); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/filebuildstorage.h b/src/zenutil/include/zenutil/filebuildstorage.h new file mode 100644 index 000000000..c95fb32e6 --- /dev/null +++ b/src/zenutil/include/zenutil/filebuildstorage.h @@ -0,0 +1,16 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#pragma once + +#include <zencore/logging.h> +#include <zenutil/buildstorage.h> + +namespace zen { +class HttpClient; + +std::unique_ptr<BuildStorage> CreateFileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec = 0.0, + double DelayPerKBSec = 0.0); +} // namespace zen diff --git a/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h b/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h new file mode 100644 index 000000000..89fc70140 --- /dev/null +++ b/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h @@ -0,0 +1,17 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/logging.h> +#include <zenutil/buildstorage.h> + +namespace zen { +class HttpClient; + +std::unique_ptr<BuildStorage> CreateJupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + BuildStorage::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath); +} // namespace zen diff --git a/src/zenutil/include/zenutil/jupiter/jupiterclient.h b/src/zenutil/include/zenutil/jupiter/jupiterclient.h index defe50edc..8a51bd60a 100644 --- a/src/zenutil/include/zenutil/jupiter/jupiterclient.h +++ b/src/zenutil/include/zenutil/jupiter/jupiterclient.h @@ -44,12 +44,11 @@ public: HttpClient& Client() { return m_HttpClient; } private: - LoggerRef m_Log; - const std::string m_DefaultDdcNamespace; - const std::string m_DefaultBlobStoreNamespace; - const std::string m_ComputeCluster; - std::function<HttpClientAccessToken()> m_TokenProvider; - HttpClient m_HttpClient; + LoggerRef m_Log; + const std::string m_DefaultDdcNamespace; + const std::string m_DefaultBlobStoreNamespace; + const std::string m_ComputeCluster; + HttpClient m_HttpClient; friend class JupiterSession; }; diff --git a/src/zenutil/include/zenutil/jupiter/jupitersession.h b/src/zenutil/include/zenutil/jupiter/jupitersession.h index 
6a80332f4..2c5fc73b8 100644 --- a/src/zenutil/include/zenutil/jupiter/jupitersession.h +++ b/src/zenutil/include/zenutil/jupiter/jupitersession.h @@ -102,33 +102,49 @@ public: std::vector<IoHash> Filter(std::string_view Namespace, std::string_view BucketId, const std::vector<IoHash>& ChunkHashes); - JupiterResult PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload); - JupiterResult GetBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); - JupiterResult FinalizeBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); - PutBuildPartResult PutBuildPart(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - std::string_view PartName, - const IoBuffer& Payload); - JupiterResult GetBuildPart(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); - JupiterResult PutBuildBlob(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - const IoHash& Hash, - ZenContentType ContentType, - const CompositeBuffer& Payload); - JupiterResult GetBuildBlob(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - const IoHash& Hash, - std::filesystem::path TempFolderPath); + JupiterResult ListBuilds(std::string_view Namespace, std::string_view BucketId, const IoBuffer& Payload); + JupiterResult PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload); + JupiterResult GetBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + JupiterResult FinalizeBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + PutBuildPartResult PutBuildPart(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + std::string_view PartName, + const IoBuffer& Payload); + 
JupiterResult GetBuildPart(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); + JupiterResult PutBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + ZenContentType ContentType, + const CompositeBuffer& Payload); + JupiterResult GetBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + std::filesystem::path TempFolderPath, + uint64_t Offset = 0, + uint64_t Size = (uint64_t)-1); + + JupiterResult PutMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, + std::vector<std::function<JupiterResult(bool& OutIsComplete)>>& OutWorkItems); + JupiterResult GetMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + uint64_t ChunkSize, + std::function<void(uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining)>&& Receiver, + std::vector<std::function<JupiterResult()>>& OutWorkItems); JupiterResult PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, const IoBuffer& Payload); FinalizeBuildPartResult FinalizeBuildPart(std::string_view Namespace, @@ -136,7 +152,8 @@ public: const Oid& BuildId, const Oid& PartId, const IoHash& RawHash); - JupiterResult FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); + JupiterResult FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + JupiterResult GetBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, IoBuffer Payload); private: inline LoggerRef Log() { return m_Log; } diff --git 
a/src/zenutil/include/zenutil/logging.h b/src/zenutil/include/zenutil/logging.h index ebf6372fc..d64eef207 100644 --- a/src/zenutil/include/zenutil/logging.h +++ b/src/zenutil/include/zenutil/logging.h @@ -32,6 +32,7 @@ struct LoggingOptions bool IsDebug = false; bool IsVerbose = false; bool IsTest = false; + bool AllowAsync = true; bool NoConsoleOutput = false; std::filesystem::path AbsLogFile; // Absolute path to main log file std::string LogId; diff --git a/src/zenutil/include/zenutil/logging/fullformatter.h b/src/zenutil/include/zenutil/logging/fullformatter.h index 07ad408fa..0326870e5 100644 --- a/src/zenutil/include/zenutil/logging/fullformatter.h +++ b/src/zenutil/include/zenutil/logging/fullformatter.h @@ -45,6 +45,8 @@ public: std::chrono::seconds TimestampSeconds; + std::chrono::milliseconds millis; + if (m_UseFullDate) { TimestampSeconds = std::chrono::duration_cast<std::chrono::seconds>(msg.time.time_since_epoch()); @@ -69,6 +71,8 @@ public: spdlog::details::fmt_helper::pad2(m_CachedLocalTm.tm_sec, m_CachedDatetime); m_CachedDatetime.push_back('.'); } + + millis = spdlog::details::fmt_helper::time_fraction<std::chrono::milliseconds>(msg.time); } else { @@ -97,6 +101,8 @@ public: spdlog::details::fmt_helper::pad2(LogSecs, m_CachedDatetime); m_CachedDatetime.push_back('.'); } + + millis = std::chrono::duration_cast<std::chrono::milliseconds>(ElapsedTime - TimestampSeconds); } { @@ -104,7 +110,6 @@ public: OutBuffer.append(m_CachedDatetime.begin(), m_CachedDatetime.end()); } - auto millis = spdlog::details::fmt_helper::time_fraction<std::chrono::milliseconds>(msg.time); spdlog::details::fmt_helper::pad3(static_cast<uint32_t>(millis.count()), OutBuffer); OutBuffer.push_back(']'); OutBuffer.push_back(' '); diff --git a/src/zenutil/include/zenutil/parallellwork.h b/src/zenutil/include/zenutil/parallellwork.h new file mode 100644 index 000000000..79798fc8d --- /dev/null +++ b/src/zenutil/include/zenutil/parallellwork.h @@ -0,0 +1,117 @@ +// Copyright Epic 
Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/except.h> +#include <zencore/fmtutils.h> +#include <zencore/thread.h> +#include <zencore/workthreadpool.h> + +#include <atomic> + +namespace zen { + +class ParallellWork +{ +public: + ParallellWork(std::atomic<bool>& AbortFlag) : m_AbortFlag(AbortFlag), m_PendingWork(1) {} + + ~ParallellWork() + { + // Make sure to call Wait before destroying + ZEN_ASSERT(m_PendingWork.Remaining() == 0); + } + + std::function<void(const std::exception& Ex, std::atomic<bool>& AbortFlag)> DefaultErrorFunction() + { + return [&](const std::exception& Ex, std::atomic<bool>& AbortFlag) { + m_ErrorLock.WithExclusiveLock([&]() { m_Errors.push_back(Ex.what()); }); + AbortFlag = true; + }; + } + + void ScheduleWork(WorkerThreadPool& WorkerPool, + std::function<void(std::atomic<bool>& AbortFlag)>&& Work, + std::function<void(const std::exception& Ex, std::atomic<bool>& AbortFlag)>&& OnError) + { + m_PendingWork.AddCount(1); + try + { + WorkerPool.ScheduleWork([this, Work = std::move(Work), OnError = std::move(OnError)] { + try + { + Work(m_AbortFlag); + } + catch (const AssertException& AssertEx) + { + OnError( + std::runtime_error(fmt::format("Caught assert exception while handling request: {}", AssertEx.FullDescription())), + m_AbortFlag); + } + catch (const std::system_error& SystemError) + { + if (IsOOM(SystemError.code())) + { + OnError(std::runtime_error(fmt::format("Out of memory. Reason: {}", SystemError.what())), m_AbortFlag); + } + else if (IsOOD(SystemError.code())) + { + OnError(std::runtime_error(fmt::format("Out of disk. Reason: {}", SystemError.what())), m_AbortFlag); + } + else + { + OnError(std::runtime_error(fmt::format("System error. 
Reason: {}", SystemError.what())), m_AbortFlag); + } + } + catch (const std::exception& Ex) + { + OnError(Ex, m_AbortFlag); + } + m_PendingWork.CountDown(); + }); + } + catch (const std::exception&) + { + m_PendingWork.CountDown(); + throw; + } + } + + void Abort() { m_AbortFlag = true; } + + bool IsAborted() const { return m_AbortFlag.load(); } + + void Wait(int32_t UpdateInteralMS, std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback) + { + ZEN_ASSERT(m_PendingWork.Remaining() > 0); + m_PendingWork.CountDown(); + while (!m_PendingWork.Wait(UpdateInteralMS)) + { + UpdateCallback(m_AbortFlag.load(), m_PendingWork.Remaining()); + } + if (m_Errors.size() == 1) + { + throw std::runtime_error(m_Errors.front()); + } + else if (m_Errors.size() > 1) + { + ExtendableStringBuilder<128> SB; + SB.Append("Multiple errors:"); + for (const std::string& Error : m_Errors) + { + SB.Append(fmt::format("\n {}", Error)); + } + throw std::runtime_error(SB.ToString()); + } + } + Latch& PendingWork() { return m_PendingWork; } + +private: + std::atomic<bool>& m_AbortFlag; + Latch m_PendingWork; + + RwLock m_ErrorLock; + std::vector<std::string> m_Errors; +}; + +} // namespace zen diff --git a/src/zenutil/jupiter/jupiterbuildstorage.cpp b/src/zenutil/jupiter/jupiterbuildstorage.cpp new file mode 100644 index 000000000..bf89ce785 --- /dev/null +++ b/src/zenutil/jupiter/jupiterbuildstorage.cpp @@ -0,0 +1,404 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include <zenutil/jupiter/jupiterbuildstorage.h> + +#include <zencore/compactbinarybuilder.h> +#include <zencore/fmtutils.h> +#include <zencore/scopeguard.h> +#include <zencore/timer.h> +#include <zencore/trace.h> +#include <zenutil/jupiter/jupitersession.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +class JupiterBuildStorage : public BuildStorage +{ +public: + JupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath) + : m_Session(InLog, InHttpClient) + , m_Stats(Stats) + , m_Namespace(Namespace) + , m_Bucket(Bucket) + , m_TempFolderPath(TempFolderPath) + { + } + virtual ~JupiterBuildStorage() {} + + virtual CbObject ListBuilds(CbObject Query) override + { + ZEN_TRACE_CPU("Jupiter::ListBuilds"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = Query.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult ListResult = m_Session.ListBuilds(m_Namespace, m_Bucket, Payload); + AddStatistic(ListResult); + if (!ListResult.Success) + { + throw std::runtime_error(fmt::format("Failed listing builds: {} ({})", ListResult.Reason, ListResult.ErrorCode)); + } + return PayloadToJson("Failed listing builds"sv, ListResult.Response); + } + + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override + { + ZEN_TRACE_CPU("Jupiter::PutBuild"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult PutResult = m_Session.PutBuild(m_Namespace, m_Bucket, BuildId, Payload); + 
AddStatistic(PutResult); + if (!PutResult.Success) + { + throw std::runtime_error(fmt::format("Failed creating build: {} ({})", PutResult.Reason, PutResult.ErrorCode)); + } + return PayloadToJson(fmt::format("Failed creating build: {}", BuildId), PutResult.Response); + } + + virtual CbObject GetBuild(const Oid& BuildId) override + { + ZEN_TRACE_CPU("Jupiter::GetBuild"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult GetBuildResult = m_Session.GetBuild(m_Namespace, m_Bucket, BuildId); + AddStatistic(GetBuildResult); + if (!GetBuildResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching build: {} ({})", GetBuildResult.Reason, GetBuildResult.ErrorCode)); + } + return PayloadToJson(fmt::format("Failed fetching build {}:", BuildId), GetBuildResult.Response); + } + + virtual void FinalizeBuild(const Oid& BuildId) override + { + ZEN_TRACE_CPU("Jupiter::FinalizeBuild"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult FinalizeBuildResult = m_Session.FinalizeBuild(m_Namespace, m_Bucket, BuildId); + AddStatistic(FinalizeBuildResult); + if (!FinalizeBuildResult.Success) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part: {} ({})", FinalizeBuildResult.Reason, FinalizeBuildResult.ErrorCode)); + } + } + + virtual std::pair<IoHash, std::vector<IoHash>> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) override + { + ZEN_TRACE_CPU("Jupiter::PutBuildPart"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + PutBuildPartResult PutPartResult = m_Session.PutBuildPart(m_Namespace, m_Bucket, 
BuildId, BuildPartId, PartName, Payload); + AddStatistic(PutPartResult); + if (!PutPartResult.Success) + { + throw std::runtime_error(fmt::format("Failed creating build part: {} ({})", PutPartResult.Reason, PutPartResult.ErrorCode)); + } + return std::make_pair(PutPartResult.RawHash, std::move(PutPartResult.Needs)); + } + + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override + { + ZEN_TRACE_CPU("Jupiter::GetBuildPart"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult GetBuildPartResult = m_Session.GetBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId); + AddStatistic(GetBuildPartResult); + if (!GetBuildPartResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching build part {}: {} ({})", + BuildPartId, + GetBuildPartResult.Reason, + GetBuildPartResult.ErrorCode)); + } + return PayloadToJson(fmt::format("Failed fetching build part {}:", BuildPartId), GetBuildPartResult.Response); + } + + virtual std::vector<IoHash> FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override + { + ZEN_TRACE_CPU("Jupiter::FinalizeBuildPart"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + FinalizeBuildPartResult FinalizePartResult = m_Session.FinalizeBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId, PartHash); + AddStatistic(FinalizePartResult); + if (!FinalizePartResult.Success) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part: {} ({})", FinalizePartResult.Reason, FinalizePartResult.ErrorCode)); + } + return std::move(FinalizePartResult.Needs); + } + + virtual void PutBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + const CompositeBuffer& Payload) override + { + ZEN_TRACE_CPU("Jupiter::PutBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = 
MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult PutBlobResult = m_Session.PutBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, ContentType, Payload); + AddStatistic(PutBlobResult); + if (!PutBlobResult.Success) + { + throw std::runtime_error(fmt::format("Failed putting build part: {} ({})", PutBlobResult.Reason, PutBlobResult.ErrorCode)); + } + } + + virtual std::vector<std::function<void()>> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, + std::function<void(uint64_t, bool)>&& OnSentBytes) override + { + ZEN_TRACE_CPU("Jupiter::PutLargeBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + std::vector<std::function<JupiterResult(bool&)>> WorkItems; + JupiterResult PutMultipartBlobResult = m_Session.PutMultipartBuildBlob(m_Namespace, + m_Bucket, + BuildId, + RawHash, + ContentType, + PayloadSize, + std::move(Transmitter), + WorkItems); + AddStatistic(PutMultipartBlobResult); + if (!PutMultipartBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed putting build part: {} ({})", PutMultipartBlobResult.Reason, PutMultipartBlobResult.ErrorCode)); + } + OnSentBytes(PutMultipartBlobResult.SentBytes, WorkItems.empty()); + + std::vector<std::function<void()>> WorkList; + for (auto& WorkItem : WorkItems) + { + WorkList.emplace_back([this, WorkItem = std::move(WorkItem), OnSentBytes]() { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + bool IsComplete = false; + JupiterResult PartResult = WorkItem(IsComplete); + AddStatistic(PartResult); + if (!PartResult.Success) + { + throw std::runtime_error(fmt::format("Failed putting build part: {} ({})", PartResult.Reason, PartResult.ErrorCode)); + } + 
OnSentBytes(PartResult.SentBytes, IsComplete); + }); + } + return WorkList; + } + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override + { + ZEN_TRACE_CPU("Jupiter::GetBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + CreateDirectories(m_TempFolderPath); + JupiterResult GetBuildBlobResult = + m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath, RangeOffset, RangeBytes); + AddStatistic(GetBuildBlobResult); + if (!GetBuildBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed fetching build blob {}: {} ({})", RawHash, GetBuildBlobResult.Reason, GetBuildBlobResult.ErrorCode)); + } + return std::move(GetBuildBlobResult.Response); + } + + virtual std::vector<std::function<void()>> GetLargeBuildBlob( + const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function<void(uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining)>&& Receiver) override + { + ZEN_TRACE_CPU("Jupiter::GetLargeBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + std::vector<std::function<JupiterResult()>> WorkItems; + JupiterResult GetMultipartBlobResult = + m_Session.GetMultipartBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, ChunkSize, std::move(Receiver), WorkItems); + + AddStatistic(GetMultipartBlobResult); + if (!GetMultipartBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed getting build part: {} ({})", GetMultipartBlobResult.Reason, GetMultipartBlobResult.ErrorCode)); + } + std::vector<std::function<void()>> WorkList; + for (auto& WorkItem : WorkItems) + { + WorkList.emplace_back([this, WorkItem = std::move(WorkItem)]() { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += 
ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult PartResult = WorkItem(); + AddStatistic(PartResult); + if (!PartResult.Success) + { + throw std::runtime_error(fmt::format("Failed getting build part: {} ({})", PartResult.Reason, PartResult.ErrorCode)); + } + }); + } + return WorkList; + } + + virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override + { + ZEN_TRACE_CPU("Jupiter::PutBlockMetadata"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult PutMetaResult = m_Session.PutBlockMetadata(m_Namespace, m_Bucket, BuildId, BlockRawHash, Payload); + AddStatistic(PutMetaResult); + if (!PutMetaResult.Success) + { + throw std::runtime_error( + fmt::format("Failed putting build block metadata: {} ({})", PutMetaResult.Reason, PutMetaResult.ErrorCode)); + } + } + + virtual std::vector<ChunkBlockDescription> FindBlocks(const Oid& BuildId) override + { + ZEN_TRACE_CPU("Jupiter::FindBlocks"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult FindResult = m_Session.FindBlocks(m_Namespace, m_Bucket, BuildId); + AddStatistic(FindResult); + if (!FindResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching known blocks: {} ({})", FindResult.Reason, FindResult.ErrorCode)); + } + return ParseChunkBlockDescriptionList(PayloadToJson("Failed fetching known blocks"sv, FindResult.Response)); + } + + virtual std::vector<ChunkBlockDescription> GetBlockMetadata(const Oid& BuildId, std::span<const IoHash> BlockHashes) override + { + ZEN_TRACE_CPU("Jupiter::GetBlockMetadata"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + 
CbObjectWriter Request; + + Request.BeginArray("blocks"sv); + for (const IoHash& BlockHash : BlockHashes) + { + Request.AddHash(BlockHash); + } + Request.EndArray(); + + IoBuffer Payload = Request.Save().GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult GetBlockMetadataResult = m_Session.GetBlockMetadata(m_Namespace, m_Bucket, BuildId, Payload); + AddStatistic(GetBlockMetadataResult); + if (!GetBlockMetadataResult.Success) + { + throw std::runtime_error( + fmt::format("Failed fetching block metadatas: {} ({})", GetBlockMetadataResult.Reason, GetBlockMetadataResult.ErrorCode)); + } + std::vector<ChunkBlockDescription> UnorderedList = + ParseChunkBlockDescriptionList(PayloadToJson("Failed fetching block metadatas", GetBlockMetadataResult.Response)); + tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockDescriptionLookup; + for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++) + { + const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex]; + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex); + } + std::vector<ChunkBlockDescription> SortedBlockDescriptions; + SortedBlockDescriptions.reserve(BlockDescriptionLookup.size()); + for (const IoHash& BlockHash : BlockHashes) + { + if (auto It = BlockDescriptionLookup.find(BlockHash); It != BlockDescriptionLookup.end()) + { + SortedBlockDescriptions.push_back(std::move(UnorderedList[It->second])); + } + } + return SortedBlockDescriptions; + } + +private: + static CbObject PayloadToJson(std::string_view Context, const IoBuffer& Payload) + { + if (Payload.GetContentType() == ZenContentType::kJSON) + { + std::string_view Json(reinterpret_cast<const char*>(Payload.GetData()), Payload.GetSize()); + return LoadCompactBinaryFromJson(Json).AsObject(); + } + else if (Payload.GetContentType() == ZenContentType::kCbObject) + { + return LoadCompactBinaryObject(Payload); + } + else if 
(Payload.GetContentType() == ZenContentType::kCompressedBinary) + { + IoHash RawHash; + uint64_t RawSize; + return LoadCompactBinaryObject(CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize)); + } + else + { + throw std::runtime_error( + fmt::format("{}: {} ({})", "Unsupported response format", Context, ToString(Payload.GetContentType()))); + } + } + + void AddStatistic(const JupiterResult& Result) + { + m_Stats.TotalBytesWritten += Result.SentBytes; + m_Stats.TotalBytesRead += Result.ReceivedBytes; + m_Stats.TotalRequestTimeUs += uint64_t(Result.ElapsedSeconds * 1000000.0); + m_Stats.TotalRequestCount++; + } + + JupiterSession m_Session; + Statistics& m_Stats; + const std::string m_Namespace; + const std::string m_Bucket; + const std::filesystem::path m_TempFolderPath; +}; + +std::unique_ptr<BuildStorage> +CreateJupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + BuildStorage::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath) +{ + ZEN_TRACE_CPU("CreateJupiterBuildStorage"); + + return std::make_unique<JupiterBuildStorage>(InLog, InHttpClient, Stats, Namespace, Bucket, TempFolderPath); +} + +} // namespace zen diff --git a/src/zenutil/jupiter/jupiterclient.cpp b/src/zenutil/jupiter/jupiterclient.cpp index 5e5da3750..dbac218a4 100644 --- a/src/zenutil/jupiter/jupiterclient.cpp +++ b/src/zenutil/jupiter/jupiterclient.cpp @@ -11,7 +11,6 @@ JupiterClient::JupiterClient(const JupiterClientOptions& Options, std::function< , m_DefaultDdcNamespace(Options.DdcNamespace) , m_DefaultBlobStoreNamespace(Options.BlobStoreNamespace) , m_ComputeCluster(Options.ComputeCluster) -, m_TokenProvider(std::move(TokenProvider)) , m_HttpClient(Options.ServiceUrl, HttpClientSettings{.ConnectTimeout = Options.ConnectTimeout, .Timeout = Options.Timeout, diff --git a/src/zenutil/jupiter/jupitersession.cpp b/src/zenutil/jupiter/jupitersession.cpp index f706a7efc..68f214c06 100644 --- 
a/src/zenutil/jupiter/jupitersession.cpp +++ b/src/zenutil/jupiter/jupitersession.cpp @@ -3,6 +3,8 @@ #include <zenutil/jupiter/jupitersession.h> #include <zencore/compactbinary.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/compositebuffer.h> #include <zencore/fmtutils.h> #include <zencore/trace.h> @@ -355,6 +357,16 @@ JupiterSession::CacheTypeExists(std::string_view Namespace, std::string_view Typ } JupiterResult +JupiterSession::ListBuilds(std::string_view Namespace, std::string_view BucketId, const IoBuffer& Payload) +{ + ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); + HttpClient::Response Response = m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/search", Namespace, BucketId), + Payload, + {HttpClient::Accept(ZenContentType::kCbObject)}); + return detail::ConvertResponse(Response, "JupiterSession::ListBuilds"sv); +} + +JupiterResult JupiterSession::PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload) { ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); @@ -424,29 +436,282 @@ JupiterResult JupiterSession::PutBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, ZenContentType ContentType, const CompositeBuffer& Payload) { - HttpClient::Response Response = m_HttpClient.Upload( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - Payload, - ContentType); + HttpClient::Response Response = + m_HttpClient.Upload(fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()), + Payload, + ContentType); return detail::ConvertResponse(Response, "JupiterSession::PutBuildBlob"sv); } JupiterResult +JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + ZenContentType ContentType, + uint64_t PayloadSize, 
+ std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, + std::vector<std::function<JupiterResult(bool& OutIsComplete)>>& OutWorkItems) +{ + struct MultipartUploadResponse + { + struct Part + { + uint64_t FirstByte; + uint64_t LastByte; + std::string PartId; + std::string QueryString; + }; + + std::string UploadId; + std::string BlobName; + std::vector<Part> Parts; + + static MultipartUploadResponse Parse(CbObject& Payload) + { + MultipartUploadResponse Result; + Result.UploadId = Payload["uploadId"sv].AsString(); + Result.BlobName = Payload["blobName"sv].AsString(); + CbArrayView PartsArray = Payload["parts"sv].AsArrayView(); + Result.Parts.reserve(PartsArray.Num()); + for (CbFieldView PartView : PartsArray) + { + CbObjectView PartObject = PartView.AsObjectView(); + Result.Parts.emplace_back(Part{ + .FirstByte = PartObject["firstByte"sv].AsUInt64(), + .LastByte = PartObject["lastByte"sv].AsUInt64(), + .PartId = std::string(PartObject["partId"sv].AsString()), + .QueryString = std::string(PartObject["queryString"sv].AsString()), + }); + } + return Result; + } + }; + + CbObjectWriter StartMultipartPayloadWriter; + StartMultipartPayloadWriter.AddInteger("blobLength"sv, PayloadSize); + CbObject StartMultipartPayload = StartMultipartPayloadWriter.Save(); + + std::string StartMultipartResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/startMultipartUpload", Namespace, BucketId, BuildId, Hash.ToHexString()); + // ZEN_INFO("POST: {}", StartMultipartResponseRequestString); + HttpClient::Response StartMultipartResponse = + m_HttpClient.Post(StartMultipartResponseRequestString, StartMultipartPayload, HttpClient::Accept(ZenContentType::kCbObject)); + if (!StartMultipartResponse.IsSuccess()) + { + ZEN_WARN("{}", StartMultipartResponse.ErrorMessage("startMultipartUpload: ")); + return detail::ConvertResponse(StartMultipartResponse, "JupiterSession::PutMultipartBuildBlob"sv); + } + CbObject ResponseObject = 
LoadCompactBinaryObject(StartMultipartResponse.ResponsePayload); + + struct WorkloadData + { + MultipartUploadResponse PartDescription; + std::function<IoBuffer(uint64_t Offset, uint64_t Size)> Transmitter; + std::atomic<size_t> PartsLeft; + }; + + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + + Workload->PartDescription = MultipartUploadResponse::Parse(ResponseObject); + Workload->Transmitter = std::move(Transmitter); + Workload->PartsLeft = Workload->PartDescription.Parts.size(); + + for (size_t PartIndex = 0; PartIndex < Workload->PartDescription.Parts.size(); PartIndex++) + { + OutWorkItems.emplace_back([this, Namespace, BucketId, BuildId, Hash, ContentType, Workload, PartIndex]( + bool& OutIsComplete) -> JupiterResult { + const MultipartUploadResponse::Part& Part = Workload->PartDescription.Parts[PartIndex]; + IoBuffer PartPayload = Workload->Transmitter(Part.FirstByte, Part.LastByte - Part.FirstByte); + std::string MultipartUploadResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/uploadMultipart{}", + Namespace, + BucketId, + BuildId, + Hash.ToHexString(), + Part.QueryString); + // ZEN_INFO("PUT: {}", MultipartUploadResponseRequestString); + HttpClient::Response MultipartUploadResponse = m_HttpClient.Put(MultipartUploadResponseRequestString, PartPayload); + if (!MultipartUploadResponse.IsSuccess()) + { + ZEN_WARN("{}", MultipartUploadResponse.ErrorMessage(MultipartUploadResponseRequestString)); + } + OutIsComplete = Workload->PartsLeft.fetch_sub(1) == 1; + if (OutIsComplete) + { + int64_t TotalUploadedBytes = MultipartUploadResponse.UploadedBytes; + int64_t TotalDownloadedBytes = MultipartUploadResponse.DownloadedBytes; + double TotalElapsedSeconds = MultipartUploadResponse.ElapsedSeconds; + HttpClient::Response MultipartEndResponse = MultipartUploadResponse; + while (MultipartEndResponse.IsSuccess()) + { + CbObjectWriter CompletePayloadWriter; + CompletePayloadWriter.AddString("blobName"sv, 
Workload->PartDescription.BlobName); + CompletePayloadWriter.AddString("uploadId"sv, Workload->PartDescription.UploadId); + CompletePayloadWriter.AddBool("isCompressed"sv, ContentType == ZenContentType::kCompressedBinary); + CompletePayloadWriter.BeginArray("partIds"sv); + std::unordered_map<std::string, size_t> PartNameToIndex; + for (size_t UploadPartIndex = 0; UploadPartIndex < Workload->PartDescription.Parts.size(); UploadPartIndex++) + { + const MultipartUploadResponse::Part& PartDescription = Workload->PartDescription.Parts[UploadPartIndex]; + PartNameToIndex.insert({PartDescription.PartId, UploadPartIndex}); + CompletePayloadWriter.AddString(PartDescription.PartId); + } + CompletePayloadWriter.EndArray(); // "partIds" + CbObject CompletePayload = CompletePayloadWriter.Save(); + + std::string MultipartEndResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/completeMultipart", Namespace, BucketId, BuildId, Hash.ToHexString()); + + MultipartEndResponse = m_HttpClient.Post(MultipartEndResponseRequestString, + CompletePayload, + HttpClient::Accept(ZenContentType::kCbObject)); + TotalUploadedBytes += MultipartEndResponse.UploadedBytes; + TotalDownloadedBytes += MultipartEndResponse.DownloadedBytes; + TotalElapsedSeconds += MultipartEndResponse.ElapsedSeconds; + if (MultipartEndResponse.IsSuccess()) + { + CbObject ResponseObject = MultipartEndResponse.AsObject(); + CbArrayView MissingPartsArrayView = ResponseObject["missingParts"sv].AsArrayView(); + if (MissingPartsArrayView.Num() == 0) + { + break; + } + else + { + for (CbFieldView PartIdView : MissingPartsArrayView) + { + std::string RetryPartId(PartIdView.AsString()); + size_t RetryPartIndex = PartNameToIndex.at(RetryPartId); + const MultipartUploadResponse::Part& RetryPart = Workload->PartDescription.Parts[RetryPartIndex]; + IoBuffer RetryPartPayload = + Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte - 1); + std::string 
RetryMultipartUploadResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/uploadMultipart{}", + Namespace, + BucketId, + BuildId, + Hash.ToHexString(), + RetryPart.QueryString); + + MultipartUploadResponse = m_HttpClient.Put(RetryMultipartUploadResponseRequestString, RetryPartPayload); + TotalUploadedBytes = MultipartUploadResponse.UploadedBytes; + TotalDownloadedBytes = MultipartUploadResponse.DownloadedBytes; + TotalElapsedSeconds = MultipartUploadResponse.ElapsedSeconds; + if (!MultipartUploadResponse.IsSuccess()) + { + ZEN_WARN("{}", MultipartUploadResponse.ErrorMessage(RetryMultipartUploadResponseRequestString)); + MultipartEndResponse = MultipartUploadResponse; + } + } + } + } + else + { + ZEN_WARN("{}", MultipartEndResponse.ErrorMessage(MultipartEndResponseRequestString)); + } + } + MultipartEndResponse.UploadedBytes = TotalUploadedBytes; + MultipartEndResponse.DownloadedBytes = TotalDownloadedBytes; + MultipartEndResponse.ElapsedSeconds = TotalElapsedSeconds; + return detail::ConvertResponse(MultipartEndResponse, "JupiterSession::PutMultipartBuildBlob"sv); + } + return detail::ConvertResponse(MultipartUploadResponse, "JupiterSession::PutMultipartBuildBlob"sv); + }); + } + return detail::ConvertResponse(StartMultipartResponse, "JupiterSession::PutMultipartBuildBlob"sv); +} + +JupiterResult +JupiterSession::GetMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + uint64_t ChunkSize, + std::function<void(uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining)>&& Receiver, + std::vector<std::function<JupiterResult()>>& OutWorkItems) +{ + std::string RequestUrl = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()); + HttpClient::Response Response = + m_HttpClient.Get(RequestUrl, HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", 0, ChunkSize - 1)}})); + if (Response.IsSuccess()) + { + if (std::string_view 
ContentRange = Response.Header.Entries["Content-Range"]; !ContentRange.empty()) + { + if (std::string_view::size_type SizeDelimiterPos = ContentRange.find('/'); SizeDelimiterPos != std::string_view::npos) + { + if (std::optional<uint64_t> TotalSizeMaybe = ParseInt<uint64_t>(ContentRange.substr(SizeDelimiterPos + 1)); + TotalSizeMaybe.has_value()) + { + uint64_t TotalSize = TotalSizeMaybe.value(); + uint64_t PayloadSize = Response.ResponsePayload.GetSize(); + + Receiver(0, Response.ResponsePayload, TotalSize); + + if (TotalSize > PayloadSize) + { + struct WorkloadData + { + std::function<void(uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining)> Receiver; + std::atomic<uint64_t> BytesRemaining; + }; + + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + Workload->Receiver = std::move(Receiver); + Workload->BytesRemaining = TotalSize - PayloadSize; + + uint64_t Offset = PayloadSize; + while (Offset < TotalSize) + { + uint64_t PartSize = Min(ChunkSize, TotalSize - Offset); + OutWorkItems.emplace_back( + [this, Namespace, BucketId, BuildId, Hash, TotalSize, Workload, Offset, PartSize]() -> JupiterResult { + std::string RequestUrl = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()); + HttpClient::Response Response = m_HttpClient.Get( + RequestUrl, + HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", Offset, Offset + PartSize - 1)}})); + if (Response.IsSuccess()) + { + uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Response.ResponsePayload.GetSize()); + Workload->Receiver(Offset, Response.ResponsePayload, ByteRemaning); + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); + }); + Offset += PartSize; + } + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); + } + } + } + Receiver(0, Response.ResponsePayload, Response.ResponsePayload.GetSize()); + } + return detail::ConvertResponse(Response, 
"JupiterSession::GetMultipartBuildBlob"sv); +} + +JupiterResult JupiterSession::GetBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, - std::filesystem::path TempFolderPath) + std::filesystem::path TempFolderPath, + uint64_t Offset, + uint64_t Size) { - HttpClient::Response Response = m_HttpClient.Download( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - TempFolderPath); + HttpClient::KeyValueMap Headers; + if (Offset != 0 || Size != (uint64_t)-1) + { + Headers.Entries.insert({"Range", fmt::format("bytes={}-{}", Offset, Offset + Size - 1)}); + } + HttpClient::Response Response = + m_HttpClient.Download(fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()), + TempFolderPath, + Headers); + return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv); } @@ -454,14 +719,13 @@ JupiterResult JupiterSession::PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, const IoBuffer& Payload) { ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); - HttpClient::Response Response = m_HttpClient.Put( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/{}/metadata", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - Payload); + HttpClient::Response Response = + m_HttpClient.Put(fmt::format("/api/v2/builds/{}/{}/{}/blocks/{}/metadata", Namespace, BucketId, BuildId, Hash.ToHexString()), + Payload); return detail::ConvertResponse(Response, "JupiterSession::PutBlockMetadata"sv); } @@ -494,12 +758,22 @@ JupiterSession::FinalizeBuildPart(std::string_view Namespace, } JupiterResult -JupiterSession::FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId) +JupiterSession::FindBlocks(std::string_view Namespace, std::string_view BucketId, 
const Oid& BuildId) { - HttpClient::Response Response = - m_HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/listBlocks", Namespace, BucketId, BuildId, PartId), - HttpClient::Accept(ZenContentType::kCbObject)); + HttpClient::Response Response = m_HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}/blocks/listBlocks", Namespace, BucketId, BuildId), + HttpClient::Accept(ZenContentType::kCbObject)); return detail::ConvertResponse(Response, "JupiterSession::FindBlocks"sv); } +JupiterResult +JupiterSession::GetBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, IoBuffer Payload) +{ + ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); + HttpClient::Response Response = + m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/blocks/getBlockMetadata", Namespace, BucketId, BuildId), + Payload, + HttpClient::Accept(ZenContentType::kCbObject)); + return detail::ConvertResponse(Response, "JupiterSession::GetBlockMetadata"sv); +} + } // namespace zen diff --git a/src/zenutil/logging.cpp b/src/zenutil/logging.cpp index 6314c407f..cb0fd6679 100644 --- a/src/zenutil/logging.cpp +++ b/src/zenutil/logging.cpp @@ -10,11 +10,13 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <spdlog/spdlog.h> ZEN_THIRD_PARTY_INCLUDES_END +#include <zencore/callstack.h> #include <zencore/compactbinary.h> #include <zencore/filesystem.h> #include <zencore/logging.h> #include <zencore/memory/llm.h> #include <zencore/string.h> +#include <zencore/timer.h> #include <zenutil/logging/fullformatter.h> #include <zenutil/logging/jsonformatter.h> #include <zenutil/logging/rotatingfilesink.h> @@ -48,7 +50,7 @@ BeginInitializeLogging(const LoggingOptions& LogOptions) zen::logging::InitializeLogging(); zen::logging::EnableVTMode(); - bool IsAsync = true; + bool IsAsync = LogOptions.AllowAsync; if (LogOptions.IsDebug) { @@ -97,7 +99,13 @@ BeginInitializeLogging(const LoggingOptions& LogOptions) } } - std::set_terminate([]() { ZEN_CRITICAL("Program 
exited abnormally via std::terminate()"); }); + std::set_terminate([]() { + void* Frames[8]; + uint32_t FrameCount = GetCallstack(2, 8, Frames); + CallstackFrames* Callstack = CreateCallstack(FrameCount, Frames); + ZEN_CRITICAL("Program exited abnormally via std::terminate()\n{}", CallstackToString(Callstack, " ")); + FreeCallstack(Callstack); + }); // Default @@ -174,7 +182,7 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) LogLevel = logging::level::Debug; } - if (LogOptions.IsTest) + if (LogOptions.IsTest || LogOptions.IsVerbose) { LogLevel = logging::level::Trace; } @@ -184,21 +192,25 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) logging::RefreshLogLevels(LogLevel); spdlog::flush_on(spdlog::level::err); spdlog::flush_every(std::chrono::seconds{2}); - spdlog::set_formatter( - std::make_unique<logging::full_formatter>(LogOptions.LogId, std::chrono::system_clock::now())); // default to duration prefix + spdlog::set_formatter(std::make_unique<logging::full_formatter>( + LogOptions.LogId, + std::chrono::system_clock::now() - std::chrono::milliseconds(GetTimeSinceProcessStart()))); // default to duration prefix - if (LogOptions.AbsLogFile.extension() == ".json") + if (g_FileSink) { - g_FileSink->set_formatter(std::make_unique<logging::json_formatter>(LogOptions.LogId)); - } - else - { - g_FileSink->set_formatter(std::make_unique<logging::full_formatter>(LogOptions.LogId)); // this will have a date prefix - } + if (LogOptions.AbsLogFile.extension() == ".json") + { + g_FileSink->set_formatter(std::make_unique<logging::json_formatter>(LogOptions.LogId)); + } + else + { + g_FileSink->set_formatter(std::make_unique<logging::full_formatter>(LogOptions.LogId)); // this will have a date prefix + } - const std::string StartLogTime = zen::DateTime::Now().ToIso8601(); + const std::string StartLogTime = zen::DateTime::Now().ToIso8601(); - spdlog::apply_all([&](auto Logger) { Logger->info("log starting at {}", StartLogTime); }); + 
spdlog::apply_all([&](auto Logger) { Logger->info("log starting at {}", StartLogTime); }); + } g_IsLoggingInitialized = true; } @@ -206,7 +218,7 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) void ShutdownLogging() { - if (g_IsLoggingInitialized) + if (g_IsLoggingInitialized && g_FileSink) { auto DefaultLogger = zen::logging::Default(); ZEN_LOG_INFO(DefaultLogger, "log ending at {}", zen::DateTime::Now().ToIso8601()); diff --git a/src/zenutil/service.cpp b/src/zenutil/service.cpp index ea7c2aae6..a2ff93efd 100644 --- a/src/zenutil/service.cpp +++ b/src/zenutil/service.cpp @@ -884,14 +884,14 @@ InstallService(std::string_view ServiceName, const ServiceSpec& Spec) { const std::string UnitName = GetUnitName(ServiceName); const std::filesystem::path ServiceUnitPath = GetServiceUnitPath(UnitName); - std::string UserName = Spec.UserName; + std::string UserName = Spec.UserName; if (UserName == "") { std::pair<int, std::string> UserResult = ExecuteProgram("echo $SUDO_USER"); if (UserResult.first != 0 || UserResult.second.empty()) { - ZEN_ERROR("Unable to determine current user"); + ZEN_ERROR("Unable to determine current user"); return MakeErrorCode(UserResult.first); } @@ -984,14 +984,14 @@ QueryInstalledService(std::string_view ServiceName, ServiceInfo& OutInfo) std::pair<int, std::string> ShowResult = ExecuteProgram(fmt::format("systemctl show -p ExecStart {}", UnitName)); if (ShowResult.first == 0) { - std::regex Regex(R"~(ExecStart=\{ path=(.*?) ; argv\[\]=(.*?) ;)~"); + std::regex Regex(R"~(ExecStart=\{ path=(.*?) ; argv\[\]=(.*?) 
;)~"); std::smatch Match; if (std::regex_search(ShowResult.second, Match, Regex)) { - std::string Executable = Match[1].str(); - std::string CommandLine = Match[2].str(); - OutInfo.Spec.ExecutablePath = Executable; + std::string Executable = Match[1].str(); + std::string CommandLine = Match[2].str(); + OutInfo.Spec.ExecutablePath = Executable; OutInfo.Spec.CommandLineOptions = CommandLine.substr(Executable.size(), CommandLine.size()); } else diff --git a/src/zenutil/zenutil.cpp b/src/zenutil/zenutil.cpp index c54144549..19eb63ce9 100644 --- a/src/zenutil/zenutil.cpp +++ b/src/zenutil/zenutil.cpp @@ -6,6 +6,7 @@ # include <zenutil/cache/cacherequests.h> # include <zenutil/cache/rpcrecording.h> +# include <zenutil/chunkedfile.h> namespace zen { @@ -15,6 +16,7 @@ zenutil_forcelinktests() cachepolicy_forcelink(); cache::rpcrecord_forcelink(); cacherequests_forcelink(); + chunkedfile_forcelink(); } } // namespace zen |