From 7d8fe45af3b49d800f84f0ddce051c0b3b2e837d Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 26 Feb 2025 15:10:14 +0100 Subject: builds upload command (#278) - Feature: **EXPERIMENTAL** New `zen builds` command to list, upload and download folders to Cloud Build API - `builds list` list available builds (**INCOMPLETE - FILTERING MISSING**) - `builds upload` upload a folder to Cloud Build API - `--local-path` source folder to upload - `--create-build` creates a new parent build object (using the object id), if omitted a parent build must exist and `--build-id` must be given - `--build-id` an Oid in hex form for the Build identifier to use - omit to have the id auto generated - `--build-part-id` an Oid in hex form for the Build Part identifier for the folder - omit to have the id auto generated - `--build-part-name` name of the build part - if omitted, the name of the leaf folder given in `--local-path` is used - `--metadata-path` path to a json formatted file with meta data information about the build. Meta-data must be provided if `--create-build` is set - `--metadata` key-value pairs separated by ';' with build meta data for the build. (key1=value1;key2=value2). Meta-data must be provided if `--create-build` is set - `--clean` ignore any existing blocks of chunk data and upload a fresh set of blocks - `--allow-multipart` enable usage of multi-part http upload requests - `--manifest-path` path to text file listing files to include in upload. 
Omit to upload everything in `--local-path` - `builds download` download a folder from Cloud Build API (**INCOMPLETE - WILL WIPE UNTRACKED DATA FROM TARGET FOLDER**) - `--local-path` target folder to download to - `--build-id` an Oid in hex form for the Build identifier to use - `--build-part-id` a comma separated list of Oid in hex for the build part identifier(s) to download - mutually exclusive with `--build-part-name` - `--build-part-name` a comma separated list of names for the build part(s) to download - if omitted, the name of the leaf folder given in `--local-path` is used - `--clean` deletes all data in target folder before downloading (NON-CLEAN IS NOT IMPLEMENTED YET) - `--allow-multipart` enable usage of multi-part http download requests - `builds diff` download a folder from Cloud Build API - `--local-path` target folder to download to - `--compare-path` folder to compare target with - `--only-chunked` compare only files that would be chunked - `builds fetch-blob` fetch and validate a blob from remote store - `--build-id` an Oid in hex form for the Build identifier to use - `--blob-hash` an IoHash in hex form identifying the blob to download - `builds validate-part` fetch a build part and validate all referenced attachments - `--build-id` an Oid in hex form for the Build identifier to use - `--build-part-id` an Oid in hex for the build part identifier to validate - mutually exclusive with `--build-part-name` - `--build-part-name` a name for the build part to validate - mutually exclusive with `--build-part-id` - `builds test` a series of operations that upload, download and test various aspects of incremental operations - `--local-path` source folder to upload - Options for Cloud Build API remote store (`list`, `upload`, `download`, `fetch-blob`, `validate-part`) - `--url` Cloud Builds URL - `--assume-http2` assume that the builds endpoint is an HTTP/2 endpoint, skipping HTTP/1.1 upgrade handshake - `--namespace` Builds Storage namespace - `--bucket` Builds 
Storage bucket - Authentication options for Cloud Build API - Auth token - `--access-token` HTTP auth Cloud Storage access token - `--access-token-env` name of environment variable that holds the HTTP auth Cloud Storage access token - `--access-token-path` path to json file that holds the HTTP auth Cloud Storage access token - OpenId authentication - `--openid-provider-name` Open ID provider name - `--openid-provider-url` Open ID provider url - `--openid-client-id` Open ID client id - `--openid-refresh-token` Open ID refresh token - `--encryption-aes-key` 256 bit AES encryption key for storing OpenID credentials - `--encryption-aes-iv` 128 bit AES encryption initialization vector for storing OpenID credentials - OAuth authentication - `--oauth-url` OAuth provider url - `--oauth-clientid` OAuth client id - `--oauth-clientsecret` OAuth client secret - Options for file based remote store used for testing purposes (`list`, `upload`, `download`, `fetch-blob`, `validate-part`, `test`) - `--storage-path` path to folder to store builds data - `--json-metadata` enable json output in store for all compact binary objects (off by default) - Output options for all builds commands - `--plain-progress` use plain line-by-line progress output - `--verbose` --- src/zenutil/chunkingcontroller.cpp | 265 +++++++++++++++++++++++++++++++++++++ 1 file changed, 265 insertions(+) create mode 100644 src/zenutil/chunkingcontroller.cpp (limited to 'src/zenutil/chunkingcontroller.cpp') diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp new file mode 100644 index 000000000..bc0e57b14 --- /dev/null +++ b/src/zenutil/chunkingcontroller.cpp @@ -0,0 +1,265 @@ +// Copyright Epic Games, Inc. All Rights Reserved. 
+ +#include + +#include +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { +using namespace std::literals; + +namespace { + std::vector ReadStringArray(CbArrayView StringArray) + { + std::vector Result; + Result.reserve(StringArray.Num()); + for (CbFieldView FieldView : StringArray) + { + Result.emplace_back(FieldView.AsString()); + } + return Result; + } + + ChunkedParams ReadChunkParams(CbObjectView Params) + { + bool UseThreshold = Params["UseThreshold"sv].AsBool(true); + size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize); + size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize); + size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize); + + return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize}; + } + +} // namespace + +class BasicChunkingController : public ChunkingController +{ +public: + BasicChunkingController(std::span ExcludeExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams) + : m_ChunkExcludeExtensions(ExcludeExtensions.begin(), ExcludeExtensions.end()) + , m_ChunkFileSizeLimit(ChunkFileSizeLimit) + , m_ChunkingParams(ChunkingParams) + { + } + + BasicChunkingController(CbObjectView Parameters) + : m_ChunkExcludeExtensions(ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView())) + , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit)) + , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())) + { + } + + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic& BytesProcessed) const override + { + const bool ExcludeFromChunking = + std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != + m_ChunkExcludeExtensions.end(); + + if (ExcludeFromChunking || 
(RawSize < m_ChunkFileSizeLimit)) + { + return false; + } + + BasicFile Buffer(InputPath, BasicFile::Mode::kRead); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + return true; + } + + virtual std::string_view GetName() const override { return Name; } + + virtual CbObject GetParameters() const override + { + CbObjectWriter Writer; + Writer.BeginArray("ChunkExcludeExtensions"sv); + { + for (const std::string& Extension : m_ChunkExcludeExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit); + Writer.BeginObject("ChunkingParams"sv); + { + Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold); + + Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize); + Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize); + Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize); + } + Writer.EndObject(); // ChunkingParams + return Writer.Save(); + } + static constexpr std::string_view Name = "BasicChunkingController"sv; + +protected: + const std::vector m_ChunkExcludeExtensions; + const uint64_t m_ChunkFileSizeLimit; + const ChunkedParams m_ChunkingParams; +}; + +class ChunkingControllerWithFixedChunking : public ChunkingController +{ +public: + ChunkingControllerWithFixedChunking(std::span FixedChunkingExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams, + uint32_t FixedChunkingChunkSize) + : m_FixedChunkingExtensions(FixedChunkingExtensions.begin(), FixedChunkingExtensions.end()) + , m_ChunkFileSizeLimit(ChunkFileSizeLimit) + , m_ChunkingParams(ChunkingParams) + , m_FixedChunkingChunkSize(FixedChunkingChunkSize) + { + } + + ChunkingControllerWithFixedChunking(CbObjectView Parameters) + : m_FixedChunkingExtensions(ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView())) + , 
m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit)) + , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())) + , m_FixedChunkingChunkSize(Parameters["FixedChunkingChunkSize"sv].AsUInt32(16u * 1024u * 1024u)) + { + } + + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic& BytesProcessed) const override + { + if (RawSize < m_ChunkFileSizeLimit) + { + return false; + } + const bool FixedChunking = std::find(m_FixedChunkingExtensions.begin(), m_FixedChunkingExtensions.end(), InputPath.extension()) != + m_FixedChunkingExtensions.end(); + + if (FixedChunking) + { + IoHashStream FullHash; + IoBuffer Source = IoBufferBuilder::MakeFromFile(InputPath); + uint64_t Offset = 0; + tsl::robin_map ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(1 + (RawSize / m_FixedChunkingChunkSize)); + while (Offset < RawSize) + { + uint64_t ChunkSize = std::min(RawSize - Offset, m_FixedChunkingChunkSize); + IoBuffer Chunk(Source, Offset, ChunkSize); + MemoryView ChunkData = Chunk.GetView(); + FullHash.Append(ChunkData); + + IoHash ChunkHash = IoHash::HashBuffer(ChunkData); + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + OutChunked.Info.ChunkSequence.push_back(It->second); + } + else + { + uint32_t ChunkIndex = gsl::narrow(OutChunked.Info.ChunkHashes.size()); + OutChunked.Info.ChunkHashes.push_back(ChunkHash); + OutChunked.Info.ChunkSequence.push_back(ChunkIndex); + OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow(ChunkSize)}); + } + Offset += ChunkSize; + BytesProcessed.fetch_add(ChunkSize); + } + OutChunked.Info.RawSize = RawSize; + OutChunked.Info.RawHash = FullHash.GetHash(); + return true; + } + else + { + BasicFile Buffer(InputPath, BasicFile::Mode::kRead); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + return true; + } + 
} + + virtual std::string_view GetName() const override { return Name; } + + virtual CbObject GetParameters() const override + { + CbObjectWriter Writer; + Writer.BeginArray("FixedChunkingExtensions"); + { + for (const std::string& Extension : m_FixedChunkingExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit); + Writer.BeginObject("ChunkingParams"sv); + { + Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold); + + Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize); + Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize); + Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize); + } + Writer.EndObject(); // ChunkingParams + Writer.AddInteger("FixedChunkingChunkSize"sv, m_FixedChunkingChunkSize); + return Writer.Save(); + } + + static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv; + +protected: + const std::vector m_FixedChunkingExtensions; + const uint64_t m_ChunkFileSizeLimit; + const ChunkedParams m_ChunkingParams; + const uint32_t m_FixedChunkingChunkSize; +}; + +std::unique_ptr +CreateBasicChunkingController(std::span ExcludeExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams) +{ + return std::make_unique(ExcludeExtensions, ChunkFileSizeLimit, ChunkingParams); +} +std::unique_ptr +CreateBasicChunkingController(CbObjectView Parameters) +{ + return std::make_unique(Parameters); +} + +std::unique_ptr +CreateChunkingControllerWithFixedChunking(std::span FixedChunkingExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams, + uint32_t FixedChunkingChunkSize) +{ + return std::make_unique(FixedChunkingExtensions, + ChunkFileSizeLimit, + ChunkingParams, + FixedChunkingChunkSize); +} +std::unique_ptr +CreateChunkingControllerWithFixedChunking(CbObjectView Parameters) +{ + return std::make_unique(Parameters); +} + 
+std::unique_ptr +CreateChunkingController(std::string_view Name, CbObjectView Parameters) +{ + if (Name == BasicChunkingController::Name) + { + return CreateBasicChunkingController(Parameters); + } + else if (Name == ChunkingControllerWithFixedChunking::Name) + { + return CreateChunkingControllerWithFixedChunking(Parameters); + } + return {}; +} + +} // namespace zen -- cgit v1.2.3 From 5791f51cccea1d4e5365456c8da89dbac0dd3ec0 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 28 Feb 2025 12:39:48 +0100 Subject: improve error handling (#289) * clearer errors * quicker abort * handle deleted local files * simplify parallellwork error handling * don't finish progress on destructor - gives wrong impression * graceful ctrl-c handling --- src/zenutil/chunkingcontroller.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/zenutil/chunkingcontroller.cpp') diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp index bc0e57b14..017d12433 100644 --- a/src/zenutil/chunkingcontroller.cpp +++ b/src/zenutil/chunkingcontroller.cpp @@ -58,7 +58,8 @@ public: virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, - std::atomic& BytesProcessed) const override + std::atomic& BytesProcessed, + std::atomic& AbortFlag) const override { const bool ExcludeFromChunking = std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != @@ -70,7 +71,7 @@ public: } BasicFile Buffer(InputPath, BasicFile::Mode::kRead); - OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed, &AbortFlag); return true; } @@ -132,7 +133,8 @@ public: virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, - std::atomic& BytesProcessed) const override + std::atomic& BytesProcessed, + 
std::atomic& AbortFlag) const override { if (RawSize < m_ChunkFileSizeLimit) { @@ -150,6 +152,10 @@ public: ChunkHashToChunkIndex.reserve(1 + (RawSize / m_FixedChunkingChunkSize)); while (Offset < RawSize) { + if (AbortFlag) + { + return false; + } uint64_t ChunkSize = std::min(RawSize - Offset, m_FixedChunkingChunkSize); IoBuffer Chunk(Source, Offset, ChunkSize); MemoryView ChunkData = Chunk.GetView(); -- cgit v1.2.3 From fb09d861fd76e459ac86bec388bd406aaca8e681 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 12 Mar 2025 10:51:57 +0100 Subject: improved block gen logic (#302) - Improvement: Reduced memory usage during upload and part upload validation - Improvement: Reduced I/O usage during upload and download - Improvement: Faster block regeneration when uploading in response to PutBuild/FinalizeBuild - Improvement: More trace scopes for build upload operations - Bugfix: Fixed crash during download when trying to write outside a file range --- src/zenutil/chunkingcontroller.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/zenutil/chunkingcontroller.cpp') diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp index 017d12433..2a7057a46 100644 --- a/src/zenutil/chunkingcontroller.cpp +++ b/src/zenutil/chunkingcontroller.cpp @@ -4,6 +4,7 @@ #include #include +#include ZEN_THIRD_PARTY_INCLUDES_START #include @@ -61,6 +62,7 @@ public: std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { + ZEN_TRACE_CPU("BasicChunkingController::ProcessFile"); const bool ExcludeFromChunking = std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != m_ChunkExcludeExtensions.end(); @@ -136,6 +138,7 @@ public: std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { + ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile"); if (RawSize < m_ChunkFileSizeLimit) { return false; @@ -145,6 +148,7 @@ public: if (FixedChunking) { + 
ZEN_TRACE_CPU("FixedChunking"); IoHashStream FullHash; IoBuffer Source = IoBufferBuilder::MakeFromFile(InputPath); uint64_t Offset = 0; -- cgit v1.2.3