From 49309fa7bbd38746d33cc48acfc5cd59598dbbe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Obr=C4=99bski?= Date: Mon, 3 Feb 2025 09:38:50 +0100 Subject: Fix workspace shares reply array (#280) Workspace shares were sent in the '/ws' reply as repeating objects 'shares' instead of a 'shares' array of objects --- src/zenserver/workspaces/httpworkspaces.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/zenserver/workspaces/httpworkspaces.cpp b/src/zenserver/workspaces/httpworkspaces.cpp index 2d59c9357..905ba5ab2 100644 --- a/src/zenserver/workspaces/httpworkspaces.cpp +++ b/src/zenserver/workspaces/httpworkspaces.cpp @@ -51,9 +51,9 @@ namespace { WriteWorkspaceConfig(Writer, WorkspaceConfig); if (std::optional> ShareIds = Workspaces.GetWorkspaceShares(WorkspaceConfig.Id); ShareIds) { - for (const Oid& ShareId : *ShareIds) + Writer.BeginArray("shares"); { - Writer.BeginArray("shares"); + for (const Oid& ShareId : *ShareIds) { if (std::optional WorkspaceShareConfig = Workspaces.GetWorkspaceShareConfiguration(WorkspaceConfig.Id, ShareId); @@ -66,8 +66,8 @@ namespace { Writer.EndObject(); } } - Writer.EndArray(); } + Writer.EndArray(); } } -- cgit v1.2.3 From d39724eb644cab4ec5bbf19a703cb770b34e68c4 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 12 Feb 2025 08:58:52 +0100 Subject: improved builds api interface in jupiter (#281) * multipart upload/download iterface in jupiter * review fixes --- src/zenhttp/httpclient.cpp | 311 ++++++++++++--------- src/zenhttp/httpclientauth.cpp | 3 + src/zenhttp/include/zenhttp/httpclient.h | 4 +- .../include/zenutil/jupiter/jupitersession.h | 70 +++-- src/zenutil/jupiter/jupitersession.cpp | 293 +++++++++++++++++++ 5 files changed, 518 insertions(+), 163 deletions(-) (limited to 'src') diff --git a/src/zenhttp/httpclient.cpp b/src/zenhttp/httpclient.cpp index 8052a8fd5..211a5d05c 100644 --- a/src/zenhttp/httpclient.cpp +++ b/src/zenhttp/httpclient.cpp @@ -282,7 +282,7 @@ AsCprBody(const IoBuffer& Obj) ////////////////////////////////////////////////////////////////////////// static HttpClient::Response -ResponseWithPayload(cpr::Response& HttpResponse, const HttpResponseCode WorkResponseCode, IoBuffer&& Payload) +ResponseWithPayload(std::string_view SessionId, cpr::Response& HttpResponse, const HttpResponseCode WorkResponseCode, IoBuffer&& Payload) { // This ends up doing a memcpy, would be good to get rid of it by streaming results // into buffer directly @@ -297,7 +297,7 @@ ResponseWithPayload(cpr::Response& HttpResponse, const HttpResponseCode WorkResp if (!IsHttpSuccessCode(WorkResponseCode) && WorkResponseCode != HttpResponseCode::NotFound) { - ZEN_WARN("HttpClient request failed: {}", HttpResponse); + ZEN_WARN("HttpClient request failed (session: {}): {}", SessionId, HttpResponse); } return HttpClient::Response{.StatusCode = WorkResponseCode, @@ -309,12 +309,12 @@ ResponseWithPayload(cpr::Response& HttpResponse, const HttpResponseCode WorkResp } static HttpClient::Response -CommonResponse(cpr::Response&& HttpResponse, IoBuffer&& Payload = {}) +CommonResponse(std::string_view SessionId, cpr::Response&& HttpResponse, IoBuffer&& Payload = {}) { const HttpResponseCode WorkResponseCode = HttpResponseCode(HttpResponse.status_code); if (HttpResponse.error) { - ZEN_WARN("HttpClient client error: {}", HttpResponse); + ZEN_WARN("HttpClient client error (session: {}): {}", SessionId, HttpResponse); // Client side failure code return HttpClient::Response{ @@ -339,6 +339,7 @@ CommonResponse(cpr::Response&& HttpResponse, IoBuffer&& Payload = {}) else { return ResponseWithPayload( + SessionId, HttpResponse, WorkResponseCode, Payload ? std::move(Payload) : IoBufferBuilder::MakeCloneFromMemory(HttpResponse.text.data(), HttpResponse.text.size())); @@ -448,7 +449,7 @@ ValidatePayload(cpr::Response& Response, std::unique_ptr&& Func, uint8_t RetryCount) +DoWithRetry(std::string_view SessionId, std::function&& Func, uint8_t RetryCount) { uint8_t Attempt = 0; cpr::Response Result = Func(); @@ -456,14 +457,17 @@ DoWithRetry(std::function&& Func, uint8_t RetryCount) { Sleep(100 * (Attempt + 1)); Attempt++; - ZEN_INFO("{} Attempt {}/{}", CommonResponse(std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); + ZEN_INFO("{} Attempt {}/{}", CommonResponse(SessionId, std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); Result = Func(); } return Result; } static cpr::Response -DoWithRetry(std::function&& Func, std::unique_ptr& PayloadFile, uint8_t RetryCount) +DoWithRetry(std::string_view SessionId, + std::function&& Func, + std::unique_ptr& PayloadFile, + uint8_t RetryCount) { uint8_t Attempt = 0; cpr::Response Result = Func(); @@ -482,7 +486,7 @@ DoWithRetry(std::function&& Func, std::unique_ptrAllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); - return Sess.Put(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->SetBody(AsCprBody(Payload)); + Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); + return Sess.Put(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -845,31 +852,36 @@ HttpClient::Put(std::string_view Url, const KeyValueMap& Parameters) { ZEN_TRACE_CPU("HttpClient::Put"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, - Url, - m_ConnectionSettings, - {{"Content-Length", "0"}}, - Parameters, - m_SessionId, - GetAccessToken()); - return Sess.Put(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse(m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, + Url, + m_ConnectionSettings, + {{"Content-Length", "0"}}, + Parameters, + m_SessionId, + GetAccessToken()); + return Sess.Put(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response HttpClient::Get(std::string_view Url, const KeyValueMap& AdditionalHeader, const KeyValueMap& Parameters) { ZEN_TRACE_CPU("HttpClient::Get"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); - return Sess.Get(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); + return Sess.Get(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -877,13 +889,16 @@ HttpClient::Head(std::string_view Url, const KeyValueMap& AdditionalHeader) { ZEN_TRACE_CPU("HttpClient::Head"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - return Sess.Head(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + return Sess.Head(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -891,13 +906,16 @@ HttpClient::Delete(std::string_view Url, const KeyValueMap& AdditionalHeader) { ZEN_TRACE_CPU("HttpClient::Delete"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - return Sess.Delete(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + return Sess.Delete(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -905,13 +923,16 @@ HttpClient::Post(std::string_view Url, const KeyValueMap& AdditionalHeader, cons { ZEN_TRACE_CPU("HttpClient::PostNoPayload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); - return Sess.Post(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); + return Sess.Post(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -925,16 +946,19 @@ HttpClient::Post(std::string_view Url, const IoBuffer& Payload, ZenContentType C { ZEN_TRACE_CPU("HttpClient::PostWithPayload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - return Sess.Post(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + + Sess->SetBody(AsCprBody(Payload)); + Sess->UpdateHeader({HeaderContentType(ContentType)}); + return Sess.Post(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -942,16 +966,19 @@ HttpClient::Post(std::string_view Url, CbObject Payload, const KeyValueMap& Addi { ZEN_TRACE_CPU("HttpClient::PostObjectPayload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(ZenContentType::kCbObject)}); - return Sess.Post(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + + Sess->SetBody(AsCprBody(Payload)); + Sess->UpdateHeader({HeaderContentType(ZenContentType::kCbObject)}); + return Sess.Post(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -965,24 +992,27 @@ HttpClient::Post(std::string_view Url, const CompositeBuffer& Payload, ZenConten { ZEN_TRACE_CPU("HttpClient::Post"); - return CommonResponse(DoWithRetry( - [&]() { - uint64_t SizeLeft = Payload.GetSize(); - CompositeBuffer::Iterator BufferIt = Payload.GetIterator(0); - auto ReadCallback = [&Payload, &BufferIt, &SizeLeft](char* buffer, size_t& size, intptr_t) { - size = Min(size, SizeLeft); - MutableMemoryView Data(buffer, size); - Payload.CopyTo(Data, BufferIt); - SizeLeft -= size; - return true; - }; - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - - return Sess.Post(cpr::ReadCallback(gsl::narrow(Payload.GetSize()), ReadCallback)); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + uint64_t SizeLeft = Payload.GetSize(); + CompositeBuffer::Iterator BufferIt = Payload.GetIterator(0); + auto ReadCallback = [&Payload, &BufferIt, &SizeLeft](char* buffer, size_t& size, intptr_t) { + size = Min(size, SizeLeft); + MutableMemoryView Data(buffer, size); + Payload.CopyTo(Data, BufferIt); + SizeLeft -= size; + return true; + }; + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->UpdateHeader({HeaderContentType(ContentType)}); + + return Sess.Post(cpr::ReadCallback(gsl::narrow(Payload.GetSize()), ReadCallback)); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -990,29 +1020,32 @@ HttpClient::Upload(std::string_view Url, const IoBuffer& Payload, const KeyValue { ZEN_TRACE_CPU("HttpClient::Upload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); - - uint64_t Offset = 0; - if (Payload.IsWholeFile()) - { - auto ReadCallback = [&Payload, &Offset](char* buffer, size_t& size, intptr_t) { - size = Min(size, Payload.GetSize() - Offset); - IoBuffer PayloadRange = IoBuffer(Payload, Offset, size); - MutableMemoryView Data(buffer, size); - Data.CopyFrom(PayloadRange.GetView()); - Offset += size; - return true; - }; - return Sess.Put(cpr::ReadCallback(gsl::narrow(Payload.GetSize()), ReadCallback)); - } - Sess->SetBody(AsCprBody(Payload)); - return Sess.Put(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); + + uint64_t Offset = 0; + if (Payload.IsWholeFile()) + { + auto ReadCallback = [&Payload, &Offset](char* buffer, size_t& size, intptr_t) { + size = Min(size, Payload.GetSize() - Offset); + IoBuffer PayloadRange = IoBuffer(Payload, Offset, size); + MutableMemoryView Data(buffer, size); + Data.CopyFrom(PayloadRange.GetView()); + Offset += size; + return true; + }; + return Sess.Put(cpr::ReadCallback(gsl::narrow(Payload.GetSize()), ReadCallback)); + } + Sess->SetBody(AsCprBody(Payload)); + return Sess.Put(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -1020,24 +1053,27 @@ HttpClient::Upload(std::string_view Url, const CompositeBuffer& Payload, ZenCont { ZEN_TRACE_CPU("HttpClient::Upload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - - uint64_t SizeLeft = Payload.GetSize(); - CompositeBuffer::Iterator BufferIt = Payload.GetIterator(0); - auto ReadCallback = [&Payload, &BufferIt, &SizeLeft](char* buffer, size_t& size, intptr_t) { - size = Min(size, SizeLeft); - MutableMemoryView Data(buffer, size); - Payload.CopyTo(Data, BufferIt); - SizeLeft -= size; - return true; - }; - return Sess.Put(cpr::ReadCallback(gsl::narrow(Payload.GetSize()), ReadCallback)); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->UpdateHeader({HeaderContentType(ContentType)}); + + uint64_t SizeLeft = Payload.GetSize(); + CompositeBuffer::Iterator BufferIt = Payload.GetIterator(0); + auto ReadCallback = [&Payload, &BufferIt, &SizeLeft](char* buffer, size_t& size, intptr_t) { + size = Min(size, SizeLeft); + MutableMemoryView Data(buffer, size); + Payload.CopyTo(Data, BufferIt); + SizeLeft -= size; + return true; + }; + return Sess.Put(cpr::ReadCallback(gsl::narrow(Payload.GetSize()), ReadCallback)); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -1048,6 +1084,7 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold std::string PayloadString; std::unique_ptr PayloadFile; cpr::Response Response = DoWithRetry( + m_SessionId, [&]() { auto GetHeader = [&](std::string header) -> std::pair { size_t DelimiterPos = header.find(':'); @@ -1249,7 +1286,7 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold PayloadFile, m_ConnectionSettings.RetryCount); - return CommonResponse(std::move(Response), PayloadFile ? PayloadFile->DetachToIoBuffer() : IoBuffer{}); + return CommonResponse(m_SessionId, std::move(Response), PayloadFile ? PayloadFile->DetachToIoBuffer() : IoBuffer{}); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/zenhttp/httpclientauth.cpp b/src/zenhttp/httpclientauth.cpp index 04ac2ad3f..7fb3224f1 100644 --- a/src/zenhttp/httpclientauth.cpp +++ b/src/zenhttp/httpclientauth.cpp @@ -2,6 +2,7 @@ #include +#include #include ZEN_THIRD_PARTY_INCLUDES_START @@ -41,6 +42,7 @@ namespace zen { namespace httpclientauth { if (Response.error || Response.status_code != 200) { + ZEN_WARN("Failed fetching OAuth access token {}. Reason: '{}'", OAuthParams.Url, Response.reason); return HttpClientAccessToken{}; } @@ -49,6 +51,7 @@ namespace zen { namespace httpclientauth { if (JsonError.empty() == false) { + ZEN_WARN("Unable to parse OAuth json response from {}. Reason: '{}'", OAuthParams.Url, JsonError); return HttpClientAccessToken{}; } diff --git a/src/zenhttp/include/zenhttp/httpclient.h b/src/zenhttp/include/zenhttp/httpclient.h index 1cf77d794..a46b9fd83 100644 --- a/src/zenhttp/include/zenhttp/httpclient.h +++ b/src/zenhttp/include/zenhttp/httpclient.h @@ -60,9 +60,6 @@ struct HttpClientSettings class HttpClient { public: - struct Settings - { - }; HttpClient(std::string_view BaseUri, const HttpClientSettings& Connectionsettings = {}); ~HttpClient(); @@ -180,6 +177,7 @@ public: LoggerRef Logger() { return m_Log; } std::string_view GetBaseUri() const { return m_BaseUri; } bool Authenticate(); + std::string_view GetSessionId() const { return m_SessionId; } private: const std::optional GetAccessToken(); diff --git a/src/zenutil/include/zenutil/jupiter/jupitersession.h b/src/zenutil/include/zenutil/jupiter/jupitersession.h index 6a80332f4..075c35b40 100644 --- a/src/zenutil/include/zenutil/jupiter/jupitersession.h +++ b/src/zenutil/include/zenutil/jupiter/jupitersession.h @@ -102,29 +102,48 @@ public: std::vector Filter(std::string_view Namespace, std::string_view BucketId, const std::vector& ChunkHashes); - JupiterResult PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload); - JupiterResult GetBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); - JupiterResult FinalizeBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); - PutBuildPartResult PutBuildPart(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - std::string_view PartName, - const IoBuffer& Payload); - JupiterResult GetBuildPart(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); - JupiterResult PutBuildBlob(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - const IoHash& Hash, - ZenContentType ContentType, - const CompositeBuffer& Payload); - JupiterResult GetBuildBlob(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - const IoHash& Hash, - std::filesystem::path TempFolderPath); + JupiterResult ListBuilds(std::string_view Namespace, std::string_view BucketId, const IoBuffer& Payload); + JupiterResult PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload); + JupiterResult GetBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + JupiterResult FinalizeBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + PutBuildPartResult PutBuildPart(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + std::string_view PartName, + const IoBuffer& Payload); + JupiterResult GetBuildPart(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); + JupiterResult PutBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + ZenContentType ContentType, + const CompositeBuffer& Payload); + JupiterResult GetBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + std::filesystem::path TempFolderPath); + + JupiterResult PutMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function&& Transmitter, + std::vector>& OutWorkItems); + JupiterResult GetMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + uint64_t ChunkSize, + std::function&& Receiver, + std::vector>& OutWorkItems); JupiterResult PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, @@ -137,6 +156,11 @@ public: const Oid& PartId, const IoHash& RawHash); JupiterResult FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); + JupiterResult GetBlockMetadata(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + IoBuffer Payload); private: inline LoggerRef Log() { return m_Log; } diff --git a/src/zenutil/jupiter/jupitersession.cpp b/src/zenutil/jupiter/jupitersession.cpp index f706a7efc..d56927b44 100644 --- a/src/zenutil/jupiter/jupitersession.cpp +++ b/src/zenutil/jupiter/jupitersession.cpp @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include @@ -354,6 +356,16 @@ JupiterSession::CacheTypeExists(std::string_view Namespace, std::string_view Typ return Result; } +JupiterResult +JupiterSession::ListBuilds(std::string_view Namespace, std::string_view BucketId, const IoBuffer& Payload) +{ + ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); + HttpClient::Response Response = m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/search", Namespace, BucketId), + Payload, + {HttpClient::Accept(ZenContentType::kCbObject)}); + return detail::ConvertResponse(Response, "JupiterSession::ListBuilds"sv); +} + JupiterResult JupiterSession::PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload) { @@ -436,6 +448,271 @@ JupiterSession::PutBuildBlob(std::string_view Namespace, return detail::ConvertResponse(Response, "JupiterSession::PutBuildBlob"sv); } +JupiterResult +JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function&& Transmitter, + std::vector>& OutWorkItems) +{ + struct MultipartUploadResponse + { + struct Part + { + uint64_t FirstByte; + uint64_t LastByte; + std::string PartId; + std::string QueryString; + }; + + std::string UploadId; + std::string BlobName; + std::vector Parts; + + static MultipartUploadResponse Parse(CbObject& Payload) + { + MultipartUploadResponse Result; + Result.UploadId = Payload["uploadId"sv].AsString(); + Result.BlobName = Payload["blobName"sv].AsString(); + CbArrayView PartsArray = Payload["parts"sv].AsArrayView(); + Result.Parts.reserve(PartsArray.Num()); + for (CbFieldView PartView : PartsArray) + { + CbObjectView PartObject = PartView.AsObjectView(); + Result.Parts.emplace_back(Part{ + .FirstByte = PartObject["firstByte"sv].AsUInt64(), + .LastByte = PartObject["lastByte"sv].AsUInt64(), + .PartId = std::string(PartObject["partId"sv].AsString()), + .QueryString = std::string(PartObject["queryString"sv].AsString()), + }); + } + return Result; + } + }; + + CbObjectWriter StartMultipartPayloadWriter; + StartMultipartPayloadWriter.AddInteger("blobLength"sv, PayloadSize); + CbObject StartMultipartPayload = StartMultipartPayloadWriter.Save(); + + std::string StartMultipartResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/startMultipartUpload", + Namespace, + BucketId, + BuildId, + PartId, + Hash.ToHexString()); + // ZEN_INFO("POST: {}", StartMultipartResponseRequestString); + HttpClient::Response StartMultipartResponse = + m_HttpClient.Post(StartMultipartResponseRequestString, StartMultipartPayload, HttpClient::Accept(ZenContentType::kCbObject)); + if (!StartMultipartResponse.IsSuccess()) + { + ZEN_WARN("{}", StartMultipartResponse.ErrorMessage("startMultipartUpload: ")); + return detail::ConvertResponse(StartMultipartResponse, "JupiterSession::PutMultipartBuildBlob"sv); + } + CbObject ResponseObject = LoadCompactBinaryObject(StartMultipartResponse.ResponsePayload); + + struct WorkloadData + { + MultipartUploadResponse PartDescription; + std::function Transmitter; + std::atomic PartsLeft; + }; + + std::shared_ptr Workload(std::make_shared()); + + Workload->PartDescription = MultipartUploadResponse::Parse(ResponseObject); + Workload->Transmitter = std::move(Transmitter); + Workload->PartsLeft = Workload->PartDescription.Parts.size(); + + for (size_t PartIndex = 0; PartIndex < Workload->PartDescription.Parts.size(); PartIndex++) + { + OutWorkItems.emplace_back([this, Namespace, BucketId, BuildId, PartId, Hash, ContentType, Workload, PartIndex]( + bool& OutIsComplete) -> JupiterResult { + const MultipartUploadResponse::Part& Part = Workload->PartDescription.Parts[PartIndex]; + IoBuffer PartPayload = Workload->Transmitter(Part.FirstByte, Part.LastByte - Part.FirstByte); + std::string MultipartUploadResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/uploadMultipart{}", + Namespace, + BucketId, + BuildId, + PartId, + Hash.ToHexString(), + Part.QueryString); + // ZEN_INFO("PUT: {}", MultipartUploadResponseRequestString); + HttpClient::Response MultipartUploadResponse = m_HttpClient.Put(MultipartUploadResponseRequestString, PartPayload); + if (!MultipartUploadResponse.IsSuccess()) + { + ZEN_WARN("{}", MultipartUploadResponse.ErrorMessage(MultipartUploadResponseRequestString)); + } + OutIsComplete = Workload->PartsLeft.fetch_sub(1) == 1; + if (OutIsComplete) + { + int64_t TotalUploadedBytes = MultipartUploadResponse.UploadedBytes; + int64_t TotalDownloadedBytes = MultipartUploadResponse.DownloadedBytes; + double TotalElapsedSeconds = MultipartUploadResponse.ElapsedSeconds; + HttpClient::Response MultipartEndResponse = MultipartUploadResponse; + while (MultipartEndResponse.IsSuccess()) + { + CbObjectWriter CompletePayloadWriter; + CompletePayloadWriter.AddString("blobName"sv, Workload->PartDescription.BlobName); + CompletePayloadWriter.AddString("uploadId"sv, Workload->PartDescription.UploadId); + CompletePayloadWriter.AddBool("isCompressed"sv, ContentType == ZenContentType::kCompressedBinary); + CompletePayloadWriter.BeginArray("partIds"sv); + std::unordered_map PartNameToIndex; + for (size_t UploadPartIndex = 0; UploadPartIndex < Workload->PartDescription.Parts.size(); UploadPartIndex++) + { + const MultipartUploadResponse::Part& PartDescription = Workload->PartDescription.Parts[UploadPartIndex]; + PartNameToIndex.insert({PartDescription.PartId, UploadPartIndex}); + CompletePayloadWriter.AddString(PartDescription.PartId); + } + CompletePayloadWriter.EndArray(); // "partIds" + CbObject CompletePayload = CompletePayloadWriter.Save(); + + std::string MultipartEndResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/completeMultipart", + Namespace, + BucketId, + BuildId, + PartId, + Hash.ToHexString()); + + MultipartEndResponse = m_HttpClient.Post(MultipartEndResponseRequestString, + CompletePayload, + HttpClient::Accept(ZenContentType::kCbObject)); + TotalUploadedBytes += MultipartEndResponse.UploadedBytes; + TotalDownloadedBytes += MultipartEndResponse.DownloadedBytes; + TotalElapsedSeconds += MultipartEndResponse.ElapsedSeconds; + if (MultipartEndResponse.IsSuccess()) + { + CbObject ResponseObject = MultipartEndResponse.AsObject(); + CbArrayView MissingPartsArrayView = ResponseObject["missingParts"sv].AsArrayView(); + if (MissingPartsArrayView.Num() == 0) + { + break; + } + else + { + for (CbFieldView PartIdView : MissingPartsArrayView) + { + std::string RetryPartId(PartIdView.AsString()); + size_t RetryPartIndex = PartNameToIndex.at(RetryPartId); + const MultipartUploadResponse::Part& RetryPart = Workload->PartDescription.Parts[RetryPartIndex]; + IoBuffer RetryPartPayload = + Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte); + std::string RetryMultipartUploadResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/uploadMultipart{}", + Namespace, + BucketId, + BuildId, + RetryPartId, + Hash.ToHexString(), + RetryPart.QueryString); + + MultipartUploadResponse = m_HttpClient.Put(RetryMultipartUploadResponseRequestString, RetryPartPayload); + TotalUploadedBytes = MultipartUploadResponse.UploadedBytes; + TotalDownloadedBytes = MultipartUploadResponse.DownloadedBytes; + TotalElapsedSeconds = MultipartUploadResponse.ElapsedSeconds; + if (!MultipartUploadResponse.IsSuccess()) + { + ZEN_WARN("{}", MultipartUploadResponse.ErrorMessage(RetryMultipartUploadResponseRequestString)); + MultipartEndResponse = MultipartUploadResponse; + } + } + } + } + else + { + ZEN_WARN("{}", MultipartEndResponse.ErrorMessage(MultipartEndResponseRequestString)); + } + } + MultipartEndResponse.UploadedBytes = TotalUploadedBytes; + MultipartEndResponse.DownloadedBytes = TotalDownloadedBytes; + MultipartEndResponse.ElapsedSeconds = TotalElapsedSeconds; + return detail::ConvertResponse(MultipartEndResponse, "JupiterSession::PutMultipartBuildBlob"sv); + } + return detail::ConvertResponse(MultipartUploadResponse, "JupiterSession::PutMultipartBuildBlob"sv); + }); + } + return detail::ConvertResponse(StartMultipartResponse, "JupiterSession::PutMultipartBuildBlob"sv); +} + +JupiterResult +JupiterSession::GetMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + uint64_t ChunkSize, + std::function&& Receiver, + std::vector>& OutWorkItems) +{ + std::string RequestUrl = + fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()); + HttpClient::Response Response = + m_HttpClient.Get(RequestUrl, HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", 0, ChunkSize - 1)}})); + if (Response.IsSuccess()) + { + if (std::string_view ContentRange = Response.Header.Entries["Content-Range"]; !ContentRange.empty()) + { + if (std::string_view::size_type SizeDelimiterPos = ContentRange.find('/'); SizeDelimiterPos != std::string_view::npos) + { + if (std::optional TotalSizeMaybe = ParseInt(ContentRange.substr(SizeDelimiterPos + 1)); + TotalSizeMaybe.has_value()) + { + uint64_t TotalSize = TotalSizeMaybe.value(); + uint64_t PayloadSize = Response.ResponsePayload.GetSize(); + + Receiver(0, Response.ResponsePayload, TotalSize); + + if (TotalSize > PayloadSize) + { + struct WorkloadData + { + std::function Receiver; + std::atomic BytesRemaining; + }; + + std::shared_ptr Workload(std::make_shared()); + Workload->Receiver = std::move(Receiver); + Workload->BytesRemaining = TotalSize - PayloadSize; + + uint64_t Offset = PayloadSize; + while (Offset < TotalSize) + { + uint64_t PartSize = Min(ChunkSize, TotalSize - Offset); + OutWorkItems.emplace_back( + [this, Namespace, BucketId, BuildId, PartId, Hash, TotalSize, Workload, Offset, PartSize]() + -> JupiterResult { + std::string RequestUrl = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", + Namespace, + BucketId, + BuildId, + PartId, + Hash.ToHexString()); + HttpClient::Response Response = m_HttpClient.Get( + RequestUrl, + HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", Offset, Offset + PartSize - 1)}})); + if (Response.IsSuccess()) + { + uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Response.ResponsePayload.GetSize()); + Workload->Receiver(Offset, Response.ResponsePayload, ByteRemaning); + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); + }); + Offset += PartSize; + } + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); + } + } + } + Receiver(0, Response.ResponsePayload, Response.ResponsePayload.GetSize()); + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); +} + JupiterResult JupiterSession::GetBuildBlob(std::string_view Namespace, std::string_view BucketId, @@ -447,6 +724,7 @@ JupiterSession::GetBuildBlob(std::string_view Namespace, HttpClient::Response Response = m_HttpClient.Download( fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), TempFolderPath); + return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv); } @@ -502,4 +780,19 @@ JupiterSession::FindBlocks(std::string_view Namespace, std::string_view BucketId return detail::ConvertResponse(Response, "JupiterSession::FindBlocks"sv); } +JupiterResult +JupiterSession::GetBlockMetadata(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + IoBuffer Payload) +{ + ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); + HttpClient::Response Response = + m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/getBlockMetadata", Namespace, BucketId, BuildId, PartId), + Payload, + HttpClient::Accept(ZenContentType::kCbObject)); + return detail::ConvertResponse(Response, "JupiterSession::GetBlockMetadata"sv); +} + } // namespace zen -- cgit v1.2.3 From da9179d330a37132488f6deb8d8068783b087256 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 12 Feb 2025 09:02:35 +0100 Subject: moving and small refactor of chunk blocks to prepare for builds api (#282) --- .../projectstore/buildsremoteprojectstore.cpp | 77 +--- .../projectstore/fileremoteprojectstore.cpp | 6 +- .../projectstore/jupiterremoteprojectstore.cpp | 4 +- src/zenserver/projectstore/projectstore.cpp | 16 +- src/zenserver/projectstore/remoteprojectstore.cpp | 226 ++++----- src/zenserver/projectstore/remoteprojectstore.h | 31 +- .../projectstore/zenremoteprojectstore.cpp | 2 +- src/zenstore/chunkedfile.cpp | 505 -------------------- src/zenstore/chunking.cpp | 382 --------------- src/zenstore/chunking.h | 56 --- src/zenstore/include/zenstore/chunkedfile.h | 54 --- src/zenutil/chunkblock.cpp | 166 +++++++ src/zenutil/chunkedfile.cpp | 510 +++++++++++++++++++++ src/zenutil/chunking.cpp | 382 +++++++++++++++ src/zenutil/chunking.h | 56 +++ src/zenutil/include/zenutil/chunkblock.h | 32 ++ src/zenutil/include/zenutil/chunkedfile.h | 58 +++ src/zenutil/zenutil.cpp | 2 + 18 files changed, 1328 insertions(+), 1237 deletions(-) delete mode 100644 src/zenstore/chunkedfile.cpp delete mode 100644 src/zenstore/chunking.cpp delete mode 100644 src/zenstore/chunking.h delete mode 100644 src/zenstore/include/zenstore/chunkedfile.h create mode 100644 src/zenutil/chunkblock.cpp create mode 100644 src/zenutil/chunkedfile.cpp create mode 100644 src/zenutil/chunking.cpp create mode 100644 src/zenutil/chunking.h create mode 100644 src/zenutil/include/zenutil/chunkblock.h create mode 100644 src/zenutil/include/zenutil/chunkedfile.h (limited to 'src') diff --git a/src/zenserver/projectstore/buildsremoteprojectstore.cpp b/src/zenserver/projectstore/buildsremoteprojectstore.cpp index 302b81729..412769174 100644 --- a/src/zenserver/projectstore/buildsremoteprojectstore.cpp +++ b/src/zenserver/projectstore/buildsremoteprojectstore.cpp @@ -3,6 +3,7 @@ #include "buildsremoteprojectstore.h" #include +#include #include #include @@ -114,7 +115,9 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&& Block) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, + const IoHash& RawHash, + ChunkBlockDescription&& Block) override { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); @@ -139,44 +142,10 @@ public: if (Block.BlockHash == RawHash) { - ZEN_ASSERT(Block.ChunkLengths.size() == Block.ChunkHashes.size()); - CbObjectWriter Writer; - Writer.AddHash("rawHash"sv, RawHash); - Writer.BeginArray("rawHashes"sv); - { - for (const IoHash& ChunkHash : Block.ChunkHashes) - { - Writer.AddHash(ChunkHash); - } - } - Writer.EndArray(); - Writer.BeginArray("chunkLengths"); - { - for (uint32_t ChunkSize : Block.ChunkLengths) - { - Writer.AddInteger(ChunkSize); - } - } - Writer.EndArray(); - Writer.BeginArray("chunkOffsets"); - { - ZEN_ASSERT(Block.FirstChunkOffset != (uint32_t)-1); - uint32_t Offset = Block.FirstChunkOffset; - for (uint32_t ChunkSize : Block.ChunkLengths) - { - Writer.AddInteger(Offset); - Offset += ChunkSize; - } - } - Writer.EndArray(); + CbObjectWriter BlockMetaData; + BlockMetaData.AddString("createdBy", GetRunningExecutablePath().stem().string()); - Writer.BeginObject("metadata"sv); - { - Writer.AddString("createdBy", "zenserver"); - } - Writer.EndObject(); - - IoBuffer MetaPayload = Writer.Save().GetBuffer().AsIoBuffer(); + IoBuffer MetaPayload = BuildChunkBlockDescription(Block, BlockMetaData.Save()).GetBuffer().AsIoBuffer(); MetaPayload.SetContentType(ZenContentType::kCbObject); JupiterResult PutMetaResult = Session.PutBlockMetadata(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash, MetaPayload); @@ -357,8 +326,7 @@ public: Result.Reason); return Result; } - CbObject BlocksObject = LoadCompactBinaryObject(FindResult.Response); - if (!BlocksObject) + if (ValidateCompactBinary(FindResult.Response.GetView(), CbValidateMode::Default) != CbValidateError::None) { Result.ErrorCode = gsl::narrow(HttpResponseCode::InternalServerError); Result.Reason = fmt::format("The block list {}/{}/{}/{} is not formatted as a compact binary object"sv, @@ -369,25 +337,20 @@ public: m_OplogBuildPartId); return Result; } - - CbArrayView Blocks = BlocksObject["blocks"].AsArrayView(); - Result.Blocks.reserve(Blocks.Num()); - for (CbFieldView BlockView : Blocks) + std::optional> Blocks = + ParseChunkBlockDescriptionList(LoadCompactBinaryObject(FindResult.Response)); + if (!Blocks) { - CbObjectView BlockObject = BlockView.AsObjectView(); - IoHash BlockHash = BlockObject["rawHash"sv].AsHash(); - if (BlockHash != IoHash::Zero) - { - CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView(); - std::vector ChunkHashes; - ChunkHashes.reserve(ChunksArray.Num()); - for (CbFieldView ChunkView : ChunksArray) - { - ChunkHashes.push_back(ChunkView.AsHash()); - } - Result.Blocks.emplace_back(Block{.BlockHash = BlockHash, .ChunkHashes = ChunkHashes}); - } + Result.ErrorCode = gsl::narrow(HttpResponseCode::InternalServerError); + Result.Reason = fmt::format("The block list {}/{}/{}/{} is not formatted as a list of blocks"sv, + m_JupiterClient->ServiceUrl(), + m_Namespace, + m_Bucket, + m_BuildId, + m_OplogBuildPartId); + return Result; } + Result.Blocks = std::move(Blocks.value()); return Result; } diff --git a/src/zenserver/projectstore/fileremoteprojectstore.cpp b/src/zenserver/projectstore/fileremoteprojectstore.cpp index 0fe739a12..5a21a7540 100644 --- a/src/zenserver/projectstore/fileremoteprojectstore.cpp +++ b/src/zenserver/projectstore/fileremoteprojectstore.cpp @@ -106,7 +106,7 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override { Stopwatch Timer; SaveAttachmentResult Result; @@ -192,8 +192,8 @@ public: return GetKnownBlocksResult{{.ErrorCode = static_cast(HttpResponseCode::NoContent), .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; } - std::vector KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); - GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; + std::vector KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); + GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; Result.Blocks = std::move(KnownBlocks); return Result; } diff --git a/src/zenserver/projectstore/jupiterremoteprojectstore.cpp b/src/zenserver/projectstore/jupiterremoteprojectstore.cpp index e906127ff..2b6a437d1 100644 --- a/src/zenserver/projectstore/jupiterremoteprojectstore.cpp +++ b/src/zenserver/projectstore/jupiterremoteprojectstore.cpp @@ -92,7 +92,7 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override { JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); JupiterResult PutResult = Session.PutCompressedBlob(m_Namespace, RawHash, Payload); @@ -193,7 +193,7 @@ public: return GetKnownBlocksResult{{.ErrorCode = static_cast(HttpResponseCode::NoContent), .ElapsedSeconds = LoadResult.ElapsedSeconds + ExistsResult.ElapsedSeconds}}; } - std::vector KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); + std::vector KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); GetKnownBlocksResult Result{ {.ElapsedSeconds = LoadResult.ElapsedSeconds + ExistsResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000.0}}; diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index 46a236af9..f6f7eba99 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -5347,7 +5347,7 @@ ProjectStore::ReadOplog(const std::string_view ProjectId, /* BuildBlocks */ false, /* IgnoreMissingAttachments */ false, /* AllowChunking*/ false, - [](CompressedBuffer&&, RemoteProjectStore::Block&&) {}, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, [](const IoHash&, TGetAttachmentBufferFunc&&) {}, [](std::vector>&&) {}, /* EmbedLooseFiles*/ false); @@ -8621,14 +8621,14 @@ TEST_CASE("project.store.block") Chunks.reserve(AttachmentSizes.size()); for (const auto& It : AttachmentsWithId) { - Chunks.push_back(std::make_pair(It.second.DecodeRawHash(), - [Buffer = It.second.GetCompressed().Flatten().AsIoBuffer()](const IoHash&) -> CompositeBuffer { - return CompositeBuffer(SharedBuffer(Buffer)); - })); + Chunks.push_back( + std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair { + return {Buffer.DecodeRawSize(), Buffer}; + })); } - RemoteProjectStore::Block Block; - CompressedBuffer BlockBuffer = GenerateBlock(std::move(Chunks), Block); - CHECK(IterateBlock(BlockBuffer.Decompress(), [](CompressedBuffer&&, const IoHash&) {})); + ChunkBlockDescription Block; + CompressedBuffer BlockBuffer = GenerateChunkBlock(std::move(Chunks), Block); + CHECK(IterateChunkBlock(BlockBuffer.Decompress(), [](CompressedBuffer&&, const IoHash&) {})); } TEST_CASE("project.store.iterateoplog") diff --git a/src/zenserver/projectstore/remoteprojectstore.cpp b/src/zenserver/projectstore/remoteprojectstore.cpp index 0589fdc5f..5b75a840e 100644 --- a/src/zenserver/projectstore/remoteprojectstore.cpp +++ b/src/zenserver/projectstore/remoteprojectstore.cpp @@ -12,8 +12,8 @@ #include #include #include -#include #include +#include #include #include @@ -143,7 +143,7 @@ namespace remotestore_impl { NiceBytes(Stats.m_PeakReceivedBytes)); } - size_t AddBlock(RwLock& BlocksLock, std::vector& Blocks) + size_t AddBlock(RwLock& BlocksLock, std::vector& Blocks) { size_t BlockIndex; { @@ -573,7 +573,7 @@ namespace remotestore_impl { return; } - bool StoreChunksOK = IterateBlock( + bool StoreChunksOK = IterateChunkBlock( BlockPayload, [&WantedChunks, &WriteAttachmentBuffers, &WriteRawHashes, &Info](CompressedBuffer&& Chunk, const IoHash& AttachmentRawHash) { @@ -738,14 +738,14 @@ namespace remotestore_impl { }); }; - void CreateBlock(WorkerThreadPool& WorkerPool, - Latch& OpSectionsLatch, - std::vector>&& ChunksInBlock, - RwLock& SectionsLock, - std::vector& Blocks, - size_t BlockIndex, - const std::function& AsyncOnBlock, - AsyncRemoteResult& RemoteResult) + void CreateBlock(WorkerThreadPool& WorkerPool, + Latch& OpSectionsLatch, + std::vector>&& ChunksInBlock, + RwLock& SectionsLock, + std::vector& Blocks, + size_t BlockIndex, + const std::function& AsyncOnBlock, + AsyncRemoteResult& RemoteResult) { OpSectionsLatch.AddCount(1); WorkerPool.ScheduleWork([&Blocks, @@ -764,10 +764,10 @@ namespace remotestore_impl { try { ZEN_ASSERT(ChunkCount > 0); - Stopwatch Timer; - RemoteProjectStore::Block Block; - CompressedBuffer CompressedBlock = GenerateBlock(std::move(Chunks), Block); - IoHash BlockHash = CompressedBlock.DecodeRawHash(); + Stopwatch Timer; + ChunkBlockDescription Block; + CompressedBuffer CompressedBlock = GenerateChunkBlock(std::move(Chunks), Block); + IoHash BlockHash = CompressedBlock.DecodeRawHash(); { // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index RwLock::SharedLockScope __(SectionsLock); @@ -800,8 +800,8 @@ namespace remotestore_impl { struct CreatedBlock { - IoBuffer Payload; - RemoteProjectStore::Block Block; + IoBuffer Payload; + ChunkBlockDescription Block; }; void UploadAttachments(WorkerThreadPool& WorkerPool, @@ -931,8 +931,8 @@ namespace remotestore_impl { } try { - IoBuffer Payload; - RemoteProjectStore::Block Block; + IoBuffer Payload; + ChunkBlockDescription Block; if (auto BlockIt = CreatedBlocks.find(RawHash); BlockIt != CreatedBlocks.end()) { Payload = BlockIt->second.Payload; @@ -1058,7 +1058,7 @@ namespace remotestore_impl { { auto It = BulkBlockAttachmentsToUpload.find(Chunk); ZEN_ASSERT(It != BulkBlockAttachmentsToUpload.end()); - CompositeBuffer ChunkPayload = It->second(It->first); + CompressedBuffer ChunkPayload = It->second(It->first).second; if (!ChunkPayload) { RemoteResult.SetError(static_cast(HttpResponseCode::NotFound), @@ -1067,8 +1067,8 @@ namespace remotestore_impl { ChunkBuffers.clear(); break; } - ChunksSize += ChunkPayload.GetSize(); - ChunkBuffers.emplace_back(SharedBuffer(std::move(ChunkPayload).Flatten().AsIoBuffer())); + ChunksSize += ChunkPayload.GetCompressedSize(); + ChunkBuffers.emplace_back(SharedBuffer(std::move(ChunkPayload).GetCompressed().Flatten().AsIoBuffer())); } RemoteProjectStore::SaveAttachmentsResult Result = RemoteStore.SaveAttachments(ChunkBuffers); if (Result.ErrorCode) @@ -1139,54 +1139,13 @@ namespace remotestore_impl { } } // namespace remotestore_impl -bool -IterateBlock(const SharedBuffer& BlockPayload, std::function Visitor) -{ - ZEN_ASSERT(BlockPayload); - if (BlockPayload.GetSize() < 1) - { - return false; - } - - MemoryView BlockView = BlockPayload.GetView(); - const uint8_t* ReadPtr = reinterpret_cast(BlockView.GetData()); - uint32_t NumberSize; - uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize); - ReadPtr += NumberSize; - std::vector ChunkSizes; - ChunkSizes.reserve(ChunkCount); - while (ChunkCount--) - { - ChunkSizes.push_back(ReadVarUInt(ReadPtr, NumberSize)); - ReadPtr += NumberSize; - } - ptrdiff_t TempBufferLength = std::distance(reinterpret_cast(BlockView.GetData()), ReadPtr); - ZEN_ASSERT(TempBufferLength > 0); - for (uint64_t ChunkSize : ChunkSizes) - { - IoBuffer Chunk(IoBuffer::Wrap, ReadPtr, ChunkSize); - IoHash AttachmentRawHash; - uint64_t AttachmentRawSize; - CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(Chunk), AttachmentRawHash, AttachmentRawSize); - - if (!CompressedChunk) - { - ZEN_ERROR("Invalid chunk in block"); - return false; - } - Visitor(std::move(CompressedChunk), AttachmentRawHash); - ReadPtr += ChunkSize; - ZEN_ASSERT(ReadPtr <= BlockView.GetDataEnd()); - } - return true; -}; std::vector GetBlockHashesFromOplog(CbObjectView ContainerObject) { using namespace std::literals; - std::vector Result; - CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); + std::vector Result; + CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); std::vector BlockHashes; BlockHashes.reserve(BlocksArray.Num()); @@ -1199,11 +1158,11 @@ GetBlockHashesFromOplog(CbObjectView ContainerObject) return BlockHashes; } -std::vector +std::vector GetBlocksFromOplog(CbObjectView ContainerObject, std::span IncludeBlockHashes) { using namespace std::literals; - std::vector Result; + std::vector Result; CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); tsl::robin_set IncludeSet; IncludeSet.insert(IncludeBlockHashes.begin(), IncludeBlockHashes.end()); @@ -1232,47 +1191,6 @@ GetBlocksFromOplog(CbObjectView ContainerObject, std::span Include return Result; } -CompressedBuffer -GenerateBlock(std::vector>&& FetchChunks, RemoteProjectStore::Block& OutBlock) -{ - const size_t ChunkCount = FetchChunks.size(); - - std::vector ChunkSegments; - ChunkSegments.resize(1); - ChunkSegments.reserve(1 + ChunkCount); - OutBlock.ChunkHashes.reserve(ChunkCount); - OutBlock.ChunkLengths.reserve(ChunkCount); - { - IoBuffer TempBuffer(ChunkCount * 9); - MutableMemoryView View = TempBuffer.GetMutableView(); - uint8_t* BufferStartPtr = reinterpret_cast(View.GetData()); - uint8_t* BufferEndPtr = BufferStartPtr; - BufferEndPtr += WriteVarUInt(gsl::narrow(ChunkCount), BufferEndPtr); - for (const auto& It : FetchChunks) - { - CompositeBuffer Chunk = It.second(It.first); - uint64_t ChunkSize = 0; - std::span Segments = Chunk.GetSegments(); - for (const SharedBuffer& Segment : Segments) - { - ChunkSize += Segment.GetSize(); - ChunkSegments.push_back(Segment); - } - BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr); - OutBlock.ChunkHashes.push_back(It.first); - OutBlock.ChunkLengths.push_back(gsl::narrow(ChunkSize)); - } - ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd()); - ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr); - ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow(TempBufferLength))); - } - CompressedBuffer CompressedBlock = - CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None); - OutBlock.BlockHash = CompressedBlock.DecodeRawHash(); - OutBlock.FirstChunkOffset = gsl::narrow(CompressedBuffer::GetHeaderSizeForNoneEncoder() + ChunkSegments[0].GetSize()); - return CompressedBlock; -} - CbObject BuildContainer(CidStore& ChunkStore, ProjectStore::Project& Project, @@ -1283,9 +1201,9 @@ BuildContainer(CidStore& ChunkStore, bool BuildBlocks, bool IgnoreMissingAttachments, bool AllowChunking, - const std::vector& KnownBlocks, + const std::vector& KnownBlocks, WorkerThreadPool& WorkerPool, - const std::function& AsyncOnBlock, + const std::function& AsyncOnBlock, const std::function& OnLargeAttachment, const std::function>&&)>& OnBlockChunks, bool EmbedLooseFiles, @@ -1307,9 +1225,9 @@ BuildContainer(CidStore& ChunkStore, std::unordered_map UploadAttachments; - RwLock BlocksLock; - std::vector Blocks; - CompressedBuffer OpsBuffer; + RwLock BlocksLock; + std::vector Blocks; + CompressedBuffer OpsBuffer; std::filesystem::path AttachmentTempPath = Oplog.TempPath(); AttachmentTempPath.append(".pending"); @@ -1525,7 +1443,7 @@ BuildContainer(CidStore& ChunkStore, return {}; } - auto FindReuseBlocks = [](const std::vector& KnownBlocks, + auto FindReuseBlocks = [](const std::vector& KnownBlocks, const std::unordered_set& Attachments, JobContext* OptionalContext) -> std::vector { std::vector ReuseBlockIndexes; @@ -1538,8 +1456,8 @@ BuildContainer(CidStore& ChunkStore, for (size_t KnownBlockIndex = 0; KnownBlockIndex < KnownBlocks.size(); KnownBlockIndex++) { - const RemoteProjectStore::Block& KnownBlock = KnownBlocks[KnownBlockIndex]; - size_t BlockAttachmentCount = KnownBlock.ChunkHashes.size(); + const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + size_t BlockAttachmentCount = KnownBlock.ChunkHashes.size(); if (BlockAttachmentCount == 0) { continue; @@ -1586,7 +1504,7 @@ BuildContainer(CidStore& ChunkStore, std::vector ReusedBlockIndexes = FindReuseBlocks(KnownBlocks, FoundHashes, OptionalContext); for (size_t KnownBlockIndex : ReusedBlockIndexes) { - const RemoteProjectStore::Block& KnownBlock = KnownBlocks[KnownBlockIndex]; + const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; for (const IoHash& KnownHash : KnownBlock.ChunkHashes) { if (UploadAttachments.erase(KnownHash) == 1) @@ -1632,12 +1550,12 @@ BuildContainer(CidStore& ChunkStore, return Chunked; }; - RwLock ResolveLock; - std::unordered_set ChunkedHashes; - std::unordered_set LargeChunkHashes; - std::unordered_map ChunkedUploadAttachments; - std::unordered_map LooseUploadAttachments; - std::unordered_set MissingHashes; + RwLock ResolveLock; + std::unordered_set ChunkedHashes; + std::unordered_set LargeChunkHashes; + std::unordered_map ChunkedUploadAttachments; + std::unordered_map, IoHash::Hasher> LooseUploadAttachments; + std::unordered_set MissingHashes; remotestore_impl::ReportMessage(OptionalContext, fmt::format("Resolving {} attachments from {} ops", UploadAttachments.size(), TotalOpCount)); @@ -1730,7 +1648,7 @@ BuildContainer(CidStore& ChunkStore, } else { - size_t RawSize = RawData.GetSize(); + uint64_t RawSize = RawData.GetSize(); CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer(RawData), OodleCompressor::Mermaid, OodleCompressionLevel::VeryFast); @@ -1753,8 +1671,8 @@ BuildContainer(CidStore& ChunkStore, { UploadAttachment->Size = Compressed.GetCompressedSize(); ResolveLock.WithExclusiveLock( - [RawHash, &LooseUploadAttachments, Data = std::move(TempAttachmentBuffer)]() { - LooseUploadAttachments.insert_or_assign(RawHash, std::move(Data)); + [RawHash, RawSize, &LooseUploadAttachments, Data = std::move(TempAttachmentBuffer)]() { + LooseUploadAttachments.insert_or_assign(RawHash, std::make_pair(RawSize, std::move(Data))); }); } } @@ -1927,7 +1845,7 @@ BuildContainer(CidStore& ChunkStore, std::vector ReusedBlockFromChunking = FindReuseBlocks(KnownBlocks, ChunkedHashes, OptionalContext); for (size_t KnownBlockIndex : ReusedBlockIndexes) { - const RemoteProjectStore::Block& KnownBlock = KnownBlocks[KnownBlockIndex]; + const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; for (const IoHash& KnownHash : KnownBlock.ChunkHashes) { if (ChunkedHashes.erase(KnownHash) == 1) @@ -2109,16 +2027,25 @@ BuildContainer(CidStore& ChunkStore, { if (auto It = LooseUploadAttachments.find(RawHash); It != LooseUploadAttachments.end()) { - ChunksInBlock.emplace_back(std::make_pair(RawHash, [IoBuffer = SharedBuffer(It->second)](const IoHash&) { - return CompositeBuffer(IoBuffer); - })); + ChunksInBlock.emplace_back(std::make_pair( + RawHash, + [RawSize = It->second.first, + IoBuffer = SharedBuffer(It->second.second)](const IoHash&) -> std::pair { + return std::make_pair(RawSize, CompressedBuffer::FromCompressedNoValidate(IoBuffer.AsIoBuffer())); + })); LooseUploadAttachments.erase(It); } else { - ChunksInBlock.emplace_back(std::make_pair(RawHash, [&ChunkStore](const IoHash& RawHash) { - return CompositeBuffer(SharedBuffer(ChunkStore.FindChunkByCid(RawHash))); - })); + ChunksInBlock.emplace_back( + std::make_pair(RawHash, [&ChunkStore](const IoHash& RawHash) -> std::pair { + IoBuffer Chunk = ChunkStore.FindChunkByCid(RawHash); + IoHash _; + uint64_t RawSize = 0; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), _, RawSize); + ZEN_ASSERT(Compressed); + return {RawSize, Compressed}; + })); } BlockSize += PayloadSize; @@ -2169,14 +2096,15 @@ BuildContainer(CidStore& ChunkStore, if (BlockAttachmentHashes.insert(ChunkHash).second) { const ChunkSource& Source = Chunked.ChunkSources[ChunkIndex]; - ChunksInBlock.emplace_back(std::make_pair( - ChunkHash, - [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size](const IoHash&) { - return CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)), - OodleCompressor::Mermaid, - OodleCompressionLevel::None) - .GetCompressed(); - })); + ChunksInBlock.emplace_back( + std::make_pair(ChunkHash, + [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size]( + const IoHash&) -> std::pair { + return {Size, + CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)), + OodleCompressor::Mermaid, + OodleCompressionLevel::None)}; + })); BlockSize += CompressedBuffer::GetHeaderSizeForNoneEncoder() + Source.Size; if (BuildBlocks) { @@ -2298,7 +2226,7 @@ BuildContainer(CidStore& ChunkStore, OplogContinerWriter.AddBinary("ops"sv, CompressedOpsSection.GetCompressed().Flatten().AsIoBuffer()); OplogContinerWriter.BeginArray("blocks"sv); { - for (const RemoteProjectStore::Block& B : Blocks) + for (const ChunkBlockDescription& B : Blocks) { ZEN_ASSERT(!B.ChunkHashes.empty()); if (BuildBlocks) @@ -2392,7 +2320,7 @@ BuildContainer(CidStore& ChunkStore, bool BuildBlocks, bool IgnoreMissingAttachments, bool AllowChunking, - const std::function& AsyncOnBlock, + const std::function& AsyncOnBlock, const std::function& OnLargeAttachment, const std::function>&&)>& OnBlockChunks, bool EmbedLooseFiles) @@ -2458,8 +2386,8 @@ SaveOplog(CidStore& ChunkStore, std::unordered_map CreatedBlocks; tsl::robin_map LooseLargeFiles; - auto MakeTempBlock = [AttachmentTempPath, &RemoteResult, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock, - RemoteProjectStore::Block&& Block) { + auto MakeTempBlock = [AttachmentTempPath, &RemoteResult, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock, + ChunkBlockDescription&& Block) { std::filesystem::path BlockPath = AttachmentTempPath; BlockPath.append(Block.BlockHash.ToHexString()); try @@ -2478,8 +2406,8 @@ SaveOplog(CidStore& ChunkStore, } }; - auto UploadBlock = [&RemoteStore, &RemoteResult, &Info, OptionalContext](CompressedBuffer&& CompressedBlock, - RemoteProjectStore::Block&& Block) { + auto UploadBlock = [&RemoteStore, &RemoteResult, &Info, OptionalContext](CompressedBuffer&& CompressedBlock, + ChunkBlockDescription&& Block) { IoHash BlockHash = Block.BlockHash; RemoteProjectStore::SaveAttachmentResult Result = RemoteStore.SaveAttachment(CompressedBlock.GetCompressed(), BlockHash, std::move(Block)); @@ -2512,7 +2440,7 @@ SaveOplog(CidStore& ChunkStore, ZEN_DEBUG("Found attachment {}", AttachmentHash); }; - std::function OnBlock; + std::function OnBlock; if (RemoteStoreInfo.UseTempBlockFiles) { OnBlock = MakeTempBlock; @@ -2522,7 +2450,7 @@ SaveOplog(CidStore& ChunkStore, OnBlock = UploadBlock; } - std::vector KnownBlocks; + std::vector KnownBlocks; uint64_t TransferWallTimeMS = 0; diff --git a/src/zenserver/projectstore/remoteprojectstore.h b/src/zenserver/projectstore/remoteprojectstore.h index e05cb9923..1ef0416b7 100644 --- a/src/zenserver/projectstore/remoteprojectstore.h +++ b/src/zenserver/projectstore/remoteprojectstore.h @@ -5,6 +5,8 @@ #include #include "projectstore.h" +#include + #include namespace zen { @@ -16,14 +18,6 @@ struct ChunkedInfo; class RemoteProjectStore { public: - struct Block - { - IoHash BlockHash; - std::vector ChunkHashes; - std::vector ChunkLengths; - uint32_t FirstChunkOffset = (uint32_t)-1; - }; - struct Result { int32_t ErrorCode{}; @@ -72,7 +66,7 @@ public: struct GetKnownBlocksResult : public Result { - std::vector Blocks; + std::vector Blocks; }; struct RemoteStoreInfo @@ -101,11 +95,11 @@ public: virtual RemoteStoreInfo GetInfo() const = 0; virtual Stats GetStats() const = 0; - virtual CreateContainerResult CreateContainer() = 0; - virtual SaveResult SaveContainer(const IoBuffer& Payload) = 0; - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&& Block) = 0; - virtual FinalizeResult FinalizeContainer(const IoHash& RawHash) = 0; - virtual SaveAttachmentsResult SaveAttachments(const std::vector& Payloads) = 0; + virtual CreateContainerResult CreateContainer() = 0; + virtual SaveResult SaveContainer(const IoBuffer& Payload) = 0; + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&& Block) = 0; + virtual FinalizeResult FinalizeContainer(const IoHash& RawHash) = 0; + virtual SaveAttachmentsResult SaveAttachments(const std::vector& Payloads) = 0; virtual LoadContainerResult LoadContainer() = 0; virtual GetKnownBlocksResult GetKnownBlocks() = 0; @@ -125,7 +119,6 @@ struct RemoteStoreOptions }; typedef std::function TGetAttachmentBufferFunc; -typedef std::function FetchChunkFunc; RemoteProjectStore::LoadContainerResult BuildContainer( CidStore& ChunkStore, @@ -137,7 +130,7 @@ RemoteProjectStore::LoadContainerResult BuildContainer( bool BuildBlocks, bool IgnoreMissingAttachments, bool AllowChunking, - const std::function& AsyncOnBlock, + const std::function& AsyncOnBlock, const std::function& OnLargeAttachment, const std::function>&&)>& OnBlockChunks, bool EmbedLooseFiles); @@ -173,9 +166,7 @@ RemoteProjectStore::Result LoadOplog(CidStore& ChunkStore, bool CleanOplog, JobContext* OptionalContext); -CompressedBuffer GenerateBlock(std::vector>&& FetchChunks, RemoteProjectStore::Block& OutBlock); -bool IterateBlock(const SharedBuffer& BlockPayload, std::function Visitor); -std::vector GetBlockHashesFromOplog(CbObjectView ContainerObject); -std::vector GetBlocksFromOplog(CbObjectView ContainerObject, std::span IncludeBlockHashes); +std::vector GetBlockHashesFromOplog(CbObjectView ContainerObject); +std::vector GetBlocksFromOplog(CbObjectView ContainerObject, std::span IncludeBlockHashes); } // namespace zen diff --git a/src/zenserver/projectstore/zenremoteprojectstore.cpp b/src/zenserver/projectstore/zenremoteprojectstore.cpp index 42519b108..2ebf58a5d 100644 --- a/src/zenserver/projectstore/zenremoteprojectstore.cpp +++ b/src/zenserver/projectstore/zenremoteprojectstore.cpp @@ -93,7 +93,7 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override { std::string SaveRequest = fmt::format("/{}/oplog/{}/{}"sv, m_Project, m_Oplog, RawHash); HttpClient::Response Response = m_Client.Post(SaveRequest, Payload, ZenContentType::kCompressedBinary); diff --git a/src/zenstore/chunkedfile.cpp b/src/zenstore/chunkedfile.cpp deleted file mode 100644 index f200bc1ec..000000000 --- a/src/zenstore/chunkedfile.cpp +++ /dev/null @@ -1,505 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include -#include - -#include "chunking.h" - -ZEN_THIRD_PARTY_INCLUDES_START -#include -#include -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { - -namespace { - struct ChunkedHeader - { - static constexpr uint32_t ExpectedMagic = 0x646b6863; // chkd - static constexpr uint32_t CurrentVersion = 1; - - uint32_t Magic = ExpectedMagic; - uint32_t Version = CurrentVersion; - uint32_t ChunkSequenceLength; - uint32_t ChunkHashCount; - uint64_t ChunkSequenceOffset; - uint64_t ChunkHashesOffset; - uint64_t RawSize = 0; - IoHash RawHash; - }; -} // namespace - -IoBuffer -SerializeChunkedInfo(const ChunkedInfo& Info) -{ - size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) + - RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16); - IoBuffer HeaderData(HeaderSize); - - ChunkedHeader Header; - Header.ChunkSequenceLength = gsl::narrow(Info.ChunkSequence.size()); - Header.ChunkHashCount = gsl::narrow(Info.ChunkHashes.size()); - Header.ChunkSequenceOffset = RoundUp(sizeof(ChunkedHeader), 16); - Header.ChunkHashesOffset = RoundUp(Header.ChunkSequenceOffset + sizeof(uint32_t) * Header.ChunkSequenceLength, 16); - Header.RawSize = Info.RawSize; - Header.RawHash = Info.RawHash; - - MutableMemoryView WriteView = HeaderData.GetMutableView(); - { - MutableMemoryView HeaderWriteView = WriteView.Left(sizeof(Header)); - HeaderWriteView.CopyFrom(MemoryView(&Header, sizeof(Header))); - } - { - MutableMemoryView ChunkSequenceWriteView = WriteView.Mid(Header.ChunkSequenceOffset, sizeof(uint32_t) * Header.ChunkSequenceLength); - ChunkSequenceWriteView.CopyFrom(MemoryView(Info.ChunkSequence.data(), ChunkSequenceWriteView.GetSize())); - } - { - MutableMemoryView ChunksWriteView = WriteView.Mid(Header.ChunkHashesOffset, sizeof(IoHash) * Header.ChunkHashCount); - ChunksWriteView.CopyFrom(MemoryView(Info.ChunkHashes.data(), ChunksWriteView.GetSize())); - } - - return HeaderData; -} - -ChunkedInfo -DeserializeChunkedInfo(IoBuffer& Buffer) -{ - MemoryView View = Buffer.GetView(); - ChunkedHeader Header; - { - MutableMemoryView HeaderWriteView(&Header, sizeof(Header)); - HeaderWriteView.CopyFrom(View.Left(sizeof(Header))); - } - if (Header.Magic != ChunkedHeader::ExpectedMagic) - { - return {}; - } - if (Header.Version != ChunkedHeader::CurrentVersion) - { - return {}; - } - ChunkedInfo Info; - Info.RawSize = Header.RawSize; - Info.RawHash = Header.RawHash; - Info.ChunkSequence.resize(Header.ChunkSequenceLength); - Info.ChunkHashes.resize(Header.ChunkHashCount); - { - MutableMemoryView ChunkSequenceWriteView(Info.ChunkSequence.data(), sizeof(uint32_t) * Header.ChunkSequenceLength); - ChunkSequenceWriteView.CopyFrom(View.Mid(Header.ChunkSequenceOffset, ChunkSequenceWriteView.GetSize())); - } - { - MutableMemoryView ChunksWriteView(Info.ChunkHashes.data(), sizeof(IoHash) * Header.ChunkHashCount); - ChunksWriteView.CopyFrom(View.Mid(Header.ChunkHashesOffset, ChunksWriteView.GetSize())); - } - - return Info; -} - -void -Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function GetChunk) -{ - BasicFile Reconstructed; - Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate); - BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024); - uint64_t Offset = 0; - for (uint32_t SequenceIndex : Info.ChunkSequence) - { - IoBuffer Chunk = GetChunk(Info.ChunkHashes[SequenceIndex]); - ReconstructedWriter.Write(Chunk.GetData(), Chunk.GetSize(), Offset); - Offset += Chunk.GetSize(); - } -} - -ChunkedInfoWithSource -ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params) -{ - ChunkedInfoWithSource Result; - tsl::robin_map FoundChunks; - - ZenChunkHelper Chunker; - Chunker.SetUseThreshold(Params.UseThreshold); - Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize); - size_t End = Offset + Size; - const size_t ScanBufferSize = 1u * 1024 * 1024; // (Params.MaxSize * 9) / 3;//1 * 1024 * 1024; - BasicFileBuffer RawBuffer(RawData, ScanBufferSize); - MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); - ZEN_ASSERT(!SliceView.IsEmpty()); - size_t SliceSize = SliceView.GetSize(); - IoHashStream RawHashStream; - while (Offset < End) - { - size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize); - if (ScanLength == ZenChunkHelper::kNoBoundaryFound) - { - if (Offset + SliceSize == End) - { - ScanLength = SliceSize; - } - else - { - SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); - SliceSize = SliceView.GetSize(); - Chunker.Reset(); - continue; - } - } - uint32_t ChunkLength = gsl::narrow(ScanLength); // +HashedLength); - MemoryView ChunkView = SliceView.Left(ScanLength); - RawHashStream.Append(ChunkView); - IoHash ChunkHash = IoHash::HashBuffer(ChunkView); - SliceView.RightChopInline(ScanLength); - if (auto It = FoundChunks.find(ChunkHash); It != FoundChunks.end()) - { - Result.Info.ChunkSequence.push_back(It->second); - } - else - { - uint32_t ChunkIndex = gsl::narrow(Result.Info.ChunkHashes.size()); - FoundChunks.insert_or_assign(ChunkHash, ChunkIndex); - Result.Info.ChunkHashes.push_back(ChunkHash); - Result.ChunkSources.push_back(ChunkSource{.Offset = Offset, .Size = ChunkLength}); - Result.Info.ChunkSequence.push_back(ChunkIndex); - } - - SliceSize = SliceView.GetSize(); - Offset += ChunkLength; - } - Result.Info.RawSize = Size; - Result.Info.RawHash = RawHashStream.GetHash(); - return Result; -} - -} // namespace zen - -#if ZEN_WITH_TESTS -# include -# include -# include -# include -# include -# include -# include -# include -# include - -# include "chunking.h" - -ZEN_THIRD_PARTY_INCLUDES_START -# include -# include -ZEN_THIRD_PARTY_INCLUDES_END - -namespace zen { -# if 0 -TEST_CASE("chunkedfile.findparams") -{ -# if 1 - DirectoryContent SourceContent1; - GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208", DirectoryContentFlags::IncludeFiles, SourceContent1); - const std::vector& SourceFiles1 = SourceContent1.Files; - DirectoryContent SourceContent2; - GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208_2", DirectoryContentFlags::IncludeFiles, SourceContent2); - const std::vector& SourceFiles2 = SourceContent2.Files; -# else - std::filesystem::path SourcePath1 = - "E:\\Temp\\ChunkingTestData\\31375996\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; - std::filesystem::path SourcePath2 = - "E:\\Temp\\ChunkingTestData\\31379208\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; - const std::vector& SourceFiles1 = {SourcePath1}; - const std::vector& SourceFiles2 = {SourcePath2}; -# endif - ChunkedParams Params[] = {ChunkedParams{.UseThreshold = false, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}, - ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 35598}, - ChunkedParams{.UseThreshold = false, .MinSize = 16848, .MaxSize = 135168, .AvgSize = 39030}, - ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 36222}, - ChunkedParams{.UseThreshold = false, .MinSize = 15744, .MaxSize = 126976, .AvgSize = 36600}, - ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 35442}, - ChunkedParams{.UseThreshold = false, .MinSize = 16464, .MaxSize = 131072, .AvgSize = 37950}, - ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 38914}, - ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 35556}, - ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 35520}, - ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 35478}, - ChunkedParams{.UseThreshold = false, .MinSize = 16896, .MaxSize = 135168, .AvgSize = 39072}, - ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 38880}, - ChunkedParams{.UseThreshold = false, .MinSize = 15840, .MaxSize = 126976, .AvgSize = 36678}, - ChunkedParams{.UseThreshold = false, .MinSize = 16800, .MaxSize = 135168, .AvgSize = 38994}, - ChunkedParams{.UseThreshold = false, .MinSize = 15888, .MaxSize = 126976, .AvgSize = 36714}, - ChunkedParams{.UseThreshold = false, .MinSize = 15792, .MaxSize = 126976, .AvgSize = 36636}, - ChunkedParams{.UseThreshold = false, .MinSize = 14880, .MaxSize = 118784, .AvgSize = 37609}, - ChunkedParams{.UseThreshold = false, .MinSize = 15936, .MaxSize = 126976, .AvgSize = 36756}, - ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 38955}, - ChunkedParams{.UseThreshold = false, .MinSize = 15984, .MaxSize = 126976, .AvgSize = 36792}, - ChunkedParams{.UseThreshold = false, .MinSize = 14400, .MaxSize = 114688, .AvgSize = 36338}, - ChunkedParams{.UseThreshold = false, .MinSize = 14832, .MaxSize = 118784, .AvgSize = 37568}, - ChunkedParams{.UseThreshold = false, .MinSize = 16944, .MaxSize = 135168, .AvgSize = 39108}, - ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 36297}, - ChunkedParams{.UseThreshold = false, .MinSize = 14208, .MaxSize = 114688, .AvgSize = 36188}, - ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 36372}, - ChunkedParams{.UseThreshold = false, .MinSize = 13296, .MaxSize = 106496, .AvgSize = 36592}, - ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 38805}, - ChunkedParams{.UseThreshold = false, .MinSize = 14304, .MaxSize = 114688, .AvgSize = 36263}, - ChunkedParams{.UseThreshold = false, .MinSize = 14784, .MaxSize = 118784, .AvgSize = 37534}, - ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 38839}, - ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 39360}, - ChunkedParams{.UseThreshold = false, .MinSize = 13776, .MaxSize = 110592, .AvgSize = 37976}, - ChunkedParams{.UseThreshold = false, .MinSize = 14736, .MaxSize = 118784, .AvgSize = 37493}, - ChunkedParams{.UseThreshold = false, .MinSize = 14928, .MaxSize = 118784, .AvgSize = 37643}, - ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 39504}, - ChunkedParams{.UseThreshold = false, .MinSize = 13392, .MaxSize = 106496, .AvgSize = 36664}, - ChunkedParams{.UseThreshold = false, .MinSize = 13872, .MaxSize = 110592, .AvgSize = 38048}, - ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 39432}, - ChunkedParams{.UseThreshold = false, .MinSize = 13200, .MaxSize = 106496, .AvgSize = 36520}, - ChunkedParams{.UseThreshold = false, .MinSize = 17328, .MaxSize = 139264, .AvgSize = 36378}, - ChunkedParams{.UseThreshold = false, .MinSize = 17376, .MaxSize = 139264, .AvgSize = 36421}, - ChunkedParams{.UseThreshold = false, .MinSize = 17424, .MaxSize = 139264, .AvgSize = 36459}, - ChunkedParams{.UseThreshold = false, .MinSize = 17472, .MaxSize = 139264, .AvgSize = 36502}, - ChunkedParams{.UseThreshold = false, .MinSize = 17520, .MaxSize = 139264, .AvgSize = 36540}, - ChunkedParams{.UseThreshold = false, .MinSize = 17808, .MaxSize = 143360, .AvgSize = 37423}, - ChunkedParams{.UseThreshold = false, .MinSize = 17856, .MaxSize = 143360, .AvgSize = 37466}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 25834}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 21917}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 29751}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 33668}, - ChunkedParams{.UseThreshold = false, .MinSize = 17952, .MaxSize = 143360, .AvgSize = 37547}, - ChunkedParams{.UseThreshold = false, .MinSize = 17904, .MaxSize = 143360, .AvgSize = 37504}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 22371}, - ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 37585}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 26406}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 26450}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 30615}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 30441}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 22417}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 22557}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 30528}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 27112}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 34644}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 34476}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 35408}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 38592}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 30483}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 26586}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 26496}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 31302}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 34516}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 22964}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 35448}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 38630}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 23010}, - ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 31260}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 34600}, - ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 27156}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 30570}, - ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 38549}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 22510}, - ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 38673}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 34560}, - ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 22464}, - ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 26540}, - ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 38511}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 23057}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 27202}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 31347}, - ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 35492}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 31389}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 27246}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 23103}, - ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 35532}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 23150}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 27292}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 31434}, - ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 35576}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 27336}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 23196}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 31476}, - ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 35616}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 27862}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 32121}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 23603}, - ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 36380}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 27908}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 23650}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 32166}, - ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 36424}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 23696}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 32253}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 32208}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 23743}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 36548}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 28042}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 23789}, - ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 32295}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 36508}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 27952}, - ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 27998}, - ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 36464}}; - - static const size_t ParamsCount = sizeof(Params) / sizeof(ChunkedParams); - std::vector Infos1(SourceFiles1.size()); - std::vector Infos2(SourceFiles2.size()); - - WorkerThreadPool WorkerPool(32); - - for (size_t I = 0; I < ParamsCount; I++) - { - for (int UseThreshold = 0; UseThreshold < 2; UseThreshold++) - { - Latch WorkLatch(1); - ChunkedParams Param = Params[I]; - Param.UseThreshold = UseThreshold == 1; - Stopwatch Timer; - for (size_t F = 0; F < SourceFiles1.size(); F++) - { - WorkLatch.AddCount(1); - WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles1, &Infos1]() { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - BasicFile SourceData1; - SourceData1.Open(SourceFiles1[F], BasicFile::Mode::kRead); - Infos1[F] = ChunkData(SourceData1, 0, SourceData1.FileSize(), Param); - }); - } - for (size_t F = 0; F < SourceFiles2.size(); F++) - { - WorkLatch.AddCount(1); - WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles2, &Infos2]() { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - BasicFile SourceData2; - SourceData2.Open(SourceFiles2[F], BasicFile::Mode::kRead); - Infos2[F] = ChunkData(SourceData2, 0, SourceData2.FileSize(), Param); - }); - } - WorkLatch.CountDown(); - WorkLatch.Wait(); - uint64_t ChunkTimeMS = Timer.GetElapsedTimeMs(); - - uint64_t Raw1Size = 0; - tsl::robin_set Chunks1; - size_t ChunkedSize1 = 0; - for (size_t F = 0; F < SourceFiles1.size(); F++) - { - const ChunkedInfoWithSource& Info = Infos1[F]; - Raw1Size += Info.Info.RawSize; - for (uint32_t Chunk1Index = 0; Chunk1Index < Info.Info.ChunkHashes.size(); ++Chunk1Index) - { - const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk1Index]; - if (Chunks1.insert(ChunkHash).second) - { - ChunkedSize1 += Info.ChunkSources[Chunk1Index].Size; - } - } - } - - uint64_t Raw2Size = 0; - tsl::robin_set Chunks2; - size_t ChunkedSize2 = 0; - size_t DiffSize = 0; - for (size_t F = 0; F < SourceFiles2.size(); F++) - { - const ChunkedInfoWithSource& Info = Infos2[F]; - Raw2Size += Info.Info.RawSize; - for (uint32_t Chunk2Index = 0; Chunk2Index < Info.Info.ChunkHashes.size(); ++Chunk2Index) - { - const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk2Index]; - if (Chunks2.insert(ChunkHash).second) - { - ChunkedSize2 += Info.ChunkSources[Chunk2Index].Size; - if (!Chunks1.contains(ChunkHash)) - { - DiffSize += Info.ChunkSources[Chunk2Index].Size; - } - } - } - } - - ZEN_INFO( - "Diff = {}, Chunks1 = {}, Chunks2 = {}, .UseThreshold = {}, .MinSize = {}, .MaxSize = {}, .AvgSize = {}, RawSize(1) = {}, " - "RawSize(2) = {}, " - "Saved(1) = {}, Saved(2) = {} in {}", - NiceBytes(DiffSize), - Chunks1.size(), - Chunks2.size(), - Param.UseThreshold, - Param.MinSize, - Param.MaxSize, - Param.AvgSize, - NiceBytes(Raw1Size), - NiceBytes(Raw2Size), - NiceBytes(Raw1Size - ChunkedSize1), - NiceBytes(Raw2Size - ChunkedSize2), - NiceTimeSpanMs(ChunkTimeMS)); - } - } - -# if 0 - for (int64_t MinSizeBase = (12u * 1024u); MinSizeBase <= (32u * 1024u); MinSizeBase += 512) - { - for (int64_t Wiggle = -132; Wiggle < 126; Wiggle += 2) - { - // size_t MinSize = 7 * 1024 - 61; // (size_t)(MinSizeBase + Wiggle); - // size_t MaxSize = 16 * (7 * 1024); // 8 * 7 * 1024;// MinSizeBase * 6; - // size_t AvgSize = MaxSize / 2; // 4 * 7 * 1024;// MinSizeBase * 3; - size_t MinSize = (size_t)(MinSizeBase + Wiggle); - //for (size_t MaxSize = (MinSize * 4) - 768; MaxSize < (MinSize * 5) + 768; MaxSize += 64) - size_t MaxSize = 8u * MinSizeBase; - { - for (size_t AvgSize = (MaxSize - MinSize) / 32 + MinSize; AvgSize < (MaxSize - MinSize) / 4 + MinSize; AvgSize += (MaxSize - MinSize) / 32) -// size_t AvgSize = (MaxSize - MinSize) / 4 + MinSize; - { - WorkLatch.AddCount(1); - WorkerPool.ScheduleWork([&WorkLatch, MinSize, MaxSize, AvgSize, SourcePath1, SourcePath2]() - { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - ChunkedParams Params{ .UseThreshold = true, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize }; - BasicFile SourceData1; - SourceData1.Open(SourcePath1, BasicFile::Mode::kRead); - BasicFile SourceData2; - SourceData2.Open(SourcePath2, BasicFile::Mode::kRead); - ChunkedInfoWithSource Info1 = ChunkData(SourceData1, Params); - ChunkedInfoWithSource Info2 = ChunkData(SourceData2, Params); - - tsl::robin_set Chunks1; - Chunks1.reserve(Info1.Info.ChunkHashes.size()); - Chunks1.insert(Info1.Info.ChunkHashes.begin(), Info1.Info.ChunkHashes.end()); - size_t ChunkedSize1 = 0; - for (uint32_t Chunk1Index = 0; Chunk1Index < Info1.Info.ChunkHashes.size(); ++Chunk1Index) - { - ChunkedSize1 += Info1.ChunkSources[Chunk1Index].Size; - } - size_t DiffSavedSize = 0; - size_t ChunkedSize2 = 0; - for (uint32_t Chunk2Index = 0; Chunk2Index < Info2.Info.ChunkHashes.size(); ++Chunk2Index) - { - ChunkedSize2 += Info2.ChunkSources[Chunk2Index].Size; - if (Chunks1.find(Info2.Info.ChunkHashes[Chunk2Index]) == Chunks1.end()) - { - DiffSavedSize += Info2.ChunkSources[Chunk2Index].Size; - } - } - ZEN_INFO("Diff {}, Chunks1: {}, Chunks2: {}, Min: {}, Max: {}, Avg: {}, Saved(1) {}, Saved(2) {}", - NiceBytes(DiffSavedSize), - Info1.Info.ChunkHashes.size(), - Info2.Info.ChunkHashes.size(), - MinSize, - MaxSize, - AvgSize, - NiceBytes(Info1.Info.RawSize - ChunkedSize1), - NiceBytes(Info2.Info.RawSize - ChunkedSize2)); - }); - } - } - } - } -# endif // 0 - - // WorkLatch.CountDown(); - // WorkLatch.Wait(); -} -# endif // 0 - -void -chunkedfile_forcelink() -{ -} - -} // namespace zen - -#endif diff --git a/src/zenstore/chunking.cpp b/src/zenstore/chunking.cpp deleted file mode 100644 index 30edd322a..000000000 --- a/src/zenstore/chunking.cpp +++ /dev/null @@ -1,382 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#include "chunking.h" - -#include - -#include - -namespace zen::detail { - -static const uint32_t BuzhashTable[] = { - 0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, 0x7ebf5191, 0x841135c7, 0x65cc53b3, - 0x280a597c, 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, 0x882bf287, 0x3116737c, - 0x05569956, 0xe8cc1f68, 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, 0x9bfd7c64, - 0x0b3e7276, 0xf2688e77, 0x8fad8abc, 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2, - 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, 0x7b7c222f, 0x2955ed16, 0x9f10ca59, - 0xe840c4c9, 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, 0xb19165cd, 0x9891c393, - 0x325384ac, 0x0308459d, 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, 0x977eb18c, - 0xd8770976, 0x9833466a, 0xc674df7f, 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4, - 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, 0xbef8f0e1, 0x21d73653, 0x4e3d977a, - 0x1e7b3929, 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, 0xf5f7be70, 0xe795248a, - 0x375a2fe9, 0x425570b6, 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, 0x1bc0dfb5, - 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c, - 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf, - 0xe0d8f8ae, 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, 0xe95effac, 0x305a47bd, 0x426ba35b, 0x1233af3f, 0x686a5b83, 0x50e072e5, - 0xd9d3bb2a, 0x8befc475, 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, 0xc0d0a81c, - 0x7fa3429b, 0xe9158a1b, 0x225ea19a, 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140, - 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, 0xa41979c2, 0xf2b61284, 0x4fac1475, 0x0adb49f9, 0x19727a23, 0x15a7e374, 0xc43a18d5, - 0x3fb1aa73, 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, 0x5388e5ee, 0xcd8a7510, - 0xf901b4fd, 0xdbc13dbc, 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, 0x32baf4a9, - 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1, - 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3, - 0xc6eb57bb, 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, 0x329e5388, 0x91dd236b, - 0x2ecb0d93, 0xf4d82a3d, 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, 0x7bf7cabc, - 0xf9c18d66, 0x593ade65, 0xd95ddf11, -}; - -// ROL operation (compiler turns this into a ROL when optimizing) -ZEN_FORCEINLINE static uint32_t -Rotate32(uint32_t Value, size_t RotateCount) -{ - RotateCount &= 31; - - return ((Value) << (RotateCount)) | ((Value) >> (32 - RotateCount)); -} - -} // namespace zen::detail - -namespace zen { - -void -ZenChunkHelper::Reset() -{ - InternalReset(); - - m_BytesScanned = 0; -} - -void -ZenChunkHelper::InternalReset() -{ - m_CurrentHash = 0; - m_CurrentChunkSize = 0; - m_WindowSize = 0; -} - -void -ZenChunkHelper::SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize) -{ - if (m_WindowSize) - return; // Already started - - static_assert(kChunkSizeLimitMin > kWindowSize); - - if (AvgSize) - { - // TODO: Validate AvgSize range - } - else - { - if (MinSize && MaxSize) - { - AvgSize = std::lrint(std::pow(2, (std::log2(MinSize) + std::log2(MaxSize)) / 2)); - } - else if (MinSize) - { - AvgSize = MinSize * 4; - } - else if (MaxSize) - { - AvgSize = MaxSize / 4; - } - else - { - AvgSize = kDefaultAverageChunkSize; - } - } - - if (MinSize) - { - // TODO: Validate MinSize range - } - else - { - MinSize = std::max(AvgSize / 4, kChunkSizeLimitMin); - } - - if (MaxSize) - { - // TODO: Validate MaxSize range - } - else - { - MaxSize = std::min(AvgSize * 4, kChunkSizeLimitMax); - } - - m_Discriminator = gsl::narrow(AvgSize - MinSize); - - if (m_Discriminator < MinSize) - { - m_Discriminator = gsl::narrow(MinSize); - } - - if (m_Discriminator > MaxSize) - { - m_Discriminator = gsl::narrow(MaxSize); - } - - m_Threshold = gsl::narrow((uint64_t(std::numeric_limits::max()) + 1) / m_Discriminator); - - m_ChunkSizeMin = MinSize; - m_ChunkSizeMax = MaxSize; - m_ChunkSizeAvg = AvgSize; -} - -size_t -ZenChunkHelper::ScanChunk(const void* DataBytesIn, size_t ByteCount) -{ - size_t Result = InternalScanChunk(DataBytesIn, ByteCount); - - if (Result == kNoBoundaryFound) - { - m_BytesScanned += ByteCount; - } - else - { - m_BytesScanned += Result; - } - - return Result; -} - -size_t -ZenChunkHelper::InternalScanChunk(const void* DataBytesIn, size_t ByteCount) -{ - size_t CurrentOffset = 0; - const uint8_t* CursorPtr = reinterpret_cast(DataBytesIn); - - // There's no point in updating the hash if we know we're not - // going to have a cut point, so just skip the data. This logic currently - // provides roughly a 20% speedup on my machine - - const size_t NeedHashOffset = m_ChunkSizeMin - kWindowSize; - - if (m_CurrentChunkSize < NeedHashOffset) - { - const uint32_t SkipBytes = gsl::narrow(std::min(ByteCount, NeedHashOffset - m_CurrentChunkSize)); - - ByteCount -= SkipBytes; - m_CurrentChunkSize += SkipBytes; - CurrentOffset += SkipBytes; - CursorPtr += SkipBytes; - - m_WindowSize = 0; - - if (ByteCount == 0) - { - return kNoBoundaryFound; - } - } - - // Fill window first - - if (m_WindowSize < kWindowSize) - { - const uint32_t FillBytes = uint32_t(std::min(ByteCount, kWindowSize - m_WindowSize)); - - memcpy(&m_Window[m_WindowSize], CursorPtr, FillBytes); - - CursorPtr += FillBytes; - - m_WindowSize += FillBytes; - m_CurrentChunkSize += FillBytes; - - CurrentOffset += FillBytes; - ByteCount -= FillBytes; - - if (m_WindowSize < kWindowSize) - { - return kNoBoundaryFound; - } - - // We have a full window, initialize hash - - uint32_t CurrentHash = 0; - - for (int i = 1; i < kWindowSize; ++i) - { - CurrentHash ^= detail::Rotate32(detail::BuzhashTable[m_Window[i - 1]], kWindowSize - i); - } - - m_CurrentHash = CurrentHash ^ detail::BuzhashTable[m_Window[kWindowSize - 1]]; - } - - // Scan for boundaries (i.e points where the hash matches the value determined by - // the discriminator) - - uint32_t CurrentHash = m_CurrentHash; - uint32_t CurrentChunkSize = m_CurrentChunkSize; - - size_t Index = CurrentChunkSize % kWindowSize; - - if (m_Threshold && m_UseThreshold) - { - // This is roughly 4x faster than the general modulo approach on my - // TR 3990X (~940MB/sec) and doesn't require any special parameters to - // achieve max performance - - while (ByteCount) - { - const uint8_t NewByte = *CursorPtr; - const uint8_t OldByte = m_Window[Index]; - - CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ - detail::BuzhashTable[NewByte]; - - CurrentChunkSize++; - CurrentOffset++; - - if (CurrentChunkSize >= m_ChunkSizeMin) - { - bool FoundBoundary; - - if (CurrentChunkSize >= m_ChunkSizeMax) - { - FoundBoundary = true; - } - else - { - FoundBoundary = CurrentHash <= m_Threshold; - } - - if (FoundBoundary) - { - // Boundary found! - InternalReset(); - - return CurrentOffset; - } - } - - m_Window[Index++] = *CursorPtr; - - if (Index == kWindowSize) - { - Index = 0; - } - - ++CursorPtr; - --ByteCount; - } - } - else if ((m_Discriminator & (m_Discriminator - 1)) == 0) - { - // This is quite a bit faster than the generic modulo path, but - // requires a very specific average chunk size to be used. If you - // pass in an even power-of-two divided by 0.75 as the average - // chunk size you'll hit this path - - const uint32_t Mask = m_Discriminator - 1; - - while (ByteCount) - { - const uint8_t NewByte = *CursorPtr; - const uint8_t OldByte = m_Window[Index]; - - CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ - detail::BuzhashTable[NewByte]; - - CurrentChunkSize++; - CurrentOffset++; - - if (CurrentChunkSize >= m_ChunkSizeMin) - { - bool FoundBoundary; - - if (CurrentChunkSize >= m_ChunkSizeMax) - { - FoundBoundary = true; - } - else - { - FoundBoundary = (CurrentHash & Mask) == Mask; - } - - if (FoundBoundary) - { - // Boundary found! - InternalReset(); - - return CurrentOffset; - } - } - - m_Window[Index++] = *CursorPtr; - - if (Index == kWindowSize) - { - Index = 0; - } - - ++CursorPtr; - --ByteCount; - } - } - else - { - // This is the slowest path, which caps out around 250MB/sec for large sizes - // on my TR3900X - - while (ByteCount) - { - const uint8_t NewByte = *CursorPtr; - const uint8_t OldByte = m_Window[Index]; - - CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ - detail::BuzhashTable[NewByte]; - - CurrentChunkSize++; - CurrentOffset++; - - if (CurrentChunkSize >= m_ChunkSizeMin) - { - bool FoundBoundary; - - if (CurrentChunkSize >= m_ChunkSizeMax) - { - FoundBoundary = true; - } - else - { - FoundBoundary = (CurrentHash % m_Discriminator) == (m_Discriminator - 1); - } - - if (FoundBoundary) - { - // Boundary found! - InternalReset(); - - return CurrentOffset; - } - } - - m_Window[Index++] = *CursorPtr; - - if (Index == kWindowSize) - { - Index = 0; - } - - ++CursorPtr; - --ByteCount; - } - } - - m_CurrentChunkSize = CurrentChunkSize; - m_CurrentHash = CurrentHash; - - return kNoBoundaryFound; -} - -} // namespace zen diff --git a/src/zenstore/chunking.h b/src/zenstore/chunking.h deleted file mode 100644 index 09c56454f..000000000 --- a/src/zenstore/chunking.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once -#include - -namespace zen { - -/** Content-defined chunking helper - */ -class ZenChunkHelper -{ -public: - void SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize); - size_t ScanChunk(const void* DataBytes, size_t ByteCount); - void Reset(); - - // This controls which chunking approach is used - threshold or - // modulo based. Threshold is faster and generates similarly sized - // chunks - void SetUseThreshold(bool NewState) { m_UseThreshold = NewState; } - - inline size_t ChunkSizeMin() const { return m_ChunkSizeMin; } - inline size_t ChunkSizeMax() const { return m_ChunkSizeMax; } - inline size_t ChunkSizeAvg() const { return m_ChunkSizeAvg; } - inline uint64_t BytesScanned() const { return m_BytesScanned; } - - static constexpr size_t kNoBoundaryFound = size_t(~0ull); - -private: - size_t m_ChunkSizeMin = 0; - size_t m_ChunkSizeMax = 0; - size_t m_ChunkSizeAvg = 0; - - uint32_t m_Discriminator = 0; // Computed in SetChunkSize() - uint32_t m_Threshold = 0; // Computed in SetChunkSize() - - bool m_UseThreshold = true; - - static constexpr size_t kChunkSizeLimitMax = 64 * 1024 * 1024; - static constexpr size_t kChunkSizeLimitMin = 1024; - static constexpr size_t kDefaultAverageChunkSize = 64 * 1024; - - static constexpr int kWindowSize = 48; - uint8_t m_Window[kWindowSize]; - uint32_t m_WindowSize = 0; - - uint32_t m_CurrentHash = 0; - uint32_t m_CurrentChunkSize = 0; - - uint64_t m_BytesScanned = 0; - - size_t InternalScanChunk(const void* DataBytes, size_t ByteCount); - void InternalReset(); -}; - -} // namespace zen diff --git a/src/zenstore/include/zenstore/chunkedfile.h b/src/zenstore/include/zenstore/chunkedfile.h deleted file mode 100644 index c6330bdbd..000000000 --- a/src/zenstore/include/zenstore/chunkedfile.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include -#include -#include - -#include -#include - -namespace zen { - -class BasicFile; - -struct ChunkedInfo -{ - uint64_t RawSize = 0; - IoHash RawHash; - std::vector ChunkSequence; - std::vector ChunkHashes; -}; - -struct ChunkSource -{ - uint64_t Offset; // 8 - uint32_t Size; // 4 -}; - -struct ChunkedInfoWithSource -{ - ChunkedInfo Info; - std::vector ChunkSources; -}; - -struct ChunkedParams -{ - bool UseThreshold = true; - size_t MinSize = (2u * 1024u) - 128u; - size_t MaxSize = (16u * 1024u); - size_t AvgSize = (3u * 1024u); -}; - -static const ChunkedParams UShaderByteCodeParams = {.UseThreshold = true, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}; - -ChunkedInfoWithSource ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params = {}); -void Reconstruct(const ChunkedInfo& Info, - const std::filesystem::path& TargetPath, - std::function GetChunk); -IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info); -ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer); - -void chunkedfile_forcelink(); -} // namespace zen diff --git a/src/zenutil/chunkblock.cpp b/src/zenutil/chunkblock.cpp new file mode 100644 index 000000000..6dae5af11 --- /dev/null +++ b/src/zenutil/chunkblock.cpp @@ -0,0 +1,166 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +#include +#include + +#include + +namespace zen { + +using namespace std::literals; + +ChunkBlockDescription +ParseChunkBlockDescription(const CbObjectView& BlockObject) +{ + ChunkBlockDescription Result; + Result.BlockHash = BlockObject["rawHash"sv].AsHash(); + if (Result.BlockHash != IoHash::Zero) + { + CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView(); + Result.ChunkHashes.reserve(ChunksArray.Num()); + for (CbFieldView ChunkView : ChunksArray) + { + Result.ChunkHashes.push_back(ChunkView.AsHash()); + } + + CbArrayView ChunkRawLengthsArray = BlockObject["chunkRawLengths"sv].AsArrayView(); + std::vector ChunkLengths; + Result.ChunkRawLengths.reserve(ChunkRawLengthsArray.Num()); + for (CbFieldView ChunkView : ChunkRawLengthsArray) + { + Result.ChunkRawLengths.push_back(ChunkView.AsUInt32()); + } + } + return Result; +} + +std::vector +ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject) +{ + if (!BlocksObject) + { + return {}; + } + std::vector Result; + CbArrayView Blocks = BlocksObject["blocks"].AsArrayView(); + Result.reserve(Blocks.Num()); + for (CbFieldView BlockView : Blocks) + { + CbObjectView BlockObject = BlockView.AsObjectView(); + Result.emplace_back(ParseChunkBlockDescription(BlockObject)); + } + return Result; +} + +CbObject +BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData) +{ + ZEN_ASSERT(Block.ChunkRawLengths.size() == Block.ChunkHashes.size()); + + CbObjectWriter Writer; + Writer.AddHash("rawHash"sv, Block.BlockHash); + Writer.BeginArray("rawHashes"sv); + { + for (const IoHash& ChunkHash : Block.ChunkHashes) + { + Writer.AddHash(ChunkHash); + } + } + Writer.EndArray(); + Writer.BeginArray("chunkRawLengths"); + { + for (uint32_t ChunkSize : Block.ChunkRawLengths) + { + Writer.AddInteger(ChunkSize); + } + } + Writer.EndArray(); + + Writer.AddObject("metadata", MetaData); + + return Writer.Save(); +} + +CompressedBuffer +GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock) +{ + const size_t ChunkCount = FetchChunks.size(); + + std::vector ChunkSegments; + ChunkSegments.resize(1); + ChunkSegments.reserve(1 + ChunkCount); + OutBlock.ChunkHashes.reserve(ChunkCount); + OutBlock.ChunkRawLengths.reserve(ChunkCount); + { + IoBuffer TempBuffer(ChunkCount * 9); + MutableMemoryView View = TempBuffer.GetMutableView(); + uint8_t* BufferStartPtr = reinterpret_cast(View.GetData()); + uint8_t* BufferEndPtr = BufferStartPtr; + BufferEndPtr += WriteVarUInt(gsl::narrow(ChunkCount), BufferEndPtr); + for (const auto& It : FetchChunks) + { + std::pair Chunk = It.second(It.first); + uint64_t ChunkSize = 0; + std::span Segments = Chunk.second.GetCompressed().GetSegments(); + for (const SharedBuffer& Segment : Segments) + { + ChunkSize += Segment.GetSize(); + ChunkSegments.push_back(Segment); + } + BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr); + OutBlock.ChunkHashes.push_back(It.first); + OutBlock.ChunkRawLengths.push_back(gsl::narrow(Chunk.first)); + } + ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd()); + ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr); + ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow(TempBufferLength))); + } + CompressedBuffer CompressedBlock = + CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None); + OutBlock.BlockHash = CompressedBlock.DecodeRawHash(); + return CompressedBlock; +} + +bool +IterateChunkBlock(const SharedBuffer& BlockPayload, std::function Visitor) +{ + ZEN_ASSERT(BlockPayload); + if (BlockPayload.GetSize() < 1) + { + return false; + } + + MemoryView BlockView = BlockPayload.GetView(); + const uint8_t* ReadPtr = reinterpret_cast(BlockView.GetData()); + uint32_t NumberSize; + uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize); + ReadPtr += NumberSize; + std::vector ChunkSizes; + ChunkSizes.reserve(ChunkCount); + while (ChunkCount--) + { + ChunkSizes.push_back(ReadVarUInt(ReadPtr, NumberSize)); + ReadPtr += NumberSize; + } + for (uint64_t ChunkSize : ChunkSizes) + { + IoBuffer Chunk(IoBuffer::Wrap, ReadPtr, ChunkSize); + IoHash AttachmentRawHash; + uint64_t AttachmentRawSize; + CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(Chunk), AttachmentRawHash, AttachmentRawSize); + + if (!CompressedChunk) + { + ZEN_ERROR("Invalid chunk in block"); + return false; + } + Visitor(std::move(CompressedChunk), AttachmentRawHash); + ReadPtr += ChunkSize; + ZEN_ASSERT(ReadPtr <= BlockView.GetDataEnd()); + } + return true; +}; + +} // namespace zen diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp new file mode 100644 index 000000000..c08492eb0 --- /dev/null +++ b/src/zenutil/chunkedfile.cpp @@ -0,0 +1,510 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +#include + +#include "chunking.h" + +ZEN_THIRD_PARTY_INCLUDES_START +#include +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +namespace { + struct ChunkedHeader + { + static constexpr uint32_t ExpectedMagic = 0x646b6863; // chkd + static constexpr uint32_t CurrentVersion = 1; + + uint32_t Magic = ExpectedMagic; + uint32_t Version = CurrentVersion; + uint32_t ChunkSequenceLength; + uint32_t ChunkHashCount; + uint64_t ChunkSequenceOffset; + uint64_t ChunkHashesOffset; + uint64_t RawSize = 0; + IoHash RawHash; + }; +} // namespace + +IoBuffer +SerializeChunkedInfo(const ChunkedInfo& Info) +{ + size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) + + RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16); + IoBuffer HeaderData(HeaderSize); + + ChunkedHeader Header; + Header.ChunkSequenceLength = gsl::narrow(Info.ChunkSequence.size()); + Header.ChunkHashCount = gsl::narrow(Info.ChunkHashes.size()); + Header.ChunkSequenceOffset = RoundUp(sizeof(ChunkedHeader), 16); + Header.ChunkHashesOffset = RoundUp(Header.ChunkSequenceOffset + sizeof(uint32_t) * Header.ChunkSequenceLength, 16); + Header.RawSize = Info.RawSize; + Header.RawHash = Info.RawHash; + + MutableMemoryView WriteView = HeaderData.GetMutableView(); + { + MutableMemoryView HeaderWriteView = WriteView.Left(sizeof(Header)); + HeaderWriteView.CopyFrom(MemoryView(&Header, sizeof(Header))); + } + { + MutableMemoryView ChunkSequenceWriteView = WriteView.Mid(Header.ChunkSequenceOffset, sizeof(uint32_t) * Header.ChunkSequenceLength); + ChunkSequenceWriteView.CopyFrom(MemoryView(Info.ChunkSequence.data(), ChunkSequenceWriteView.GetSize())); + } + { + MutableMemoryView ChunksWriteView = WriteView.Mid(Header.ChunkHashesOffset, sizeof(IoHash) * Header.ChunkHashCount); + ChunksWriteView.CopyFrom(MemoryView(Info.ChunkHashes.data(), ChunksWriteView.GetSize())); + } + + return HeaderData; +} + +ChunkedInfo +DeserializeChunkedInfo(IoBuffer& Buffer) +{ + MemoryView View = Buffer.GetView(); + ChunkedHeader Header; + { + MutableMemoryView HeaderWriteView(&Header, sizeof(Header)); + HeaderWriteView.CopyFrom(View.Left(sizeof(Header))); + } + if (Header.Magic != ChunkedHeader::ExpectedMagic) + { + return {}; + } + if (Header.Version != ChunkedHeader::CurrentVersion) + { + return {}; + } + ChunkedInfo Info; + Info.RawSize = Header.RawSize; + Info.RawHash = Header.RawHash; + Info.ChunkSequence.resize(Header.ChunkSequenceLength); + Info.ChunkHashes.resize(Header.ChunkHashCount); + { + MutableMemoryView ChunkSequenceWriteView(Info.ChunkSequence.data(), sizeof(uint32_t) * Header.ChunkSequenceLength); + ChunkSequenceWriteView.CopyFrom(View.Mid(Header.ChunkSequenceOffset, ChunkSequenceWriteView.GetSize())); + } + { + MutableMemoryView ChunksWriteView(Info.ChunkHashes.data(), sizeof(IoHash) * Header.ChunkHashCount); + ChunksWriteView.CopyFrom(View.Mid(Header.ChunkHashesOffset, ChunksWriteView.GetSize())); + } + + return Info; +} + +void +Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function GetChunk) +{ + BasicFile Reconstructed; + Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate); + BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024); + uint64_t Offset = 0; + for (uint32_t SequenceIndex : Info.ChunkSequence) + { + IoBuffer Chunk = GetChunk(Info.ChunkHashes[SequenceIndex]); + ReconstructedWriter.Write(Chunk.GetData(), Chunk.GetSize(), Offset); + Offset += Chunk.GetSize(); + } +} + +ChunkedInfoWithSource +ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params, std::atomic* BytesProcessed) +{ + ChunkedInfoWithSource Result; + tsl::robin_map FoundChunks; + + ZenChunkHelper Chunker; + Chunker.SetUseThreshold(Params.UseThreshold); + Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize); + size_t End = Offset + Size; + const size_t ScanBufferSize = 1u * 1024 * 1024; // (Params.MaxSize * 9) / 3;//1 * 1024 * 1024; + BasicFileBuffer RawBuffer(RawData, ScanBufferSize); + MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); + ZEN_ASSERT(!SliceView.IsEmpty()); + size_t SliceSize = SliceView.GetSize(); + IoHashStream RawHashStream; + while (Offset < End) + { + size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize); + if (ScanLength == ZenChunkHelper::kNoBoundaryFound) + { + if (Offset + SliceSize == End) + { + ScanLength = SliceSize; + } + else + { + SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); + SliceSize = SliceView.GetSize(); + Chunker.Reset(); + continue; + } + } + uint32_t ChunkLength = gsl::narrow(ScanLength); // +HashedLength); + MemoryView ChunkView = SliceView.Left(ScanLength); + RawHashStream.Append(ChunkView); + IoHash ChunkHash = IoHash::HashBuffer(ChunkView); + SliceView.RightChopInline(ScanLength); + if (auto It = FoundChunks.find(ChunkHash); It != FoundChunks.end()) + { + Result.Info.ChunkSequence.push_back(It->second); + } + else + { + uint32_t ChunkIndex = gsl::narrow(Result.Info.ChunkHashes.size()); + FoundChunks.insert_or_assign(ChunkHash, ChunkIndex); + Result.Info.ChunkHashes.push_back(ChunkHash); + Result.ChunkSources.push_back(ChunkSource{.Offset = Offset, .Size = ChunkLength}); + Result.Info.ChunkSequence.push_back(ChunkIndex); + } + + SliceSize = SliceView.GetSize(); + Offset += ChunkLength; + if (BytesProcessed != nullptr) + { + BytesProcessed->fetch_add(ChunkLength); + } + } + Result.Info.RawSize = Size; + Result.Info.RawHash = RawHashStream.GetHash(); + return Result; +} + +} // namespace zen + +#if ZEN_WITH_TESTS +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include "chunking.h" + +ZEN_THIRD_PARTY_INCLUDES_START +# include +# include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { +# if 0 +TEST_CASE("chunkedfile.findparams") +{ +# if 1 + DirectoryContent SourceContent1; + GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208", DirectoryContentFlags::IncludeFiles, SourceContent1); + const std::vector& SourceFiles1 = SourceContent1.Files; + DirectoryContent SourceContent2; + GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208_2", DirectoryContentFlags::IncludeFiles, SourceContent2); + const std::vector& SourceFiles2 = SourceContent2.Files; +# else + std::filesystem::path SourcePath1 = + "E:\\Temp\\ChunkingTestData\\31375996\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; + std::filesystem::path SourcePath2 = + "E:\\Temp\\ChunkingTestData\\31379208\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; + const std::vector& SourceFiles1 = {SourcePath1}; + const std::vector& SourceFiles2 = {SourcePath2}; +# endif + ChunkedParams Params[] = {ChunkedParams{.UseThreshold = false, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}, + ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 35598}, + ChunkedParams{.UseThreshold = false, .MinSize = 16848, .MaxSize = 135168, .AvgSize = 39030}, + ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 36222}, + ChunkedParams{.UseThreshold = false, .MinSize = 15744, .MaxSize = 126976, .AvgSize = 36600}, + ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 35442}, + ChunkedParams{.UseThreshold = false, .MinSize = 16464, .MaxSize = 131072, .AvgSize = 37950}, + ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 38914}, + ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 35556}, + ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 35520}, + ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 35478}, + ChunkedParams{.UseThreshold = false, .MinSize = 16896, .MaxSize = 135168, .AvgSize = 39072}, + ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 38880}, + ChunkedParams{.UseThreshold = false, .MinSize = 15840, .MaxSize = 126976, .AvgSize = 36678}, + ChunkedParams{.UseThreshold = false, .MinSize = 16800, .MaxSize = 135168, .AvgSize = 38994}, + ChunkedParams{.UseThreshold = false, .MinSize = 15888, .MaxSize = 126976, .AvgSize = 36714}, + ChunkedParams{.UseThreshold = false, .MinSize = 15792, .MaxSize = 126976, .AvgSize = 36636}, + ChunkedParams{.UseThreshold = false, .MinSize = 14880, .MaxSize = 118784, .AvgSize = 37609}, + ChunkedParams{.UseThreshold = false, .MinSize = 15936, .MaxSize = 126976, .AvgSize = 36756}, + ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 38955}, + ChunkedParams{.UseThreshold = false, .MinSize = 15984, .MaxSize = 126976, .AvgSize = 36792}, + ChunkedParams{.UseThreshold = false, .MinSize = 14400, .MaxSize = 114688, .AvgSize = 36338}, + ChunkedParams{.UseThreshold = false, .MinSize = 14832, .MaxSize = 118784, .AvgSize = 37568}, + ChunkedParams{.UseThreshold = false, .MinSize = 16944, .MaxSize = 135168, .AvgSize = 39108}, + ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 36297}, + ChunkedParams{.UseThreshold = false, .MinSize = 14208, .MaxSize = 114688, .AvgSize = 36188}, + ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 36372}, + ChunkedParams{.UseThreshold = false, .MinSize = 13296, .MaxSize = 106496, .AvgSize = 36592}, + ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 38805}, + ChunkedParams{.UseThreshold = false, .MinSize = 14304, .MaxSize = 114688, .AvgSize = 36263}, + ChunkedParams{.UseThreshold = false, .MinSize = 14784, .MaxSize = 118784, .AvgSize = 37534}, + ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 38839}, + ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 39360}, + ChunkedParams{.UseThreshold = false, .MinSize = 13776, .MaxSize = 110592, .AvgSize = 37976}, + ChunkedParams{.UseThreshold = false, .MinSize = 14736, .MaxSize = 118784, .AvgSize = 37493}, + ChunkedParams{.UseThreshold = false, .MinSize = 14928, .MaxSize = 118784, .AvgSize = 37643}, + ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 39504}, + ChunkedParams{.UseThreshold = false, .MinSize = 13392, .MaxSize = 106496, .AvgSize = 36664}, + ChunkedParams{.UseThreshold = false, .MinSize = 13872, .MaxSize = 110592, .AvgSize = 38048}, + ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 39432}, + ChunkedParams{.UseThreshold = false, .MinSize = 13200, .MaxSize = 106496, .AvgSize = 36520}, + ChunkedParams{.UseThreshold = false, .MinSize = 17328, .MaxSize = 139264, .AvgSize = 36378}, + ChunkedParams{.UseThreshold = false, .MinSize = 17376, .MaxSize = 139264, .AvgSize = 36421}, + ChunkedParams{.UseThreshold = false, .MinSize = 17424, .MaxSize = 139264, .AvgSize = 36459}, + ChunkedParams{.UseThreshold = false, .MinSize = 17472, .MaxSize = 139264, .AvgSize = 36502}, + ChunkedParams{.UseThreshold = false, .MinSize = 17520, .MaxSize = 139264, .AvgSize = 36540}, + ChunkedParams{.UseThreshold = false, .MinSize = 17808, .MaxSize = 143360, .AvgSize = 37423}, + ChunkedParams{.UseThreshold = false, .MinSize = 17856, .MaxSize = 143360, .AvgSize = 37466}, + ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 25834}, + ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 21917}, + ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 29751}, + ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 33668}, + ChunkedParams{.UseThreshold = false, .MinSize = 17952, .MaxSize = 143360, .AvgSize = 37547}, + ChunkedParams{.UseThreshold = false, .MinSize = 17904, .MaxSize = 143360, .AvgSize = 37504}, + ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 22371}, + ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 37585}, + ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 26406}, + ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 26450}, + ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 30615}, + ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 30441}, + ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 22417}, + ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 22557}, + ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 30528}, + ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 27112}, + ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 34644}, + ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 34476}, + ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 35408}, + ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 38592}, + ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 30483}, + ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 26586}, + ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 26496}, + ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 31302}, + ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 34516}, + ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 22964}, + ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 35448}, + ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 38630}, + ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 23010}, + ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 31260}, + ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 34600}, + ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 27156}, + ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 30570}, + ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 38549}, + ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 22510}, + ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 38673}, + ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 34560}, + ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 22464}, + ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 26540}, + ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 38511}, + ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 23057}, + ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 27202}, + ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 31347}, + ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 35492}, + ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 31389}, + ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 27246}, + ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 23103}, + ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 35532}, + ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 23150}, + ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 27292}, + ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 31434}, + ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 35576}, + ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 27336}, + ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 23196}, + ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 31476}, + ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 35616}, + ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 27862}, + ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 32121}, + ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 23603}, + ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 36380}, + ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 27908}, + ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 23650}, + ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 32166}, + ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 36424}, + ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 23696}, + ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 32253}, + ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 32208}, + ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 23743}, + ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 36548}, + ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 28042}, + ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 23789}, + ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 32295}, + ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 36508}, + ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 27952}, + ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 27998}, + ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 36464}}; + + static const size_t ParamsCount = sizeof(Params) / sizeof(ChunkedParams); + std::vector Infos1(SourceFiles1.size()); + std::vector Infos2(SourceFiles2.size()); + + WorkerThreadPool WorkerPool(32); + + for (size_t I = 0; I < ParamsCount; I++) + { + for (int UseThreshold = 0; UseThreshold < 2; UseThreshold++) + { + Latch WorkLatch(1); + ChunkedParams Param = Params[I]; + Param.UseThreshold = UseThreshold == 1; + Stopwatch Timer; + for (size_t F = 0; F < SourceFiles1.size(); F++) + { + WorkLatch.AddCount(1); + WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles1, &Infos1]() { + auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); + BasicFile SourceData1; + SourceData1.Open(SourceFiles1[F], BasicFile::Mode::kRead); + Infos1[F] = ChunkData(SourceData1, 0, SourceData1.FileSize(), Param); + }); + } + for (size_t F = 0; F < SourceFiles2.size(); F++) + { + WorkLatch.AddCount(1); + WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles2, &Infos2]() { + auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); + BasicFile SourceData2; + SourceData2.Open(SourceFiles2[F], BasicFile::Mode::kRead); + Infos2[F] = ChunkData(SourceData2, 0, SourceData2.FileSize(), Param); + }); + } + WorkLatch.CountDown(); + WorkLatch.Wait(); + uint64_t ChunkTimeMS = Timer.GetElapsedTimeMs(); + + uint64_t Raw1Size = 0; + tsl::robin_set Chunks1; + size_t ChunkedSize1 = 0; + for (size_t F = 0; F < SourceFiles1.size(); F++) + { + const ChunkedInfoWithSource& Info = Infos1[F]; + Raw1Size += Info.Info.RawSize; + for (uint32_t Chunk1Index = 0; Chunk1Index < Info.Info.ChunkHashes.size(); ++Chunk1Index) + { + const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk1Index]; + if (Chunks1.insert(ChunkHash).second) + { + ChunkedSize1 += Info.ChunkSources[Chunk1Index].Size; + } + } + } + + uint64_t Raw2Size = 0; + tsl::robin_set Chunks2; + size_t ChunkedSize2 = 0; + size_t DiffSize = 0; + for (size_t F = 0; F < SourceFiles2.size(); F++) + { + const ChunkedInfoWithSource& Info = Infos2[F]; + Raw2Size += Info.Info.RawSize; + for (uint32_t Chunk2Index = 0; Chunk2Index < Info.Info.ChunkHashes.size(); ++Chunk2Index) + { + const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk2Index]; + if (Chunks2.insert(ChunkHash).second) + { + ChunkedSize2 += Info.ChunkSources[Chunk2Index].Size; + if (!Chunks1.contains(ChunkHash)) + { + DiffSize += Info.ChunkSources[Chunk2Index].Size; + } + } + } + } + + ZEN_INFO( + "Diff = {}, Chunks1 = {}, Chunks2 = {}, .UseThreshold = {}, .MinSize = {}, .MaxSize = {}, .AvgSize = {}, RawSize(1) = {}, " + "RawSize(2) = {}, " + "Saved(1) = {}, Saved(2) = {} in {}", + NiceBytes(DiffSize), + Chunks1.size(), + Chunks2.size(), + Param.UseThreshold, + Param.MinSize, + Param.MaxSize, + Param.AvgSize, + NiceBytes(Raw1Size), + NiceBytes(Raw2Size), + NiceBytes(Raw1Size - ChunkedSize1), + NiceBytes(Raw2Size - ChunkedSize2), + NiceTimeSpanMs(ChunkTimeMS)); + } + } + +# if 0 + for (int64_t MinSizeBase = (12u * 1024u); MinSizeBase <= (32u * 1024u); MinSizeBase += 512) + { + for (int64_t Wiggle = -132; Wiggle < 126; Wiggle += 2) + { + // size_t MinSize = 7 * 1024 - 61; // (size_t)(MinSizeBase + Wiggle); + // size_t MaxSize = 16 * (7 * 1024); // 8 * 7 * 1024;// MinSizeBase * 6; + // size_t AvgSize = MaxSize / 2; // 4 * 7 * 1024;// MinSizeBase * 3; + size_t MinSize = (size_t)(MinSizeBase + Wiggle); + //for (size_t MaxSize = (MinSize * 4) - 768; MaxSize < (MinSize * 5) + 768; MaxSize += 64) + size_t MaxSize = 8u * MinSizeBase; + { + for (size_t AvgSize = (MaxSize - MinSize) / 32 + MinSize; AvgSize < (MaxSize - MinSize) / 4 + MinSize; AvgSize += (MaxSize - MinSize) / 32) +// size_t AvgSize = (MaxSize - MinSize) / 4 + MinSize; + { + WorkLatch.AddCount(1); + WorkerPool.ScheduleWork([&WorkLatch, MinSize, MaxSize, AvgSize, SourcePath1, SourcePath2]() + { + auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); + ChunkedParams Params{ .UseThreshold = true, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize }; + BasicFile SourceData1; + SourceData1.Open(SourcePath1, BasicFile::Mode::kRead); + BasicFile SourceData2; + SourceData2.Open(SourcePath2, BasicFile::Mode::kRead); + ChunkedInfoWithSource Info1 = ChunkData(SourceData1, Params); + ChunkedInfoWithSource Info2 = ChunkData(SourceData2, Params); + + tsl::robin_set Chunks1; + Chunks1.reserve(Info1.Info.ChunkHashes.size()); + Chunks1.insert(Info1.Info.ChunkHashes.begin(), Info1.Info.ChunkHashes.end()); + size_t ChunkedSize1 = 0; + for (uint32_t Chunk1Index = 0; Chunk1Index < Info1.Info.ChunkHashes.size(); ++Chunk1Index) + { + ChunkedSize1 += Info1.ChunkSources[Chunk1Index].Size; + } + size_t DiffSavedSize = 0; + size_t ChunkedSize2 = 0; + for (uint32_t Chunk2Index = 0; Chunk2Index < Info2.Info.ChunkHashes.size(); ++Chunk2Index) + { + ChunkedSize2 += Info2.ChunkSources[Chunk2Index].Size; + if (Chunks1.find(Info2.Info.ChunkHashes[Chunk2Index]) == Chunks1.end()) + { + DiffSavedSize += Info2.ChunkSources[Chunk2Index].Size; + } + } + ZEN_INFO("Diff {}, Chunks1: {}, Chunks2: {}, Min: {}, Max: {}, Avg: {}, Saved(1) {}, Saved(2) {}", + NiceBytes(DiffSavedSize), + Info1.Info.ChunkHashes.size(), + Info2.Info.ChunkHashes.size(), + MinSize, + MaxSize, + AvgSize, + NiceBytes(Info1.Info.RawSize - ChunkedSize1), + NiceBytes(Info2.Info.RawSize - ChunkedSize2)); + }); + } + } + } + } +# endif // 0 + + // WorkLatch.CountDown(); + // WorkLatch.Wait(); +} +# endif // 0 + +void +chunkedfile_forcelink() +{ +} + +} // namespace zen + +#endif diff --git a/src/zenutil/chunking.cpp b/src/zenutil/chunking.cpp new file mode 100644 index 000000000..30edd322a --- /dev/null +++ b/src/zenutil/chunking.cpp @@ -0,0 +1,382 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "chunking.h" + +#include + +#include + +namespace zen::detail { + +static const uint32_t BuzhashTable[] = { + 0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, 0x7ebf5191, 0x841135c7, 0x65cc53b3, + 0x280a597c, 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, 0x882bf287, 0x3116737c, + 0x05569956, 0xe8cc1f68, 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, 0x9bfd7c64, + 0x0b3e7276, 0xf2688e77, 0x8fad8abc, 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2, + 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, 0x7b7c222f, 0x2955ed16, 0x9f10ca59, + 0xe840c4c9, 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, 0xb19165cd, 0x9891c393, + 0x325384ac, 0x0308459d, 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, 0x977eb18c, + 0xd8770976, 0x9833466a, 0xc674df7f, 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4, + 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, 0xbef8f0e1, 0x21d73653, 0x4e3d977a, + 0x1e7b3929, 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, 0xf5f7be70, 0xe795248a, + 0x375a2fe9, 0x425570b6, 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, 0x1bc0dfb5, + 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c, + 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf, + 0xe0d8f8ae, 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, 0xe95effac, 0x305a47bd, 0x426ba35b, 0x1233af3f, 0x686a5b83, 0x50e072e5, + 0xd9d3bb2a, 0x8befc475, 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, 0xc0d0a81c, + 0x7fa3429b, 0xe9158a1b, 0x225ea19a, 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140, + 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, 0xa41979c2, 0xf2b61284, 0x4fac1475, 0x0adb49f9, 0x19727a23, 0x15a7e374, 0xc43a18d5, + 0x3fb1aa73, 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, 0x5388e5ee, 0xcd8a7510, + 0xf901b4fd, 0xdbc13dbc, 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, 0x32baf4a9, + 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1, + 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3, + 0xc6eb57bb, 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, 0x329e5388, 0x91dd236b, + 0x2ecb0d93, 0xf4d82a3d, 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, 0x7bf7cabc, + 0xf9c18d66, 0x593ade65, 0xd95ddf11, +}; + +// ROL operation (compiler turns this into a ROL when optimizing) +ZEN_FORCEINLINE static uint32_t +Rotate32(uint32_t Value, size_t RotateCount) +{ + RotateCount &= 31; + + return ((Value) << (RotateCount)) | ((Value) >> (32 - RotateCount)); +} + +} // namespace zen::detail + +namespace zen { + +void +ZenChunkHelper::Reset() +{ + InternalReset(); + + m_BytesScanned = 0; +} + +void +ZenChunkHelper::InternalReset() +{ + m_CurrentHash = 0; + m_CurrentChunkSize = 0; + m_WindowSize = 0; +} + +void +ZenChunkHelper::SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize) +{ + if (m_WindowSize) + return; // Already started + + static_assert(kChunkSizeLimitMin > kWindowSize); + + if (AvgSize) + { + // TODO: Validate AvgSize range + } + else + { + if (MinSize && MaxSize) + { + AvgSize = std::lrint(std::pow(2, (std::log2(MinSize) + std::log2(MaxSize)) / 2)); + } + else if (MinSize) + { + AvgSize = MinSize * 4; + } + else if (MaxSize) + { + AvgSize = MaxSize / 4; + } + else + { + AvgSize = kDefaultAverageChunkSize; + } + } + + if (MinSize) + { + // TODO: Validate MinSize range + } + else + { + MinSize = std::max(AvgSize / 4, kChunkSizeLimitMin); + } + + if (MaxSize) + { + // TODO: Validate MaxSize range + } + else + { + MaxSize = std::min(AvgSize * 4, kChunkSizeLimitMax); + } + + m_Discriminator = gsl::narrow(AvgSize - MinSize); + + if (m_Discriminator < MinSize) + { + m_Discriminator = gsl::narrow(MinSize); + } + + if (m_Discriminator > MaxSize) + { + m_Discriminator = gsl::narrow(MaxSize); + } + + m_Threshold = gsl::narrow((uint64_t(std::numeric_limits::max()) + 1) / m_Discriminator); + + m_ChunkSizeMin = MinSize; + m_ChunkSizeMax = MaxSize; + m_ChunkSizeAvg = AvgSize; +} + +size_t +ZenChunkHelper::ScanChunk(const void* DataBytesIn, size_t ByteCount) +{ + size_t Result = InternalScanChunk(DataBytesIn, ByteCount); + + if (Result == kNoBoundaryFound) + { + m_BytesScanned += ByteCount; + } + else + { + m_BytesScanned += Result; + } + + return Result; +} + +size_t +ZenChunkHelper::InternalScanChunk(const void* DataBytesIn, size_t ByteCount) +{ + size_t CurrentOffset = 0; + const uint8_t* CursorPtr = reinterpret_cast(DataBytesIn); + + // There's no point in updating the hash if we know we're not + // going to have a cut point, so just skip the data. This logic currently + // provides roughly a 20% speedup on my machine + + const size_t NeedHashOffset = m_ChunkSizeMin - kWindowSize; + + if (m_CurrentChunkSize < NeedHashOffset) + { + const uint32_t SkipBytes = gsl::narrow(std::min(ByteCount, NeedHashOffset - m_CurrentChunkSize)); + + ByteCount -= SkipBytes; + m_CurrentChunkSize += SkipBytes; + CurrentOffset += SkipBytes; + CursorPtr += SkipBytes; + + m_WindowSize = 0; + + if (ByteCount == 0) + { + return kNoBoundaryFound; + } + } + + // Fill window first + + if (m_WindowSize < kWindowSize) + { + const uint32_t FillBytes = uint32_t(std::min(ByteCount, kWindowSize - m_WindowSize)); + + memcpy(&m_Window[m_WindowSize], CursorPtr, FillBytes); + + CursorPtr += FillBytes; + + m_WindowSize += FillBytes; + m_CurrentChunkSize += FillBytes; + + CurrentOffset += FillBytes; + ByteCount -= FillBytes; + + if (m_WindowSize < kWindowSize) + { + return kNoBoundaryFound; + } + + // We have a full window, initialize hash + + uint32_t CurrentHash = 0; + + for (int i = 1; i < kWindowSize; ++i) + { + CurrentHash ^= detail::Rotate32(detail::BuzhashTable[m_Window[i - 1]], kWindowSize - i); + } + + m_CurrentHash = CurrentHash ^ detail::BuzhashTable[m_Window[kWindowSize - 1]]; + } + + // Scan for boundaries (i.e points where the hash matches the value determined by + // the discriminator) + + uint32_t CurrentHash = m_CurrentHash; + uint32_t CurrentChunkSize = m_CurrentChunkSize; + + size_t Index = CurrentChunkSize % kWindowSize; + + if (m_Threshold && m_UseThreshold) + { + // This is roughly 4x faster than the general modulo approach on my + // TR 3990X (~940MB/sec) and doesn't require any special parameters to + // achieve max performance + + while (ByteCount) + { + const uint8_t NewByte = *CursorPtr; + const uint8_t OldByte = m_Window[Index]; + + CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ + detail::BuzhashTable[NewByte]; + + CurrentChunkSize++; + CurrentOffset++; + + if (CurrentChunkSize >= m_ChunkSizeMin) + { + bool FoundBoundary; + + if (CurrentChunkSize >= m_ChunkSizeMax) + { + FoundBoundary = true; + } + else + { + FoundBoundary = CurrentHash <= m_Threshold; + } + + if (FoundBoundary) + { + // Boundary found! + InternalReset(); + + return CurrentOffset; + } + } + + m_Window[Index++] = *CursorPtr; + + if (Index == kWindowSize) + { + Index = 0; + } + + ++CursorPtr; + --ByteCount; + } + } + else if ((m_Discriminator & (m_Discriminator - 1)) == 0) + { + // This is quite a bit faster than the generic modulo path, but + // requires a very specific average chunk size to be used. If you + // pass in an even power-of-two divided by 0.75 as the average + // chunk size you'll hit this path + + const uint32_t Mask = m_Discriminator - 1; + + while (ByteCount) + { + const uint8_t NewByte = *CursorPtr; + const uint8_t OldByte = m_Window[Index]; + + CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ + detail::BuzhashTable[NewByte]; + + CurrentChunkSize++; + CurrentOffset++; + + if (CurrentChunkSize >= m_ChunkSizeMin) + { + bool FoundBoundary; + + if (CurrentChunkSize >= m_ChunkSizeMax) + { + FoundBoundary = true; + } + else + { + FoundBoundary = (CurrentHash & Mask) == Mask; + } + + if (FoundBoundary) + { + // Boundary found! + InternalReset(); + + return CurrentOffset; + } + } + + m_Window[Index++] = *CursorPtr; + + if (Index == kWindowSize) + { + Index = 0; + } + + ++CursorPtr; + --ByteCount; + } + } + else + { + // This is the slowest path, which caps out around 250MB/sec for large sizes + // on my TR3900X + + while (ByteCount) + { + const uint8_t NewByte = *CursorPtr; + const uint8_t OldByte = m_Window[Index]; + + CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ + detail::BuzhashTable[NewByte]; + + CurrentChunkSize++; + CurrentOffset++; + + if (CurrentChunkSize >= m_ChunkSizeMin) + { + bool FoundBoundary; + + if (CurrentChunkSize >= m_ChunkSizeMax) + { + FoundBoundary = true; + } + else + { + FoundBoundary = (CurrentHash % m_Discriminator) == (m_Discriminator - 1); + } + + if (FoundBoundary) + { + // Boundary found! + InternalReset(); + + return CurrentOffset; + } + } + + m_Window[Index++] = *CursorPtr; + + if (Index == kWindowSize) + { + Index = 0; + } + + ++CursorPtr; + --ByteCount; + } + } + + m_CurrentChunkSize = CurrentChunkSize; + m_CurrentHash = CurrentHash; + + return kNoBoundaryFound; +} + +} // namespace zen diff --git a/src/zenutil/chunking.h b/src/zenutil/chunking.h new file mode 100644 index 000000000..09c56454f --- /dev/null +++ b/src/zenutil/chunking.h @@ -0,0 +1,56 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once +#include + +namespace zen { + +/** Content-defined chunking helper + */ +class ZenChunkHelper +{ +public: + void SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize); + size_t ScanChunk(const void* DataBytes, size_t ByteCount); + void Reset(); + + // This controls which chunking approach is used - threshold or + // modulo based. Threshold is faster and generates similarly sized + // chunks + void SetUseThreshold(bool NewState) { m_UseThreshold = NewState; } + + inline size_t ChunkSizeMin() const { return m_ChunkSizeMin; } + inline size_t ChunkSizeMax() const { return m_ChunkSizeMax; } + inline size_t ChunkSizeAvg() const { return m_ChunkSizeAvg; } + inline uint64_t BytesScanned() const { return m_BytesScanned; } + + static constexpr size_t kNoBoundaryFound = size_t(~0ull); + +private: + size_t m_ChunkSizeMin = 0; + size_t m_ChunkSizeMax = 0; + size_t m_ChunkSizeAvg = 0; + + uint32_t m_Discriminator = 0; // Computed in SetChunkSize() + uint32_t m_Threshold = 0; // Computed in SetChunkSize() + + bool m_UseThreshold = true; + + static constexpr size_t kChunkSizeLimitMax = 64 * 1024 * 1024; + static constexpr size_t kChunkSizeLimitMin = 1024; + static constexpr size_t kDefaultAverageChunkSize = 64 * 1024; + + static constexpr int kWindowSize = 48; + uint8_t m_Window[kWindowSize]; + uint32_t m_WindowSize = 0; + + uint32_t m_CurrentHash = 0; + uint32_t m_CurrentChunkSize = 0; + + uint64_t m_BytesScanned = 0; + + size_t InternalScanChunk(const void* DataBytes, size_t ByteCount); + void InternalReset(); +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkblock.h b/src/zenutil/include/zenutil/chunkblock.h new file mode 100644 index 000000000..9b7414629 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkblock.h @@ -0,0 +1,32 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include + +#include +#include + +#include +#include + +namespace zen { + +struct ChunkBlockDescription +{ + IoHash BlockHash; + std::vector ChunkHashes; + std::vector ChunkRawLengths; +}; + +std::vector ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject); +ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject); +CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData); + +typedef std::function(const IoHash& RawHash)> FetchChunkFunc; + +CompressedBuffer GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock); +bool IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function Visitor); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkedfile.h b/src/zenutil/include/zenutil/chunkedfile.h new file mode 100644 index 000000000..7110ad317 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkedfile.h @@ -0,0 +1,58 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include +#include + +#include +#include + +namespace zen { + +class BasicFile; + +struct ChunkedInfo +{ + uint64_t RawSize = 0; + IoHash RawHash; + std::vector ChunkSequence; + std::vector ChunkHashes; +}; + +struct ChunkSource +{ + uint64_t Offset; // 8 + uint32_t Size; // 4 +}; + +struct ChunkedInfoWithSource +{ + ChunkedInfo Info; + std::vector ChunkSources; +}; + +struct ChunkedParams +{ + bool UseThreshold = true; + size_t MinSize = (2u * 1024u) - 128u; + size_t MaxSize = (16u * 1024u); + size_t AvgSize = (3u * 1024u); +}; + +static const ChunkedParams UShaderByteCodeParams = {.UseThreshold = true, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}; + +ChunkedInfoWithSource ChunkData(BasicFile& RawData, + uint64_t Offset, + uint64_t Size, + ChunkedParams Params = {}, + std::atomic* BytesProcessed = nullptr); +void Reconstruct(const ChunkedInfo& Info, + const std::filesystem::path& TargetPath, + std::function GetChunk); +IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info); +ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer); + +void chunkedfile_forcelink(); +} // namespace zen diff --git a/src/zenutil/zenutil.cpp b/src/zenutil/zenutil.cpp index c54144549..19eb63ce9 100644 --- a/src/zenutil/zenutil.cpp +++ b/src/zenutil/zenutil.cpp @@ -6,6 +6,7 @@ # include # include +# include namespace zen { @@ -15,6 +16,7 @@ zenutil_forcelinktests() cachepolicy_forcelink(); cache::rpcrecord_forcelink(); cacherequests_forcelink(); + chunkedfile_forcelink(); } } // namespace zen -- cgit v1.2.3 From 800305f7f2afe01e9ca038a71a68e9318c52ee77 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 12 Feb 2025 09:05:07 +0100 Subject: move WriteToTempFile to basicfile.h (#283) add helper constructors to BasicFile --- src/zencore/basicfile.cpp | 64 +++++++++++++- src/zencore/include/zencore/basicfile.h | 10 ++- src/zenserver/projectstore/remoteprojectstore.cpp | 100 +++++++++------------- 3 files changed, 111 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/zencore/basicfile.cpp b/src/zencore/basicfile.cpp index c2a21ae90..b3bffd34d 100644 --- a/src/zencore/basicfile.cpp +++ b/src/zencore/basicfile.cpp @@ -28,6 +28,20 @@ BasicFile::~BasicFile() { Close(); } +BasicFile::BasicFile(const std::filesystem::path& FileName, Mode Mode) +{ + Open(FileName, Mode); +} + +BasicFile::BasicFile(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec) +{ + Open(FileName, Mode, Ec); +} + +BasicFile::BasicFile(const std::filesystem::path& FileName, Mode Mode, std::function&& RetryCallback) +{ + Open(FileName, Mode, std::move(RetryCallback)); +} void BasicFile::Open(const std::filesystem::path& FileName, Mode Mode) @@ -267,7 +281,7 @@ BasicFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function&& RetryCallback) +{ + if (std::filesystem::is_regular_file(Path)) + { + IoBuffer ExistingTempFile = IoBuffer(IoBufferBuilder::MakeFromFile(Path)); + if (ExistingTempFile && ExistingTempFile.GetSize() == Buffer.GetSize()) + { + ExistingTempFile.SetDeleteOnClose(true); + return ExistingTempFile; + } + } + BasicFile BlockFile; + BlockFile.Open(Path, BasicFile::Mode::kTruncateDelete, std::move(RetryCallback)); + uint64_t Offset = 0; + { + static const uint64_t BufferingSize = 256u * 1024u; + BasicFileWriter BufferedOutput(BlockFile, BufferingSize / 2); + for (const SharedBuffer& Segment : Buffer.GetSegments()) + { + size_t SegmentSize = Segment.GetSize(); + + IoBufferFileReference FileRef; + if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) + { + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&BufferedOutput, &Offset](const void* Data, size_t Size) { + BufferedOutput.Write(Data, Size, Offset); + Offset += Size; + }); + } + else + { + BufferedOutput.Write(Segment.GetData(), SegmentSize, Offset); + Offset += SegmentSize; + } + } + } + void* FileHandle = BlockFile.Detach(); + IoBuffer BlockBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, Offset, /*IsWholeFile*/ true); + BlockBuffer.SetDeleteOnClose(true); + return BlockBuffer; +} + ////////////////////////////////////////////////////////////////////////// /* diff --git a/src/zencore/include/zencore/basicfile.h b/src/zencore/include/zencore/basicfile.h index 03c5605df..7edd40c9c 100644 --- a/src/zencore/include/zencore/basicfile.h +++ b/src/zencore/include/zencore/basicfile.h @@ -46,6 +46,10 @@ public: kPreventWrite = 0x2000'0000, // Do not open with write sharing mode (prevent other processes from writing to file while open) }; + BasicFile(const std::filesystem::path& FileName, Mode Mode); + BasicFile(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec); + BasicFile(const std::filesystem::path& FileName, Mode Mode, std::function&& RetryCallback); + void Open(const std::filesystem::path& FileName, Mode Mode); void Open(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec); void Open(const std::filesystem::path& FileName, Mode Mode, std::function&& RetryCallback); @@ -56,7 +60,7 @@ public: void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function&& ChunkFun); void Write(MemoryView Data, uint64_t FileOffset); void Write(MemoryView Data, uint64_t FileOffset, std::error_code& Ec); - uint64_t Write(CompositeBuffer Data, uint64_t FileOffset, std::error_code& Ec); + uint64_t Write(const CompositeBuffer& Data, uint64_t FileOffset, std::error_code& Ec); void Write(const void* Data, uint64_t Size, uint64_t FileOffset); void Write(const void* Data, uint64_t Size, uint64_t FileOffset, std::error_code& Ec); void Flush(); @@ -180,6 +184,10 @@ private: uint64_t m_BufferEnd; }; +IoBuffer WriteToTempFile(CompositeBuffer&& Buffer, + const std::filesystem::path& Path, + std::function&& RetryCallback); + ZENCORE_API void basicfile_forcelink(); } // namespace zen diff --git a/src/zenserver/projectstore/remoteprojectstore.cpp b/src/zenserver/projectstore/remoteprojectstore.cpp index 5b75a840e..0285cc22f 100644 --- a/src/zenserver/projectstore/remoteprojectstore.cpp +++ b/src/zenserver/projectstore/remoteprojectstore.cpp @@ -154,63 +154,6 @@ namespace remotestore_impl { return BlockIndex; } - IoBuffer WriteToTempFile(CompressedBuffer&& CompressedBuffer, std::filesystem::path Path) - { - if (std::filesystem::is_regular_file(Path)) - { - IoBuffer ExistingTempFile = IoBuffer(IoBufferBuilder::MakeFromFile(Path)); - if (ExistingTempFile && ExistingTempFile.GetSize() == CompressedBuffer.GetCompressedSize()) - { - ExistingTempFile.SetDeleteOnClose(true); - return ExistingTempFile; - } - } - IoBuffer BlockBuffer; - BasicFile BlockFile; - uint32_t RetriesLeft = 3; - BlockFile.Open(Path, BasicFile::Mode::kTruncateDelete, [&](std::error_code& Ec) { - if (RetriesLeft == 0) - { - return false; - } - ZEN_WARN("Failed to create temporary oplog block '{}': '{}', retries left: {}.", Path, Ec.message(), RetriesLeft); - Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms - RetriesLeft--; - return true; - }); - uint64_t Offset = 0; - { - CompositeBuffer Compressed = std::move(CompressedBuffer).GetCompressed(); - for (const SharedBuffer& Segment : Compressed.GetSegments()) - { - size_t SegmentSize = Segment.GetSize(); - static const uint64_t BufferingSize = 256u * 1024u; - - IoBufferFileReference FileRef; - if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) - { - ScanFile(FileRef.FileHandle, - FileRef.FileChunkOffset, - FileRef.FileChunkSize, - BufferingSize, - [&BlockFile, &Offset](const void* Data, size_t Size) { - BlockFile.Write(Data, Size, Offset); - Offset += Size; - }); - } - else - { - BlockFile.Write(Segment.GetData(), SegmentSize, Offset); - Offset += SegmentSize; - } - } - } - void* FileHandle = BlockFile.Detach(); - BlockBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, Offset, /*IsWholeFile*/ true); - BlockBuffer.SetDeleteOnClose(true); - return BlockBuffer; - } - RemoteProjectStore::Result WriteOplogSection(ProjectStore::Oplog& Oplog, const CbObjectView& SectionObject, JobContext* OptionalContext) { using namespace std::literals; @@ -1635,9 +1578,22 @@ BuildContainer(CidStore& ChunkStore, std::filesystem::path AttachmentPath = AttachmentTempPath; AttachmentPath.append(RawHash.ToHexString()); + uint32_t RetriesLeft = 3; IoBuffer TempAttachmentBuffer = - remotestore_impl::WriteToTempFile(std::move(Compressed), AttachmentPath); + WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath, [&](std::error_code& Ec) { + if (RetriesLeft == 0) + { + return false; + } + ZEN_WARN("Failed to create temporary attachment '{}': '{}', retries left: {}.", + AttachmentPath, + Ec.message(), + RetriesLeft); + Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms + RetriesLeft--; + return true; + }); ZEN_INFO("Saved temp attachment to '{}', {} ({})", AttachmentPath, NiceBytes(RawSize), @@ -1656,7 +1612,21 @@ BuildContainer(CidStore& ChunkStore, std::filesystem::path AttachmentPath = AttachmentTempPath; AttachmentPath.append(RawHash.ToHexString()); - IoBuffer TempAttachmentBuffer = remotestore_impl::WriteToTempFile(std::move(Compressed), AttachmentPath); + uint32_t RetriesLeft = 3; + IoBuffer TempAttachmentBuffer = + WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath, [&](std::error_code& Ec) { + if (RetriesLeft == 0) + { + return false; + } + ZEN_WARN("Failed to create temporary attachment '{}': '{}', retries left: {}.", + AttachmentPath, + Ec.message(), + RetriesLeft); + Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms + RetriesLeft--; + return true; + }); ZEN_INFO("Saved temp attachment to '{}', {} ({})", AttachmentPath, NiceBytes(RawSize), @@ -2392,7 +2362,17 @@ SaveOplog(CidStore& ChunkStore, BlockPath.append(Block.BlockHash.ToHexString()); try { - IoBuffer BlockBuffer = remotestore_impl::WriteToTempFile(std::move(CompressedBlock), BlockPath); + uint32_t RetriesLeft = 3; + IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath, [&](std::error_code& Ec) { + if (RetriesLeft == 0) + { + return false; + } + ZEN_WARN("Failed to create temporary oplog block '{}': '{}', retries left: {}.", BlockPath, Ec.message(), RetriesLeft); + Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms + RetriesLeft--; + return true; + }); RwLock::ExclusiveLockScope __(AttachmentsLock); CreatedBlocks.insert({Block.BlockHash, {.Payload = std::move(BlockBuffer), .Block = std::move(Block)}}); ZEN_DEBUG("Saved temp block to '{}', {}", AttachmentTempPath, NiceBytes(BlockBuffer.GetSize())); -- cgit v1.2.3 From 574c62a94f61265be5ae3d086834fa8b9d29eefc Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 24 Feb 2025 12:38:18 +0100 Subject: strip leading path separator when creating workspace shares (#285) --- src/zen/cmds/workspaces_cmd.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/zen/cmds/workspaces_cmd.cpp b/src/zen/cmds/workspaces_cmd.cpp index 05d3c573f..166d4218d 100644 --- a/src/zen/cmds/workspaces_cmd.cpp +++ b/src/zen/cmds/workspaces_cmd.cpp @@ -25,7 +25,7 @@ namespace { if (!Path.empty()) { std::u8string PathString = Path.u8string(); - if (PathString.ends_with(std::filesystem::path::preferred_separator)) + if (PathString.ends_with(std::filesystem::path::preferred_separator) || PathString.ends_with('/')) { PathString.pop_back(); Path = std::filesystem::path(PathString); @@ -42,6 +42,19 @@ namespace { } } + static void RemoveLeadingPathSeparator(std::filesystem::path& Path) + { + if (!Path.empty()) + { + std::u8string PathString = Path.u8string(); + if (PathString.starts_with(std::filesystem::path::preferred_separator) || PathString.starts_with('/')) + { + PathString.erase(PathString.begin()); + Path = std::filesystem::path(PathString); + } + } + } + void ShowShare(const Workspaces::WorkspaceShareConfiguration& Share, const Oid& WorkspaceId, std::string_view Prefix) { ZEN_CONSOLE("{}Id: {}", Prefix, Share.Id); @@ -451,6 +464,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** } RemoveTrailingPathSeparator(m_SharePath); + RemoveLeadingPathSeparator(m_SharePath); if (m_ShareId.empty()) { -- cgit v1.2.3 From 5bc5b0dd59c0f02afe553e5074dfe57951b19044 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 25 Feb 2025 15:48:43 +0100 Subject: improvements and infrastructure for upcoming builds api command line (#284) * add modification tick to filesystem traversal * add ShowDetails option to ProgressBar * log callstack if we terminate process * handle chunking if MaxSize > 1MB * BasicFile write helpers and WriteToTempFile simplifications * bugfix for CompositeBuffer::IterateRange when using DecompressToComposite for actually comrpessed data revert of earlier optimization * faster compress/decompress for large disk-based files * enable progress feedback in IoHash::HashBuffer * add payload validation in HttpClient::Get * fix range requests (range is including end byte) * remove BuildPartId for blob/block related operations in builds api --- src/zen/cmds/copy_cmd.cpp | 6 +- src/zen/cmds/serve_cmd.cpp | 2 +- src/zen/zen.cpp | 20 +- src/zen/zen.h | 3 +- src/zencore/basicfile.cpp | 79 ++++++-- src/zencore/compositebuffer.cpp | 34 +--- src/zencore/compress.cpp | 214 ++++++++++++++++----- src/zencore/filesystem.cpp | 26 ++- src/zencore/include/zencore/basicfile.h | 6 +- src/zencore/include/zencore/filesystem.h | 20 +- src/zencore/include/zencore/iohash.h | 4 +- src/zencore/iohash.cpp | 28 ++- src/zenhttp/httpclient.cpp | 27 ++- src/zenserver/objectstore/objectstore.cpp | 2 +- .../projectstore/buildsremoteprojectstore.cpp | 31 ++- src/zenserver/projectstore/remoteprojectstore.cpp | 55 +----- src/zenstore/filecas.cpp | 4 +- src/zenutil/chunkedfile.cpp | 2 +- .../include/zenutil/jupiter/jupitersession.h | 13 +- src/zenutil/jupiter/jupitersession.cpp | 85 +++----- src/zenutil/logging.cpp | 9 +- 21 files changed, 399 insertions(+), 271 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/copy_cmd.cpp b/src/zen/cmds/copy_cmd.cpp index d42d3c107..cc6ddd505 100644 --- a/src/zen/cmds/copy_cmd.cpp +++ b/src/zen/cmds/copy_cmd.cpp @@ -120,7 +120,11 @@ CopyCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { } - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, + const path_view& File, + uint64_t FileSize, + uint32_t, + uint64_t) override { ZEN_UNUSED(FileSize); std::error_code Ec; diff --git a/src/zen/cmds/serve_cmd.cpp b/src/zen/cmds/serve_cmd.cpp index 8e36e74ce..f87725e36 100644 --- a/src/zen/cmds/serve_cmd.cpp +++ b/src/zen/cmds/serve_cmd.cpp @@ -120,7 +120,7 @@ ServeCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) struct FsVisitor : public FileSystemTraversal::TreeVisitor { - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { std::filesystem::path ServerPath = std::filesystem::relative(Parent / File, RootPath); std::string ServerPathString = reinterpret_cast(ServerPath.generic_u8string().c_str()); diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index fd58b024a..872ea8941 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -24,6 +24,7 @@ #include "cmds/vfs_cmd.h" #include "cmds/workspaces_cmd.h" +#include #include #include #include @@ -251,7 +252,10 @@ ZenCmdBase::ResolveTargetHostSpec(const std::string& InHostSpec) return ResolveTargetHostSpec(InHostSpec, /* out */ Dummy); } -ProgressBar::ProgressBar(bool PlainProgress) : m_PlainProgress(PlainProgress), m_LastUpdateMS(m_SW.GetElapsedTimeMs() - 10000) +ProgressBar::ProgressBar(bool PlainProgress, bool ShowDetails) +: m_PlainProgress(PlainProgress) +, m_ShowDetails(ShowDetails) +, m_LastUpdateMS(m_SW.GetElapsedTimeMs() - 10000) { } @@ -270,6 +274,7 @@ ProgressBar::~ProgressBar() void ProgressBar::UpdateState(const State& NewState, bool DoLinebreak) { + ZEN_ASSERT(NewState.TotalCount >= NewState.RemainingCount); if (DoLinebreak == false && m_State == NewState) { return; @@ -288,7 +293,8 @@ ProgressBar::UpdateState(const State& NewState, bool DoLinebreak) if (m_PlainProgress) { - ZEN_CONSOLE("{} {}% ({})", NewState.Task, PercentDone, NiceTimeSpanMs(ElapsedTimeMS)); + std::string Details = (m_ShowDetails && !NewState.Details.empty()) ? fmt::format(": {}", NewState.Details) : ""; + ZEN_CONSOLE("{} {}% ({}){}", NewState.Task, PercentDone, NiceTimeSpanMs(ElapsedTimeMS), Details); } else { @@ -327,7 +333,7 @@ ProgressBar::ForceLinebreak() void ProgressBar::Finish() { - if (m_LastOutputLength > 0 && m_State.RemainingCount > 0) + if (m_LastOutputLength > 0) { State NewState = m_State; NewState.RemainingCount = 0; @@ -367,7 +373,13 @@ main(int argc, char** argv) // Set output mode to handle virtual terminal sequences zen::logging::EnableVTMode(); - std::set_terminate([]() { ZEN_CRITICAL("Program exited abnormally via std::terminate()"); }); + std::set_terminate([]() { + void* Frames[8]; + uint32_t FrameCount = GetCallstack(2, 8, Frames); + CallstackFrames* Callstack = CreateCallstack(FrameCount, Frames); + ZEN_CRITICAL("Program exited abnormally via std::terminate()\n{}", CallstackToString(Callstack, " ")); + FreeCallstack(Callstack); + }); LoggerRef DefaultLogger = zen::logging::Default(); auto& Sinks = DefaultLogger.SpdLogger->sinks(); diff --git a/src/zen/zen.h b/src/zen/zen.h index 9c9586050..835c2b6ac 100644 --- a/src/zen/zen.h +++ b/src/zen/zen.h @@ -84,7 +84,7 @@ public: uint64_t RemainingCount = 0; }; - explicit ProgressBar(bool PlainProgress); + explicit ProgressBar(bool PlainProgress, bool ShowDetails = true); ~ProgressBar(); void UpdateState(const State& NewState, bool DoLinebreak); @@ -95,6 +95,7 @@ public: private: const bool m_PlainProgress; + const bool m_ShowDetails; Stopwatch m_SW; uint64_t m_LastUpdateMS; State m_State; diff --git a/src/zencore/basicfile.cpp b/src/zencore/basicfile.cpp index b3bffd34d..6e879ca0d 100644 --- a/src/zencore/basicfile.cpp +++ b/src/zencore/basicfile.cpp @@ -280,6 +280,20 @@ BasicFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function&& RetryCallback) +WriteToTempFile(const CompositeBuffer& Buffer, const std::filesystem::path& Path) { - if (std::filesystem::is_regular_file(Path)) + TemporaryFile Temp; + std::error_code Ec; + Temp.CreateTemporary(Path.parent_path(), Ec); + if (Ec) { - IoBuffer ExistingTempFile = IoBuffer(IoBufferBuilder::MakeFromFile(Path)); - if (ExistingTempFile && ExistingTempFile.GetSize() == Buffer.GetSize()) - { - ExistingTempFile.SetDeleteOnClose(true); - return ExistingTempFile; - } + throw std::system_error(Ec, fmt::format("Failed to create temp file for blob at '{}'", Path)); } - BasicFile BlockFile; - BlockFile.Open(Path, BasicFile::Mode::kTruncateDelete, std::move(RetryCallback)); - uint64_t Offset = 0; + { + uint64_t Offset = 0; static const uint64_t BufferingSize = 256u * 1024u; - BasicFileWriter BufferedOutput(BlockFile, BufferingSize / 2); + // BasicFileWriter BufferedOutput(BlockFile, BufferingSize / 2); for (const SharedBuffer& Segment : Buffer.GetSegments()) { size_t SegmentSize = Segment.GetSize(); @@ -859,22 +883,39 @@ WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path, std FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, - [&BufferedOutput, &Offset](const void* Data, size_t Size) { - BufferedOutput.Write(Data, Size, Offset); + [&Temp, &Offset](const void* Data, size_t Size) { + Temp.Write(Data, Size, Offset); Offset += Size; }); } else { - BufferedOutput.Write(Segment.GetData(), SegmentSize, Offset); + Temp.Write(Segment.GetData(), SegmentSize, Offset); Offset += SegmentSize; } } } - void* FileHandle = BlockFile.Detach(); - IoBuffer BlockBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, Offset, /*IsWholeFile*/ true); - BlockBuffer.SetDeleteOnClose(true); - return BlockBuffer; + + Temp.MoveTemporaryIntoPlace(Path, Ec); + if (Ec) + { + IoBuffer TmpBuffer = IoBufferBuilder::MakeFromFile(Path); + if (TmpBuffer) + { + IoHash ExistingHash = IoHash::HashBuffer(TmpBuffer); + const IoHash ExpectedHash = IoHash::HashBuffer(Buffer); + if (ExistingHash == ExpectedHash) + { + TmpBuffer.SetDeleteOnClose(true); + return TmpBuffer; + } + } + throw std::system_error(Ec, fmt::format("Failed to move temp file to '{}'", Path)); + } + + IoBuffer TmpBuffer = IoBufferBuilder::MakeFromFile(Path); + TmpBuffer.SetDeleteOnClose(true); + return TmpBuffer; } ////////////////////////////////////////////////////////////////////////// diff --git a/src/zencore/compositebuffer.cpp b/src/zencore/compositebuffer.cpp index 49870a304..252ac9045 100644 --- a/src/zencore/compositebuffer.cpp +++ b/src/zencore/compositebuffer.cpp @@ -275,36 +275,18 @@ CompositeBuffer::IterateRange(uint64_t Offset, Visitor(View, Segment); break; } - if (Offset < SegmentSize) + else if (Offset <= SegmentSize) { - if (Offset == 0 && Size >= SegmentSize) + const MemoryView View = Segment.GetView().Mid(Offset, Size); + Offset = 0; + if (Size == 0 || !View.IsEmpty()) { - const MemoryView View = Segment.GetView(); - if (!View.IsEmpty()) - { - Visitor(View, Segment); - } - Size -= View.GetSize(); - if (Size == 0) - { - break; - } + Visitor(View, Segment); } - else + Size -= View.GetSize(); + if (Size == 0) { - // If we only want a section of the segment, do a subrange so we don't have to materialize the entire iobuffer - IoBuffer SubRange(Segment.AsIoBuffer(), Offset, Min(Size, SegmentSize - Offset)); - const MemoryView View = SubRange.GetView(); - if (!View.IsEmpty()) - { - Visitor(View, Segment); - } - Size -= View.GetSize(); - if (Size == 0) - { - break; - } - Offset = 0; + break; } } else diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp index 29c1d9256..0e2ce2b54 100644 --- a/src/zencore/compress.cpp +++ b/src/zencore/compress.cpp @@ -2,6 +2,7 @@ #include +#include #include #include #include @@ -314,37 +315,77 @@ BlockEncoder::Compress(const CompositeBuffer& RawData, const uint64_t BlockSize) CompressedBlockSizes.reserve(BlockCount); uint64_t CompressedSize = 0; { - UniqueBuffer RawBlockCopy; MutableMemoryView CompressedBlocksView = CompressedData.GetMutableView() + sizeof(BufferHeader) + MetaSize; - CompositeBuffer::Iterator It = RawData.GetIterator(0); - - for (uint64_t RawOffset = 0; RawOffset < RawSize;) + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((RawData.GetSegments().size() == 1) && RawData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) { - const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); - const MemoryView RawBlock = RawData.ViewOrCopyRange(It, RawBlockSize, RawBlockCopy); - RawHash.Append(RawBlock); - - MutableMemoryView CompressedBlock = CompressedBlocksView; - if (!CompressBlock(CompressedBlock, RawBlock)) + ZEN_ASSERT(FileRef.FileHandle != nullptr); + UniqueBuffer RawBlockCopy = UniqueBuffer::Alloc(BlockSize); + BasicFile Source; + Source.Attach(FileRef.FileHandle); + for (uint64_t RawOffset = 0; RawOffset < RawSize;) { - return CompositeBuffer(); - } + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + Source.Read(RawBlockCopy.GetData(), RawBlockSize, FileRef.FileChunkOffset + RawOffset); + const MemoryView RawBlock = RawBlockCopy.GetView().Left(RawBlockSize); + RawHash.Append(RawBlock); + MutableMemoryView CompressedBlock = CompressedBlocksView; + if (!CompressBlock(CompressedBlock, RawBlock)) + { + Source.Detach(); + return CompositeBuffer(); + } - uint64_t CompressedBlockSize = CompressedBlock.GetSize(); - if (RawBlockSize <= CompressedBlockSize) - { - CompressedBlockSize = RawBlockSize; - CompressedBlocksView = CompressedBlocksView.CopyFrom(RawBlock); + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + CompressedBlockSize = RawBlockSize; + CompressedBlocksView = CompressedBlocksView.CopyFrom(RawBlock); + } + else + { + CompressedBlocksView += CompressedBlockSize; + } + + CompressedBlockSizes.push_back(static_cast(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; } - else + Source.Detach(); + } + else + { + UniqueBuffer RawBlockCopy; + CompositeBuffer::Iterator It = RawData.GetIterator(0); + + for (uint64_t RawOffset = 0; RawOffset < RawSize;) { - CompressedBlocksView += CompressedBlockSize; - } + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + const MemoryView RawBlock = RawData.ViewOrCopyRange(It, RawBlockSize, RawBlockCopy); + RawHash.Append(RawBlock); - CompressedBlockSizes.push_back(static_cast(CompressedBlockSize)); - CompressedSize += CompressedBlockSize; - RawOffset += RawBlockSize; + MutableMemoryView CompressedBlock = CompressedBlocksView; + if (!CompressBlock(CompressedBlock, RawBlock)) + { + return CompositeBuffer(); + } + + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + CompressedBlockSize = RawBlockSize; + CompressedBlocksView = CompressedBlocksView.CopyFrom(RawBlock); + } + else + { + CompressedBlocksView += CompressedBlockSize; + } + + CompressedBlockSizes.push_back(static_cast(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; + } } } @@ -560,51 +601,118 @@ BlockDecoder::TryDecompressTo(const BufferHeader& Header, CompressedOffset += CompressedBlockSize; } - for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((CompressedData.GetSegments().size() == 1) && CompressedData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) { - const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; - const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); - const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; + ZEN_ASSERT(FileRef.FileHandle != nullptr); + BasicFile Source; + Source.Attach(FileRef.FileHandle); - const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock) - : zen::Min(RemainingRawSize, BlockSize); + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + { + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; - MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 + ? zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); - if (IsCompressed) - { - MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress); + if (CompressedBlockCopy.GetSize() < CompressedBlockSize) + { + CompressedBlockCopy = UniqueBuffer::Alloc(CompressedBlockSize); + } + Source.Read(CompressedBlockCopy.GetData(), CompressedBlockSize, FileRef.FileChunkOffset + CompressedOffset); - const bool IsAligned = BytesToUncompress == UncompressedBlockSize; - if (!IsAligned) + MemoryView CompressedBlock = CompressedBlockCopy.GetView().Left(CompressedBlockSize); + + if (IsCompressed) { - // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries. - if (UncompressedBlockCopy.IsNull()) + MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress); + + const bool IsAligned = BytesToUncompress == UncompressedBlockSize; + if (!IsAligned) { - UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize); + // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries. + if (UncompressedBlockCopy.IsNull()) + { + UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize); + } + UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize); } - UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize); - } - if (!DecompressBlock(UncompressedBlock, CompressedBlock)) - { - return false; - } + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + Source.Detach(); + return false; + } - if (!IsAligned) + if (!IsAligned) + { + RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + } + } + else { - RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + RawView += BytesToUncompress; } - else + Source.Detach(); + } + else + { + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) { - RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); - } + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; - OffsetInFirstBlock = 0; - RemainingRawSize -= BytesToUncompress; - CompressedOffset += CompressedBlockSize; - RawView += BytesToUncompress; + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 + ? zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); + + MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); + + if (IsCompressed) + { + MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress); + + const bool IsAligned = BytesToUncompress == UncompressedBlockSize; + if (!IsAligned) + { + // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries. + if (UncompressedBlockCopy.IsNull()) + { + UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize); + } + UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize); + } + + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + return false; + } + + if (!IsAligned) + { + RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + } + } + else + { + RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + RawView += BytesToUncompress; + } } return RemainingRawSize == 0; diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp index b8c35212f..5716d1255 100644 --- a/src/zencore/filesystem.cpp +++ b/src/zencore/filesystem.cpp @@ -683,7 +683,7 @@ CopyTree(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop { } - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { std::error_code Ec; const std::filesystem::path Relative = std::filesystem::relative(Parent, BasePath, Ec); @@ -1236,7 +1236,11 @@ FileSystemTraversal::TraverseFileSystem(const std::filesystem::path& RootDir, Tr } else { - Visitor.VisitFile(RootDir, FileName, DirInfo->EndOfFile.QuadPart, gsl::narrow(DirInfo->FileAttributes)); + Visitor.VisitFile(RootDir, + FileName, + DirInfo->EndOfFile.QuadPart, + gsl::narrow(DirInfo->FileAttributes), + (uint64_t)DirInfo->LastWriteTime.QuadPart); } const uint64_t NextOffset = DirInfo->NextEntryOffset; @@ -1285,7 +1289,7 @@ FileSystemTraversal::TraverseFileSystem(const std::filesystem::path& RootDir, Tr } else if (S_ISREG(Stat.st_mode)) { - Visitor.VisitFile(RootDir, FileName, Stat.st_size, gsl::narrow(Stat.st_mode)); + Visitor.VisitFile(RootDir, FileName, Stat.st_size, gsl::narrow(Stat.st_mode), gsl::narrow(Stat.st_mtime)); } else { @@ -1544,7 +1548,8 @@ GetDirectoryContent(const std::filesystem::path& RootDir, DirectoryContentFlags virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, - uint32_t NativeModeOrAttributes) override + uint32_t NativeModeOrAttributes, + uint64_t NativeModificationTick) override { if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeFiles)) { @@ -1557,6 +1562,10 @@ GetDirectoryContent(const std::filesystem::path& RootDir, DirectoryContentFlags { Content.FileAttributes.push_back(NativeModeOrAttributes); } + if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeModificationTick)) + { + Content.FileModificationTicks.push_back(NativeModificationTick); + } } } @@ -1612,7 +1621,8 @@ GetDirectoryContent(const std::filesystem::path& RootDir, virtual void VisitFile(const std::filesystem::path&, const path_view& File, uint64_t FileSize, - uint32_t NativeModeOrAttributes) override + uint32_t NativeModeOrAttributes, + uint64_t NativeModificationTick) override { if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeFiles)) { @@ -1625,6 +1635,10 @@ GetDirectoryContent(const std::filesystem::path& RootDir, { Content.FileAttributes.push_back(NativeModeOrAttributes); } + if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeModificationTick)) + { + Content.FileModificationTicks.push_back(NativeModificationTick); + } } } @@ -1928,7 +1942,7 @@ TEST_CASE("filesystem") // Traversal struct : public FileSystemTraversal::TreeVisitor { - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t, uint32_t, uint64_t) override { bFoundExpected |= std::filesystem::equivalent(Parent / File, Expected); } diff --git a/src/zencore/include/zencore/basicfile.h b/src/zencore/include/zencore/basicfile.h index 7edd40c9c..a78132879 100644 --- a/src/zencore/include/zencore/basicfile.h +++ b/src/zencore/include/zencore/basicfile.h @@ -60,6 +60,7 @@ public: void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function&& ChunkFun); void Write(MemoryView Data, uint64_t FileOffset); void Write(MemoryView Data, uint64_t FileOffset, std::error_code& Ec); + uint64_t Write(const CompositeBuffer& Data, uint64_t FileOffset); uint64_t Write(const CompositeBuffer& Data, uint64_t FileOffset, std::error_code& Ec); void Write(const void* Data, uint64_t Size, uint64_t FileOffset); void Write(const void* Data, uint64_t Size, uint64_t FileOffset, std::error_code& Ec); @@ -174,6 +175,7 @@ public: ~BasicFileWriter(); void Write(const void* Data, uint64_t Size, uint64_t FileOffset); + void Write(const CompositeBuffer& Data, uint64_t FileOffset); void Flush(); private: @@ -184,9 +186,7 @@ private: uint64_t m_BufferEnd; }; -IoBuffer WriteToTempFile(CompositeBuffer&& Buffer, - const std::filesystem::path& Path, - std::function&& RetryCallback); +IoBuffer WriteToTempFile(const CompositeBuffer& Buffer, const std::filesystem::path& Path); ZENCORE_API void basicfile_forcelink(); diff --git a/src/zencore/include/zencore/filesystem.h b/src/zencore/include/zencore/filesystem.h index ca8682cd7..250745e86 100644 --- a/src/zencore/include/zencore/filesystem.h +++ b/src/zencore/include/zencore/filesystem.h @@ -203,7 +203,8 @@ public: virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, - uint32_t NativeModeOrAttributes) = 0; + uint32_t NativeModeOrAttributes, + uint64_t NativeModificationTick) = 0; // This should return true if we should recurse into the directory virtual bool VisitDirectory(const std::filesystem::path& Parent, @@ -216,13 +217,14 @@ public: enum class DirectoryContentFlags : uint8_t { - None = 0, - IncludeDirs = 1u << 0, - IncludeFiles = 1u << 1, - Recursive = 1u << 2, - IncludeFileSizes = 1u << 3, - IncludeAttributes = 1u << 4, - IncludeAllEntries = IncludeDirs | IncludeFiles | Recursive + None = 0, + IncludeDirs = 1u << 0, + IncludeFiles = 1u << 1, + Recursive = 1u << 2, + IncludeFileSizes = 1u << 3, + IncludeAttributes = 1u << 4, + IncludeModificationTick = 1u << 5, + IncludeAllEntries = IncludeDirs | IncludeFiles | Recursive }; ENUM_CLASS_FLAGS(DirectoryContentFlags) @@ -232,6 +234,7 @@ struct DirectoryContent std::vector Files; std::vector FileSizes; std::vector FileAttributes; + std::vector FileModificationTicks; std::vector Directories; std::vector DirectoryAttributes; }; @@ -246,6 +249,7 @@ public: std::vector FileNames; std::vector FileSizes; std::vector FileAttributes; + std::vector FileModificationTicks; std::vector DirectoryNames; std::vector DirectoryAttributes; }; diff --git a/src/zencore/include/zencore/iohash.h b/src/zencore/include/zencore/iohash.h index 8871a5895..7443e17b7 100644 --- a/src/zencore/include/zencore/iohash.h +++ b/src/zencore/include/zencore/iohash.h @@ -47,8 +47,8 @@ struct IoHash static IoHash HashBuffer(const void* data, size_t byteCount); static IoHash HashBuffer(MemoryView Data) { return HashBuffer(Data.GetData(), Data.GetSize()); } - static IoHash HashBuffer(const CompositeBuffer& Buffer); - static IoHash HashBuffer(const IoBuffer& Buffer); + static IoHash HashBuffer(const CompositeBuffer& Buffer, std::atomic* ProcessedBytes = nullptr); + static IoHash HashBuffer(const IoBuffer& Buffer, std::atomic* ProcessedBytes = nullptr); static IoHash FromHexString(const char* string); static IoHash FromHexString(const std::string_view string); static bool TryParse(std::string_view Str, IoHash& Hash); diff --git a/src/zencore/iohash.cpp b/src/zencore/iohash.cpp index 7200e6e3f..3b2af0db4 100644 --- a/src/zencore/iohash.cpp +++ b/src/zencore/iohash.cpp @@ -30,7 +30,7 @@ IoHash::HashBuffer(const void* data, size_t byteCount) } IoHash -IoHash::HashBuffer(const CompositeBuffer& Buffer) +IoHash::HashBuffer(const CompositeBuffer& Buffer, std::atomic* ProcessedBytes) { IoHashStream Hasher; @@ -46,11 +46,21 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, - [&Hasher](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + [&Hasher, ProcessedBytes](const void* Data, size_t Size) { + Hasher.Append(Data, Size); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(Size); + } + }); } else { Hasher.Append(Segment.GetData(), SegmentSize); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(SegmentSize); + } } } @@ -58,7 +68,7 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) } IoHash -IoHash::HashBuffer(const IoBuffer& Buffer) +IoHash::HashBuffer(const IoBuffer& Buffer, std::atomic* ProcessedBytes) { IoHashStream Hasher; @@ -71,11 +81,21 @@ IoHash::HashBuffer(const IoBuffer& Buffer) FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, - [&Hasher](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + [&Hasher, ProcessedBytes](const void* Data, size_t Size) { + Hasher.Append(Data, Size); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(Size); + } + }); } else { Hasher.Append(Buffer.GetData(), BufferSize); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(BufferSize); + } } return Hasher.GetHash(); diff --git a/src/zenhttp/httpclient.cpp b/src/zenhttp/httpclient.cpp index 211a5d05c..bb15a6fce 100644 --- a/src/zenhttp/httpclient.cpp +++ b/src/zenhttp/httpclient.cpp @@ -449,12 +449,27 @@ ValidatePayload(cpr::Response& Response, std::unique_ptr&& Func, uint8_t RetryCount) +DoWithRetry( + std::string_view SessionId, + std::function&& Func, + uint8_t RetryCount, + std::function&& Validate = [](cpr::Response&) { return true; }) { uint8_t Attempt = 0; cpr::Response Result = Func(); - while (Attempt < RetryCount && ShouldRetry(Result)) + while (Attempt < RetryCount) { + if (!ShouldRetry(Result)) + { + if (Result.error || !IsHttpSuccessCode(Result.status_code)) + { + break; + } + if (Validate(Result)) + { + break; + } + } Sleep(100 * (Attempt + 1)); Attempt++; ZEN_INFO("{} Attempt {}/{}", CommonResponse(SessionId, std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); @@ -881,7 +896,11 @@ HttpClient::Get(std::string_view Url, const KeyValueMap& AdditionalHeader, const m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); return Sess.Get(); }, - m_ConnectionSettings.RetryCount)); + m_ConnectionSettings.RetryCount, + [](cpr::Response& Result) { + std::unique_ptr NoTempFile; + return ValidatePayload(Result, NoTempFile); + })); } HttpClient::Response @@ -1247,7 +1266,7 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold { uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); - std::string Range = fmt::format("bytes={}-{}", DownloadedSize, ContentLength.value()); + std::string Range = fmt::format("bytes={}-{}", DownloadedSize, DownloadedSize + ContentLength.value() - 1); if (auto RangeIt = HeadersWithRange.Entries.find("Range"); RangeIt != HeadersWithRange.Entries.end()) { if (RangeIt->second == Range) diff --git a/src/zenserver/objectstore/objectstore.cpp b/src/zenserver/objectstore/objectstore.cpp index b0212ab07..5d96de225 100644 --- a/src/zenserver/objectstore/objectstore.cpp +++ b/src/zenserver/objectstore/objectstore.cpp @@ -376,7 +376,7 @@ HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::s Writer.BeginArray("Contents"sv); } - void VisitFile(const fs::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + void VisitFile(const fs::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { const fs::path FullPath = Parent / fs::path(File); fs::path RelativePath = fs::relative(FullPath, BucketPath); diff --git a/src/zenserver/projectstore/buildsremoteprojectstore.cpp b/src/zenserver/projectstore/buildsremoteprojectstore.cpp index 412769174..e4e91104c 100644 --- a/src/zenserver/projectstore/buildsremoteprojectstore.cpp +++ b/src/zenserver/projectstore/buildsremoteprojectstore.cpp @@ -123,18 +123,17 @@ public: JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); JupiterResult PutResult = - Session.PutBuildBlob(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash, ZenContentType::kCompressedBinary, Payload); + Session.PutBuildBlob(m_Namespace, m_Bucket, m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); AddStats(PutResult); SaveAttachmentResult Result{ConvertResult(PutResult)}; if (Result.ErrorCode) { - Result.Reason = fmt::format("Failed saving oplog attachment to {}/{}/{}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed saving oplog attachment to {}/{}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, RawHash, Result.Reason); return Result; @@ -147,18 +146,16 @@ public: IoBuffer MetaPayload = BuildChunkBlockDescription(Block, BlockMetaData.Save()).GetBuffer().AsIoBuffer(); MetaPayload.SetContentType(ZenContentType::kCbObject); - JupiterResult PutMetaResult = - Session.PutBlockMetadata(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash, MetaPayload); + JupiterResult PutMetaResult = Session.PutBlockMetadata(m_Namespace, m_Bucket, m_BuildId, RawHash, MetaPayload); AddStats(PutMetaResult); RemoteProjectStore::Result MetaDataResult = ConvertResult(PutMetaResult); if (MetaDataResult.ErrorCode) { - ZEN_WARN("Failed saving block attachment meta data to {}/{}/{}/{}/{}/{}. Reason: '{}'", + ZEN_WARN("Failed saving block attachment meta data to {}/{}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, RawHash, MetaDataResult.Reason); } @@ -311,30 +308,28 @@ public: { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); - JupiterResult FindResult = Session.FindBlocks(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId); + JupiterResult FindResult = Session.FindBlocks(m_Namespace, m_Bucket, m_BuildId); AddStats(FindResult); GetKnownBlocksResult Result{ConvertResult(FindResult)}; if (Result.ErrorCode) { Result.ErrorCode = gsl::narrow(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("Failed listing know blocks for {}/{}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed listing know blocks for {}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, Result.Reason); return Result; } if (ValidateCompactBinary(FindResult.Response.GetView(), CbValidateMode::Default) != CbValidateError::None) { Result.ErrorCode = gsl::narrow(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("The block list {}/{}/{}/{} is not formatted as a compact binary object"sv, + Result.Reason = fmt::format("The block list {}/{}/{} is not formatted as a compact binary object"sv, m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, - m_BuildId, - m_OplogBuildPartId); + m_BuildId); return Result; } std::optional> Blocks = @@ -342,12 +337,11 @@ public: if (!Blocks) { Result.ErrorCode = gsl::narrow(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("The block list {}/{}/{}/{} is not formatted as a list of blocks"sv, + Result.Reason = fmt::format("The block list {}/{}/{} is not formatted as a list of blocks"sv, m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, - m_BuildId, - m_OplogBuildPartId); + m_BuildId); return Result; } Result.Blocks = std::move(Blocks.value()); @@ -358,18 +352,17 @@ public: { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); - JupiterResult GetResult = Session.GetBuildBlob(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash, m_TempFilePath); + JupiterResult GetResult = Session.GetBuildBlob(m_Namespace, m_Bucket, m_BuildId, RawHash, m_TempFilePath); AddStats(GetResult); LoadAttachmentResult Result{ConvertResult(GetResult), std::move(GetResult.Response)}; if (GetResult.ErrorCode) { - Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}&{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, RawHash, Result.Reason); } diff --git a/src/zenserver/projectstore/remoteprojectstore.cpp b/src/zenserver/projectstore/remoteprojectstore.cpp index 0285cc22f..b4b2c6fc4 100644 --- a/src/zenserver/projectstore/remoteprojectstore.cpp +++ b/src/zenserver/projectstore/remoteprojectstore.cpp @@ -1466,10 +1466,7 @@ BuildContainer(CidStore& ChunkStore, }; std::vector ChunkedFiles; - auto ChunkFile = [AttachmentTempPath](const IoHash& RawHash, - IoBuffer& RawData, - const IoBufferFileReference& FileRef, - JobContext*) -> ChunkedFile { + auto ChunkFile = [](const IoHash& RawHash, IoBuffer& RawData, const IoBufferFileReference& FileRef, JobContext*) -> ChunkedFile { ChunkedFile Chunked; Stopwatch Timer; @@ -1578,22 +1575,7 @@ BuildContainer(CidStore& ChunkStore, std::filesystem::path AttachmentPath = AttachmentTempPath; AttachmentPath.append(RawHash.ToHexString()); - uint32_t RetriesLeft = 3; - - IoBuffer TempAttachmentBuffer = - WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath, [&](std::error_code& Ec) { - if (RetriesLeft == 0) - { - return false; - } - ZEN_WARN("Failed to create temporary attachment '{}': '{}', retries left: {}.", - AttachmentPath, - Ec.message(), - RetriesLeft); - Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms - RetriesLeft--; - return true; - }); + IoBuffer TempAttachmentBuffer = WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath); ZEN_INFO("Saved temp attachment to '{}', {} ({})", AttachmentPath, NiceBytes(RawSize), @@ -1612,34 +1594,21 @@ BuildContainer(CidStore& ChunkStore, std::filesystem::path AttachmentPath = AttachmentTempPath; AttachmentPath.append(RawHash.ToHexString()); - uint32_t RetriesLeft = 3; - IoBuffer TempAttachmentBuffer = - WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath, [&](std::error_code& Ec) { - if (RetriesLeft == 0) - { - return false; - } - ZEN_WARN("Failed to create temporary attachment '{}': '{}', retries left: {}.", - AttachmentPath, - Ec.message(), - RetriesLeft); - Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms - RetriesLeft--; - return true; - }); + uint64_t CompressedSize = Compressed.GetCompressedSize(); + IoBuffer TempAttachmentBuffer = WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath); ZEN_INFO("Saved temp attachment to '{}', {} ({})", AttachmentPath, NiceBytes(RawSize), NiceBytes(TempAttachmentBuffer.GetSize())); - if (Compressed.GetCompressedSize() > MaxChunkEmbedSize) + if (CompressedSize > MaxChunkEmbedSize) { OnLargeAttachment(RawHash, [Data = std::move(TempAttachmentBuffer)](const IoHash&) { return Data; }); ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); }); } else { - UploadAttachment->Size = Compressed.GetCompressedSize(); + UploadAttachment->Size = CompressedSize; ResolveLock.WithExclusiveLock( [RawHash, RawSize, &LooseUploadAttachments, Data = std::move(TempAttachmentBuffer)]() { LooseUploadAttachments.insert_or_assign(RawHash, std::make_pair(RawSize, std::move(Data))); @@ -2362,17 +2331,7 @@ SaveOplog(CidStore& ChunkStore, BlockPath.append(Block.BlockHash.ToHexString()); try { - uint32_t RetriesLeft = 3; - IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath, [&](std::error_code& Ec) { - if (RetriesLeft == 0) - { - return false; - } - ZEN_WARN("Failed to create temporary oplog block '{}': '{}', retries left: {}.", BlockPath, Ec.message(), RetriesLeft); - Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms - RetriesLeft--; - return true; - }); + IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath); RwLock::ExclusiveLockScope __(AttachmentsLock); CreatedBlocks.insert({Block.BlockHash, {.Payload = std::move(BlockBuffer), .Block = std::move(Block)}}); ZEN_DEBUG("Saved temp block to '{}', {}", AttachmentTempPath, NiceBytes(BlockBuffer.GetSize())); diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index 14123528c..34db51aa9 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -185,7 +185,7 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN // in this folder as well struct Visitor : public FileSystemTraversal::TreeVisitor { - virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t) override + virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t, uint64_t) override { // We don't care about files } @@ -1174,7 +1174,7 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir) struct Visitor : public FileSystemTraversal::TreeVisitor { Visitor(const std::filesystem::path& RootDir, std::vector& Entries) : RootDirectory(RootDir), Entries(Entries) {} - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { std::filesystem::path RelPath = std::filesystem::relative(Parent, RootDirectory); diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp index c08492eb0..3f3a6661c 100644 --- a/src/zenutil/chunkedfile.cpp +++ b/src/zenutil/chunkedfile.cpp @@ -121,7 +121,7 @@ ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Para Chunker.SetUseThreshold(Params.UseThreshold); Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize); size_t End = Offset + Size; - const size_t ScanBufferSize = 1u * 1024 * 1024; // (Params.MaxSize * 9) / 3;//1 * 1024 * 1024; + const size_t ScanBufferSize = Max(1u * 1024 * 1024, Params.MaxSize); BasicFileBuffer RawBuffer(RawData, ScanBufferSize); MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); ZEN_ASSERT(!SliceView.IsEmpty()); diff --git a/src/zenutil/include/zenutil/jupiter/jupitersession.h b/src/zenutil/include/zenutil/jupiter/jupitersession.h index 075c35b40..852271868 100644 --- a/src/zenutil/include/zenutil/jupiter/jupitersession.h +++ b/src/zenutil/include/zenutil/jupiter/jupitersession.h @@ -116,21 +116,18 @@ public: JupiterResult PutBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, ZenContentType ContentType, const CompositeBuffer& Payload); JupiterResult GetBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, std::filesystem::path TempFolderPath); JupiterResult PutMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, ZenContentType ContentType, uint64_t PayloadSize, @@ -139,7 +136,6 @@ public: JupiterResult GetMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, uint64_t ChunkSize, std::function&& Receiver, @@ -147,7 +143,6 @@ public: JupiterResult PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, const IoBuffer& Payload); FinalizeBuildPartResult FinalizeBuildPart(std::string_view Namespace, @@ -155,12 +150,8 @@ public: const Oid& BuildId, const Oid& PartId, const IoHash& RawHash); - JupiterResult FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); - JupiterResult GetBlockMetadata(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - IoBuffer Payload); + JupiterResult FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + JupiterResult GetBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, IoBuffer Payload); private: inline LoggerRef Log() { return m_Log; } diff --git a/src/zenutil/jupiter/jupitersession.cpp b/src/zenutil/jupiter/jupitersession.cpp index d56927b44..06ac6ae36 100644 --- a/src/zenutil/jupiter/jupitersession.cpp +++ b/src/zenutil/jupiter/jupitersession.cpp @@ -436,15 +436,14 @@ JupiterResult JupiterSession::PutBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, ZenContentType ContentType, const CompositeBuffer& Payload) { - HttpClient::Response Response = m_HttpClient.Upload( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - Payload, - ContentType); + HttpClient::Response Response = + m_HttpClient.Upload(fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()), + Payload, + ContentType); return detail::ConvertResponse(Response, "JupiterSession::PutBuildBlob"sv); } @@ -452,7 +451,6 @@ JupiterResult JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, ZenContentType ContentType, uint64_t PayloadSize, @@ -498,12 +496,8 @@ JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, StartMultipartPayloadWriter.AddInteger("blobLength"sv, PayloadSize); CbObject StartMultipartPayload = StartMultipartPayloadWriter.Save(); - std::string StartMultipartResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/startMultipartUpload", - Namespace, - BucketId, - BuildId, - PartId, - Hash.ToHexString()); + std::string StartMultipartResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/startMultipartUpload", Namespace, BucketId, BuildId, Hash.ToHexString()); // ZEN_INFO("POST: {}", StartMultipartResponseRequestString); HttpClient::Response StartMultipartResponse = m_HttpClient.Post(StartMultipartResponseRequestString, StartMultipartPayload, HttpClient::Accept(ZenContentType::kCbObject)); @@ -529,15 +523,14 @@ JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, for (size_t PartIndex = 0; PartIndex < Workload->PartDescription.Parts.size(); PartIndex++) { - OutWorkItems.emplace_back([this, Namespace, BucketId, BuildId, PartId, Hash, ContentType, Workload, PartIndex]( + OutWorkItems.emplace_back([this, Namespace, BucketId, BuildId, Hash, ContentType, Workload, PartIndex]( bool& OutIsComplete) -> JupiterResult { const MultipartUploadResponse::Part& Part = Workload->PartDescription.Parts[PartIndex]; IoBuffer PartPayload = Workload->Transmitter(Part.FirstByte, Part.LastByte - Part.FirstByte); - std::string MultipartUploadResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/uploadMultipart{}", + std::string MultipartUploadResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/uploadMultipart{}", Namespace, BucketId, BuildId, - PartId, Hash.ToHexString(), Part.QueryString); // ZEN_INFO("PUT: {}", MultipartUploadResponseRequestString); @@ -571,12 +564,7 @@ JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, CbObject CompletePayload = CompletePayloadWriter.Save(); std::string MultipartEndResponseRequestString = - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/completeMultipart", - Namespace, - BucketId, - BuildId, - PartId, - Hash.ToHexString()); + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/completeMultipart", Namespace, BucketId, BuildId, Hash.ToHexString()); MultipartEndResponse = m_HttpClient.Post(MultipartEndResponseRequestString, CompletePayload, @@ -600,13 +588,12 @@ JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, size_t RetryPartIndex = PartNameToIndex.at(RetryPartId); const MultipartUploadResponse::Part& RetryPart = Workload->PartDescription.Parts[RetryPartIndex]; IoBuffer RetryPartPayload = - Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte); + Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte - 1); std::string RetryMultipartUploadResponseRequestString = - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/uploadMultipart{}", + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/uploadMultipart{}", Namespace, BucketId, BuildId, - RetryPartId, Hash.ToHexString(), RetryPart.QueryString); @@ -642,14 +629,12 @@ JupiterResult JupiterSession::GetMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, uint64_t ChunkSize, std::function&& Receiver, std::vector>& OutWorkItems) { - std::string RequestUrl = - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()); + std::string RequestUrl = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()); HttpClient::Response Response = m_HttpClient.Get(RequestUrl, HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", 0, ChunkSize - 1)}})); if (Response.IsSuccess()) @@ -683,17 +668,12 @@ JupiterSession::GetMultipartBuildBlob(std::string_view Namespa { uint64_t PartSize = Min(ChunkSize, TotalSize - Offset); OutWorkItems.emplace_back( - [this, Namespace, BucketId, BuildId, PartId, Hash, TotalSize, Workload, Offset, PartSize]() - -> JupiterResult { - std::string RequestUrl = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", - Namespace, - BucketId, - BuildId, - PartId, - Hash.ToHexString()); - HttpClient::Response Response = m_HttpClient.Get( - RequestUrl, - HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", Offset, Offset + PartSize - 1)}})); + [this, Namespace, BucketId, BuildId, Hash, TotalSize, Workload, Offset, PartSize]() -> JupiterResult { + std::string RequestUrl = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()); + HttpClient::Response Response = m_HttpClient.Get( + RequestUrl, + HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", Offset, Offset + PartSize - 1)}})); if (Response.IsSuccess()) { uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Response.ResponsePayload.GetSize()); @@ -717,13 +697,12 @@ JupiterResult JupiterSession::GetBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, std::filesystem::path TempFolderPath) { - HttpClient::Response Response = m_HttpClient.Download( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - TempFolderPath); + HttpClient::Response Response = + m_HttpClient.Download(fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()), + TempFolderPath); return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv); } @@ -732,14 +711,13 @@ JupiterResult JupiterSession::PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, const IoBuffer& Payload) { ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); - HttpClient::Response Response = m_HttpClient.Put( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/{}/metadata", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - Payload); + HttpClient::Response Response = + m_HttpClient.Put(fmt::format("/api/v2/builds/{}/{}/{}/blocks/{}/metadata", Namespace, BucketId, BuildId, Hash.ToHexString()), + Payload); return detail::ConvertResponse(Response, "JupiterSession::PutBlockMetadata"sv); } @@ -772,24 +750,19 @@ JupiterSession::FinalizeBuildPart(std::string_view Namespace, } JupiterResult -JupiterSession::FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId) +JupiterSession::FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId) { - HttpClient::Response Response = - m_HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/listBlocks", Namespace, BucketId, BuildId, PartId), - HttpClient::Accept(ZenContentType::kCbObject)); + HttpClient::Response Response = m_HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}/blocks/listBlocks", Namespace, BucketId, BuildId), + HttpClient::Accept(ZenContentType::kCbObject)); return detail::ConvertResponse(Response, "JupiterSession::FindBlocks"sv); } JupiterResult -JupiterSession::GetBlockMetadata(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - IoBuffer Payload) +JupiterSession::GetBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, IoBuffer Payload) { ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); HttpClient::Response Response = - m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/getBlockMetadata", Namespace, BucketId, BuildId, PartId), + m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/blocks/getBlockMetadata", Namespace, BucketId, BuildId), Payload, HttpClient::Accept(ZenContentType::kCbObject)); return detail::ConvertResponse(Response, "JupiterSession::GetBlockMetadata"sv); diff --git a/src/zenutil/logging.cpp b/src/zenutil/logging.cpp index 6314c407f..0444fa2c4 100644 --- a/src/zenutil/logging.cpp +++ b/src/zenutil/logging.cpp @@ -10,6 +10,7 @@ ZEN_THIRD_PARTY_INCLUDES_START #include ZEN_THIRD_PARTY_INCLUDES_END +#include #include #include #include @@ -97,7 +98,13 @@ BeginInitializeLogging(const LoggingOptions& LogOptions) } } - std::set_terminate([]() { ZEN_CRITICAL("Program exited abnormally via std::terminate()"); }); + std::set_terminate([]() { + void* Frames[8]; + uint32_t FrameCount = GetCallstack(2, 8, Frames); + CallstackFrames* Callstack = CreateCallstack(FrameCount, Frames); + ZEN_CRITICAL("Program exited abnormally via std::terminate()\n{}", CallstackToString(Callstack, " ")); + FreeCallstack(Callstack); + }); // Default -- cgit v1.2.3 From 7d8fe45af3b49d800f84f0ddce051c0b3b2e837d Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 26 Feb 2025 15:10:14 +0100 Subject: builds upload command (#278) - Feature: **EXPERIMENTAL** New `zen builds` command to list, upload and download folders to Cloud Build API - `builds list` list available builds (**INCOMPLETE - FILTERING MISSING**) - `builds upload` upload a folder to Cloud Build API - `--local-path` source folder to upload - `--create-build` creates a new parent build object (using the object id), if omitted a parent build must exist and `--build-id` must be given - `--build-id` an Oid in hex form for the Build identifier to use - omit to have the id auto generated - `--build-part-id` and Oid in hex form for the Build Part identifier for the folder - omit to have the id auto generated - `--build-part-name` name of the build part - if omitted the name of the leaf folder name give in `--local-path` - `--metadata-path` path to a json formatted file with meta data information about the build. Meta-data must be provided if `--create-build` is set - `--metadata` key-value pairs separated by ';' with build meta data for the build. (key1=value1;key2=value2). Meta-data must be provided if `--create-build` is set - `--clean` ignore any existing blocks of chunk data and upload a fresh set of blocks - `--allow-multipart` enable usage of multi-part http upload requests - `--manifest-path` path to text file listing files to include in upload. Exclude to upload everything in `--local-path` - `builds download` download a folder from Cloud Build API (**INCOMPLETE - WILL WIPE UNTRACKED DATA FROM TARGET FOLDER**) - `--local-path` target folder to download to - `--build-id` an Oid in hex form for the Build identifier to use - `--build-part-id` a comma separated list of Oid in hex for the build part identifier(s) to download - mutually exclusive to `--build-part-name` - `--build-part-name` a comma separated list of names for the build part(s) to download - if omitted the name of the leaf folder name give in `--local-path` - `--clean` deletes all data in target folder before downloading (NON-CLEAN IS NOT IMPLEMENTED YET) - `--allow-multipart` enable usage of multi-part http download reqeusts - `builds diff` download a folder from Cloud Build API - `--local-path` target folder to download to - `--compare-path` folder to compare target with - `--only-chunked` compare only files that would be chunked - `builds fetch-blob` fetch and validate a blob from remote store - `--build-id` an Oid in hex form for the Build identifier to use - `--blob-hash` an IoHash in hex form identifying the blob to download - `builds validate part` fetch a build part and validate all referenced attachments - `--build-id` an Oid in hex form for the Build identifier to use - `--build-part-id` an Oid in hex for the build part identifier to validate - mutually exclusive to `--build-part-name` - `--build-part-name` a name for the build part to validate - mutually exclusive to `--build-part-id` - `builds test` a series of operation that uploads, downloads and test various aspects of incremental operations - `--local-path` source folder to upload - Options for Cloud Build API remote store (`list`, `upload`, `download`, `fetch-blob`, `validate-part`) - `--url` Cloud Builds URL - `--assume-http2` assume that the builds endpoint is a HTTP/2 endpoint skipping HTTP/1.1 upgrade handshake - `--namespace` Builds Storage namespace - `--bucket` Builds Storage bucket - Authentication options for Cloud Build API - Auth token - `--access-token` http auth Cloud Storage access token - `--access-token-env` name of environment variable that holds the Http auth Cloud Storage access token - `--access-token-path` path to json file that holds the Http auth Cloud Storage access token - OpenId authentication - `--openid-provider-name` Open ID provider name - `--openid-provider-url` Open ID provider url - `--openid-client-id`Open ID client id - `--openid-refresh-token` Open ID refresh token - `--encryption-aes-key` 256 bit AES encryption key for storing OpenID credentials - `--encryption-aes-iv` 128 bit AES encryption initialization vector for storing OpenID credentials - OAuth authentication - `--oauth-url` OAuth provier url - `--oauth-clientid` OAuth client id - `--oauth-clientsecret` OAuth client secret - Options for file based remote store used for for testing purposes (`list`, `upload`, `download`, `fetch-blob`, `validate-part`, `test`) - `--storage-path` path to folder to store builds data - `--json-metadata` enable json output in store for all compact binary objects (off by default) - Output options for all builds commands - `--plain-progress` use plain line-by-line progress output - `--verbose` --- src/zen/cmds/builds_cmd.cpp | 6106 ++++++++++++++++++++ src/zen/cmds/builds_cmd.h | 106 + src/zen/zen.cpp | 3 + src/zencore/filesystem.cpp | 140 + src/zencore/include/zencore/filesystem.h | 21 + .../projectstore/buildsremoteprojectstore.cpp | 7 +- .../projectstore/fileremoteprojectstore.cpp | 4 +- .../projectstore/jupiterremoteprojectstore.cpp | 2 +- src/zenserver/projectstore/projectstore.cpp | 6 +- src/zenserver/projectstore/remoteprojectstore.cpp | 72 +- src/zenserver/projectstore/remoteprojectstore.h | 6 +- src/zenutil/chunkblock.cpp | 94 +- src/zenutil/chunkedcontent.cpp | 865 +++ src/zenutil/chunkingcontroller.cpp | 265 + src/zenutil/filebuildstorage.cpp | 616 ++ src/zenutil/include/zenutil/buildstorage.h | 55 + src/zenutil/include/zenutil/chunkblock.h | 17 +- src/zenutil/include/zenutil/chunkedcontent.h | 256 + src/zenutil/include/zenutil/chunkingcontroller.h | 55 + src/zenutil/include/zenutil/filebuildstorage.h | 16 + .../include/zenutil/jupiter/jupiterbuildstorage.h | 17 + src/zenutil/include/zenutil/parallellwork.h | 69 + src/zenutil/jupiter/jupiterbuildstorage.cpp | 371 ++ 23 files changed, 9108 insertions(+), 61 deletions(-) create mode 100644 src/zen/cmds/builds_cmd.cpp create mode 100644 src/zen/cmds/builds_cmd.h create mode 100644 src/zenutil/chunkedcontent.cpp create mode 100644 src/zenutil/chunkingcontroller.cpp create mode 100644 src/zenutil/filebuildstorage.cpp create mode 100644 src/zenutil/include/zenutil/buildstorage.h create mode 100644 src/zenutil/include/zenutil/chunkedcontent.h create mode 100644 src/zenutil/include/zenutil/chunkingcontroller.h create mode 100644 src/zenutil/include/zenutil/filebuildstorage.h create mode 100644 src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h create mode 100644 src/zenutil/include/zenutil/parallellwork.h create mode 100644 src/zenutil/jupiter/jupiterbuildstorage.cpp (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp new file mode 100644 index 000000000..ececab29e --- /dev/null +++ b/src/zen/cmds/builds_cmd.cpp @@ -0,0 +1,6106 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "builds_cmd.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +#include +#include +ZEN_THIRD_PARTY_INCLUDES_END + +#if ZEN_PLATFORM_WINDOWS +# include +#else +# include +# include +# include +# include +#endif + +#define EXTRA_VERIFY 0 + +namespace zen { +namespace { + using namespace std::literals; + + static const size_t DefaultMaxBlockSize = 64u * 1024u * 1024u; + static const size_t DefaultMaxChunkEmbedSize = 3u * 512u * 1024u; + + struct ChunksBlockParameters + { + size_t MaxBlockSize = DefaultMaxBlockSize; + size_t MaxChunkEmbedSize = DefaultMaxChunkEmbedSize; + }; + + const ChunksBlockParameters DefaultChunksBlockParams{.MaxBlockSize = 32u * 1024u * 1024u, + .MaxChunkEmbedSize = DefaultChunkedParams.MaxSize}; + const std::string ZenFolderName = ".zen"; + const std::string ZenStateFilePath = fmt::format("{}/current_state.cbo", ZenFolderName); + const std::string ZenStateFileJsonPath = fmt::format("{}/current_state.json", ZenFolderName); + const std::string ZenTempFolderName = fmt::format("{}/tmp", ZenFolderName); + const std::string ZenTempReuseFolderName = fmt::format("{}/reuse", ZenTempFolderName); + const std::string ZenTempStorageFolderName = fmt::format("{}/storage", ZenTempFolderName); + const std::string ZenTempBlockFolderName = fmt::format("{}/blocks", ZenTempFolderName); + const std::string ZenTempChunkFolderName = fmt::format("{}/chunks", ZenTempFolderName); + const std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt"; + + const std::string UnsyncFolderName = ".unsync"; + + const std::string UGSFolderName = ".ugs"; + const std::string LegacyZenTempFolderName = ".zen-tmp"; + + const std::vector DefaultExcludeFolders({UnsyncFolderName, ZenFolderName, UGSFolderName, LegacyZenTempFolderName}); + const std::vector DefaultExcludeExtensions({}); + + static bool IsVerbose = false; + static bool UsePlainProgress = false; + +#define ZEN_CONSOLE_VERBOSE(fmtstr, ...) \ + if (IsVerbose) \ + { \ + ZEN_CONSOLE_LOG(zen::logging::level::Info, fmtstr, ##__VA_ARGS__); \ + } + + const std::string DefaultAccessTokenEnvVariableName( +#if ZEN_PLATFORM_WINDOWS + "UE-CloudDataCacheAccessToken"sv +#endif +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + "UE_CloudDataCacheAccessToken"sv +#endif + + ); + + template + std::string FormatArray(std::span Items, std::string_view Prefix) + { + ExtendableStringBuilder<512> SB; + for (const T& Item : Items) + { + SB.Append(fmt::format("{}{}", Prefix, Item)); + } + return SB.ToString(); + } + + void CleanDirectory(const std::filesystem::path& Path, std::span ExcludeDirectories) + { + DirectoryContent LocalDirectoryContent; + GetDirectoryContent(Path, DirectoryContentFlags::IncludeDirs | DirectoryContentFlags::IncludeFiles, LocalDirectoryContent); + for (const std::filesystem::path& LocalFilePath : LocalDirectoryContent.Files) + { + std::filesystem::remove(LocalFilePath); + } + + for (const std::filesystem::path& LocalDirPath : LocalDirectoryContent.Directories) + { + bool Leave = false; + for (const std::string_view ExcludeDirectory : ExcludeDirectories) + { + if (LocalDirPath == (Path / ExcludeDirectory)) + { + Leave = true; + break; + } + } + if (!Leave) + { + zen::CleanDirectory(LocalDirPath); + std::filesystem::remove(LocalDirPath); + } + } + } + + std::string ReadAccessTokenFromFile(const std::filesystem::path& Path) + { + if (!std::filesystem::is_regular_file(Path)) + { + throw std::runtime_error(fmt::format("the file '{}' does not exist", Path)); + } + IoBuffer Body = IoBufferBuilder::MakeFromFile(Path); + std::string JsonText(reinterpret_cast(Body.GetData()), Body.GetSize()); + std::string JsonError; + json11::Json TokenInfo = json11::Json::parse(JsonText, JsonError); + if (!JsonError.empty()) + { + throw std::runtime_error(fmt::format("failed parsing json file '{}'. Reason: '{}'", Path, JsonError)); + } + const std::string AuthToken = TokenInfo["Token"].string_value(); + if (AuthToken.empty()) + { + throw std::runtime_error(fmt::format("the json file '{}' does not contain a value for \"Token\"", Path)); + } + return AuthToken; + } + + CompositeBuffer WriteToTempFileIfNeeded(const CompositeBuffer& Buffer, const std::filesystem::path& TempFolderPath, const IoHash& Hash) + { + // If this is a file based buffer or a compressed buffer with a memory-based header, we don't need to rewrite to disk to save memory + std::span Segments = Buffer.GetSegments(); + ZEN_ASSERT(Buffer.GetSegments().size() > 0); + size_t SegmentIndexToCheck = Segments.size() > 1 ? 1 : 0; + IoBufferFileReference FileRef; + if (Segments[SegmentIndexToCheck].GetFileReference(FileRef)) + { + return Buffer; + } + std::filesystem::path TempFilePath = (TempFolderPath / Hash.ToHexString()).make_preferred(); + return CompositeBuffer(WriteToTempFile(Buffer, TempFilePath)); + } + + class FilteredRate + { + public: + FilteredRate() {} + + void Start() + { + if (StartTimeUS == (uint64_t)-1) + { + uint64_t Expected = (uint64_t)-1; + if (StartTimeUS.compare_exchange_weak(Expected, Timer.GetElapsedTimeUs())) + { + LastTimeUS = StartTimeUS.load(); + } + } + } + void Stop() + { + if (EndTimeUS == (uint64_t)-1) + { + uint64_t Expected = (uint64_t)-1; + EndTimeUS.compare_exchange_weak(Expected, Timer.GetElapsedTimeUs()); + } + } + + void Update(uint64_t Count) + { + if (LastTimeUS == (uint64_t)-1) + { + return; + } + uint64_t TimeUS = Timer.GetElapsedTimeUs(); + uint64_t TimeDeltaUS = TimeUS - LastTimeUS; + if (TimeDeltaUS >= 1000000) + { + uint64_t Delta = Count - LastCount; + uint64_t PerSecond = (Delta * 1000000) / TimeDeltaUS; + + LastPerSecond = PerSecond; + + LastCount = Count; + + FilteredPerSecond = (PerSecond + (LastPerSecond * 7)) / 8; + + LastTimeUS = TimeUS; + } + } + + uint64_t GetCurrent() const + { + if (LastTimeUS == (uint64_t)-1) + { + return 0; + } + return FilteredPerSecond; + } + + uint64_t GetElapsedTime() const + { + if (StartTimeUS == (uint64_t)-1) + { + return 0; + } + if (EndTimeUS == (uint64_t)-1) + { + return 0; + } + uint64_t TimeDeltaUS = EndTimeUS - StartTimeUS; + return TimeDeltaUS; + } + + bool IsActive() const { return (StartTimeUS != (uint64_t)-1) && (EndTimeUS == (uint64_t)-1); } + + private: + Stopwatch Timer; + std::atomic StartTimeUS = (uint64_t)-1; + std::atomic EndTimeUS = (uint64_t)-1; + std::atomic LastTimeUS = (uint64_t)-1; + uint64_t LastCount = 0; + uint64_t LastPerSecond = 0; + uint64_t FilteredPerSecond = 0; + }; + + uint64_t GetBytesPerSecond(uint64_t ElapsedWallTimeUS, uint64_t Count) + { + if (ElapsedWallTimeUS == 0) + { + return 0; + } + return Count * 1000000 / ElapsedWallTimeUS; + } + + ChunkedFolderContent ScanAndChunkFolder( + GetFolderContentStatistics& GetFolderContentStats, + ChunkingStatistics& ChunkingStats, + const std::filesystem::path& Path, + std::function&& IsAcceptedFolder, + std::function&& IsAcceptedFile, + ChunkingController& ChunkController, + std::atomic& AbortFlag) + { + FolderContent Content = GetFolderContent( + GetFolderContentStats, + Path, + std::move(IsAcceptedFolder), + std::move(IsAcceptedFile), + GetMediumWorkerPool(EWorkloadType::Burst), + UsePlainProgress ? 5000 : 200, + [](bool, std::ptrdiff_t) {}, + AbortFlag); + if (AbortFlag) + { + return {}; + } + + ProgressBar ProgressBar(UsePlainProgress); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + ChunkedFolderContent FolderContent = ChunkFolderContent( + ChunkingStats, + GetMediumWorkerPool(EWorkloadType::Burst), + Path, + Content, + ChunkController, + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = fmt::format("{}/{} ({}/{}, {}B/s) files, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + GetFolderContentStats.AcceptedFileCount.load(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(GetFolderContentStats.FoundFileByteCount), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())), + .TotalCount = GetFolderContentStats.AcceptedFileByteCount, + .RemainingCount = GetFolderContentStats.AcceptedFileByteCount - ChunkingStats.BytesHashed.load()}, + false); + }, + AbortFlag); + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + + ZEN_CONSOLE("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec", + ChunkingStats.FilesProcessed.load(), + NiceBytes(ChunkingStats.BytesHashed.load()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load()), + Path, + NiceTimeSpanMs((GetFolderContentStats.ElapsedWallTimeUS + ChunkingStats.ElapsedWallTimeUS) / 1000), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); + return FolderContent; + }; + + struct DiskStatistics + { + std::atomic OpenReadCount = 0; + std::atomic OpenWriteCount = 0; + std::atomic ReadCount = 0; + std::atomic ReadByteCount = 0; + std::atomic WriteCount = 0; + std::atomic WriteByteCount = 0; + std::atomic CurrentOpenFileCount = 0; + }; + + struct FindBlocksStatistics + { + uint64_t FindBlockTimeMS = 0; + uint64_t PotentialChunkCount = 0; + uint64_t PotentialChunkByteCount = 0; + uint64_t FoundBlockCount = 0; + uint64_t FoundBlockChunkCount = 0; + uint64_t FoundBlockByteCount = 0; + uint64_t AcceptedBlockCount = 0; + uint64_t AcceptedChunkCount = 0; + uint64_t AcceptedByteCount = 0; + uint64_t RejectedBlockCount = 0; + uint64_t RejectedChunkCount = 0; + uint64_t RejectedByteCount = 0; + uint64_t AcceptedReduntantChunkCount = 0; + uint64_t AcceptedReduntantByteCount = 0; + uint64_t NewBlocksCount = 0; + uint64_t NewBlocksChunkCount = 0; + uint64_t NewBlocksChunkByteCount = 0; + }; + + struct UploadStatistics + { + std::atomic BlockCount = 0; + std::atomic BlocksBytes = 0; + std::atomic ChunkCount = 0; + std::atomic ChunksBytes = 0; + std::atomic ReadFromDiskBytes = 0; + std::atomic MultipartAttachmentCount = 0; + uint64_t ElapsedWallTimeUS = 0; + + UploadStatistics& operator+=(const UploadStatistics& Rhs) + { + BlockCount += Rhs.BlockCount; + BlocksBytes += Rhs.BlocksBytes; + ChunkCount += Rhs.ChunkCount; + ChunksBytes += Rhs.ChunksBytes; + ReadFromDiskBytes += Rhs.ReadFromDiskBytes; + MultipartAttachmentCount += Rhs.MultipartAttachmentCount; + ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS; + return *this; + } + }; + + struct LooseChunksStatistics + { + uint64_t ChunkCount = 0; + uint64_t ChunkByteCount = 0; + std::atomic CompressedChunkCount = 0; + std::atomic CompressedChunkBytes = 0; + uint64_t CompressChunksElapsedWallTimeUS = 0; + + LooseChunksStatistics& operator+=(const LooseChunksStatistics& Rhs) + { + ChunkCount += Rhs.ChunkCount; + ChunkByteCount += Rhs.ChunkByteCount; + CompressedChunkCount += Rhs.CompressedChunkCount; + CompressedChunkBytes += Rhs.CompressedChunkBytes; + CompressChunksElapsedWallTimeUS += Rhs.CompressChunksElapsedWallTimeUS; + return *this; + } + }; + + struct GenerateBlocksStatistics + { + std::atomic GeneratedBlockByteCount = 0; + std::atomic GeneratedBlockCount = 0; + uint64_t GenerateBlocksElapsedWallTimeUS = 0; + + GenerateBlocksStatistics& operator+=(const GenerateBlocksStatistics& Rhs) + { + GeneratedBlockByteCount += Rhs.GeneratedBlockByteCount; + GeneratedBlockCount += Rhs.GeneratedBlockCount; + GenerateBlocksElapsedWallTimeUS += Rhs.GenerateBlocksElapsedWallTimeUS; + return *this; + } + }; + + std::vector CalculateAbsoluteChunkOrders(const std::span LocalChunkHashes, + const std::span LocalChunkOrder, + const tsl::robin_map& ChunkHashToLocalChunkIndex, + const std::span& LooseChunkIndexes, + const std::span& BlockDescriptions) + { +#if EXTRA_VERIFY + std::vector TmpAbsoluteChunkHashes; + TmpAbsoluteChunkHashes.reserve(LocalChunkHashes.size()); +#endif // EXTRA_VERIFY + std::vector LocalChunkIndexToAbsoluteChunkIndex; + LocalChunkIndexToAbsoluteChunkIndex.resize(LocalChunkHashes.size(), (uint32_t)-1); + std::uint32_t AbsoluteChunkCount = 0; + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + LocalChunkIndexToAbsoluteChunkIndex[ChunkIndex] = AbsoluteChunkCount; +#if EXTRA_VERIFY + TmpAbsoluteChunkHashes.push_back(LocalChunkHashes[ChunkIndex]); +#endif // EXTRA_VERIFY + AbsoluteChunkCount++; + } + for (const ChunkBlockDescription& Block : BlockDescriptions) + { + for (const IoHash& ChunkHash : Block.ChunkRawHashes) + { + if (auto It = ChunkHashToLocalChunkIndex.find(ChunkHash); It != ChunkHashToLocalChunkIndex.end()) + { + const uint32_t LocalChunkIndex = It->second; + ZEN_ASSERT_SLOW(LocalChunkHashes[LocalChunkIndex] == ChunkHash); + LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex] = AbsoluteChunkCount; + } +#if EXTRA_VERIFY + TmpAbsoluteChunkHashes.push_back(ChunkHash); +#endif // EXTRA_VERIFY + AbsoluteChunkCount++; + } + } + std::vector AbsoluteChunkOrder; + AbsoluteChunkOrder.reserve(LocalChunkHashes.size()); + for (const uint32_t LocalChunkIndex : LocalChunkOrder) + { + const uint32_t AbsoluteChunkIndex = LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex]; +#if EXTRA_VERIFY + ZEN_ASSERT(LocalChunkHashes[LocalChunkIndex] == TmpAbsoluteChunkHashes[AbsoluteChunkIndex]); +#endif // EXTRA_VERIFY + AbsoluteChunkOrder.push_back(AbsoluteChunkIndex); + } +#if EXTRA_VERIFY + { + uint32_t OrderIndex = 0; + while (OrderIndex < LocalChunkOrder.size()) + { + const uint32_t LocalChunkIndex = LocalChunkOrder[OrderIndex]; + const IoHash& LocalChunkHash = LocalChunkHashes[LocalChunkIndex]; + const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrder[OrderIndex]; + const IoHash& AbsoluteChunkHash = TmpAbsoluteChunkHashes[AbsoluteChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + OrderIndex++; + } + } +#endif // EXTRA_VERIFY + return AbsoluteChunkOrder; + } + + void CalculateLocalChunkOrders(const std::span& AbsoluteChunkOrders, + const std::span LooseChunkHashes, + const std::span LooseChunkRawSizes, + const std::span& BlockDescriptions, + std::vector& OutLocalChunkHashes, + std::vector& OutLocalChunkRawSizes, + std::vector& OutLocalChunkOrders) + { + std::vector AbsoluteChunkHashes; + std::vector AbsoluteChunkRawSizes; + AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), LooseChunkHashes.begin(), LooseChunkHashes.end()); + AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), LooseChunkRawSizes.begin(), LooseChunkRawSizes.end()); + for (const ChunkBlockDescription& Block : BlockDescriptions) + { + AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), Block.ChunkRawHashes.begin(), Block.ChunkRawHashes.end()); + AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), Block.ChunkRawLengths.begin(), Block.ChunkRawLengths.end()); + } + OutLocalChunkHashes.reserve(AbsoluteChunkHashes.size()); + OutLocalChunkRawSizes.reserve(AbsoluteChunkRawSizes.size()); + OutLocalChunkOrders.reserve(AbsoluteChunkOrders.size()); + + tsl::robin_map ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(AbsoluteChunkHashes.size()); + + for (uint32_t AbsoluteChunkOrderIndex = 0; AbsoluteChunkOrderIndex < AbsoluteChunkOrders.size(); AbsoluteChunkOrderIndex++) + { + const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[AbsoluteChunkOrderIndex]; + const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex]; + const uint64_t AbsoluteChunkRawSize = AbsoluteChunkRawSizes[AbsoluteChunkIndex]; + + if (auto It = ChunkHashToChunkIndex.find(AbsoluteChunkHash); It != ChunkHashToChunkIndex.end()) + { + const uint32_t LocalChunkIndex = It->second; + OutLocalChunkOrders.push_back(LocalChunkIndex); + } + else + { + uint32_t LocalChunkIndex = gsl::narrow(OutLocalChunkHashes.size()); + OutLocalChunkHashes.push_back(AbsoluteChunkHash); + OutLocalChunkRawSizes.push_back(AbsoluteChunkRawSize); + OutLocalChunkOrders.push_back(LocalChunkIndex); + ChunkHashToChunkIndex.insert_or_assign(AbsoluteChunkHash, LocalChunkIndex); + } +#if EXTRA_VERIFY + const uint32_t LocalChunkIndex = OutLocalChunkOrders[AbsoluteChunkOrderIndex]; + const IoHash& LocalChunkHash = OutLocalChunkHashes[LocalChunkIndex]; + const uint64_t& LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + ZEN_ASSERT(LocalChunkRawSize == AbsoluteChunkRawSize); +#endif // EXTRA_VERIFY + } +#if EXTRA_VERIFY + for (uint32_t OrderIndex = 0; OrderIndex < OutLocalChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = OutLocalChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = OutLocalChunkHashes[LocalChunkIndex]; + uint64_t LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = AbsoluteChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex]; + uint64_t VerifyChunkRawSize = AbsoluteChunkRawSizes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize); + } +#endif // EXTRA_VERIFY + } + + void WriteBuildContentToCompactBinary(CbObjectWriter& PartManifestWriter, + const SourcePlatform Platform, + std::span Paths, + std::span RawHashes, + std::span RawSizes, + std::span Attributes, + std::span SequenceRawHashes, + std::span ChunkCounts, + std::span LocalChunkHashes, + std::span LocalChunkRawSizes, + std::vector AbsoluteChunkOrders, + const std::span LooseLocalChunkIndexes, + const std::span BlockHashes) + { + ZEN_ASSERT(Platform != SourcePlatform::_Count); + PartManifestWriter.AddString("platform"sv, ToString(Platform)); + + uint64_t TotalSize = 0; + for (const uint64_t Size : RawSizes) + { + TotalSize += Size; + } + PartManifestWriter.AddInteger("totalSize", TotalSize); + + PartManifestWriter.BeginObject("files"sv); + { + compactbinary_helpers::WriteArray(Paths, "paths"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(RawHashes, "rawhashes"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(RawSizes, "rawsizes"sv, PartManifestWriter); + if (Platform == SourcePlatform::Windows) + { + compactbinary_helpers::WriteArray(Attributes, "attributes"sv, PartManifestWriter); + } + if (Platform == SourcePlatform::Linux || Platform == SourcePlatform::MacOS) + { + compactbinary_helpers::WriteArray(Attributes, "mode"sv, PartManifestWriter); + } + } + PartManifestWriter.EndObject(); // files + + PartManifestWriter.BeginObject("chunkedContent"); + { + compactbinary_helpers::WriteArray(SequenceRawHashes, "sequenceRawHashes"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(ChunkCounts, "chunkcounts"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(AbsoluteChunkOrders, "chunkorders"sv, PartManifestWriter); + } + PartManifestWriter.EndObject(); // chunkedContent + + size_t LooseChunkCount = LooseLocalChunkIndexes.size(); + if (LooseChunkCount > 0) + { + PartManifestWriter.BeginObject("chunkAttachments"); + { + PartManifestWriter.BeginArray("rawHashes"sv); + for (uint32_t ChunkIndex : LooseLocalChunkIndexes) + { + PartManifestWriter.AddBinaryAttachment(LocalChunkHashes[ChunkIndex]); + } + PartManifestWriter.EndArray(); // rawHashes + + PartManifestWriter.BeginArray("chunkRawSizes"sv); + for (uint32_t ChunkIndex : LooseLocalChunkIndexes) + { + PartManifestWriter.AddInteger(LocalChunkRawSizes[ChunkIndex]); + } + PartManifestWriter.EndArray(); // chunkSizes + } + PartManifestWriter.EndObject(); // + } + + if (BlockHashes.size() > 0) + { + PartManifestWriter.BeginObject("blockAttachments"); + { + compactbinary_helpers::WriteBinaryAttachmentArray(BlockHashes, "rawHashes"sv, PartManifestWriter); + } + PartManifestWriter.EndObject(); // blocks + } + } + + void ReadBuildContentFromCompactBinary(CbObjectView BuildPartManifest, + SourcePlatform& OutPlatform, + std::vector& OutPaths, + std::vector& OutRawHashes, + std::vector& OutRawSizes, + std::vector& OutAttributes, + std::vector& OutSequenceRawHashes, + std::vector& OutChunkCounts, + std::vector& OutAbsoluteChunkOrders, + std::vector& OutLooseChunkHashes, + std::vector& OutLooseChunkRawSizes, + std::vector& OutBlockRawHashes) + { + OutPlatform = FromString(BuildPartManifest["platform"sv].AsString(), SourcePlatform::_Count); + + CbObjectView FilesObject = BuildPartManifest["files"sv].AsObjectView(); + + compactbinary_helpers::ReadArray("paths"sv, FilesObject, OutPaths); + compactbinary_helpers::ReadArray("rawhashes"sv, FilesObject, OutRawHashes); + compactbinary_helpers::ReadArray("rawsizes"sv, FilesObject, OutRawSizes); + + uint64_t PathCount = OutPaths.size(); + if (OutRawHashes.size() != PathCount) + { + throw std::runtime_error(fmt::format("Number of raw hashes entries does not match number of paths")); + } + if (OutRawSizes.size() != PathCount) + { + throw std::runtime_error(fmt::format("Number of raw sizes entries does not match number of paths")); + } + + std::vector ModeArray; + compactbinary_helpers::ReadArray("mode"sv, FilesObject, ModeArray); + if (ModeArray.size() != PathCount && ModeArray.size() != 0) + { + throw std::runtime_error(fmt::format("Number of attribute entries does not match number of paths")); + } + + std::vector AttributeArray; + compactbinary_helpers::ReadArray("attributes"sv, FilesObject, ModeArray); + if (AttributeArray.size() != PathCount && AttributeArray.size() != 0) + { + throw std::runtime_error(fmt::format("Number of attribute entries does not match number of paths")); + } + + if (ModeArray.size() > 0) + { + if (OutPlatform == SourcePlatform::_Count) + { + OutPlatform = SourcePlatform::Linux; // Best guess - under dev format + } + OutAttributes = std::move(ModeArray); + } + else if (AttributeArray.size() > 0) + { + if (OutPlatform == SourcePlatform::_Count) + { + OutPlatform = SourcePlatform::Windows; + } + OutAttributes = std::move(AttributeArray); + } + else + { + if (OutPlatform == SourcePlatform::_Count) + { + OutPlatform = GetSourceCurrentPlatform(); + } + } + + if (FilesObject["chunkcounts"sv]) + { + // Legacy style + + std::vector LegacyChunkCounts; + compactbinary_helpers::ReadArray("chunkcounts"sv, FilesObject, LegacyChunkCounts); + if (LegacyChunkCounts.size() != PathCount) + { + throw std::runtime_error(fmt::format("Number of chunk count entries does not match number of paths")); + } + std::vector LegacyAbsoluteChunkOrders; + compactbinary_helpers::ReadArray("chunkorders"sv, FilesObject, LegacyAbsoluteChunkOrders); + + CbArrayView ChunkOrdersArray = BuildPartManifest["chunkorders"sv].AsArrayView(); + const uint64_t ChunkOrdersCount = ChunkOrdersArray.Num(); + + tsl::robin_set FoundRawHashes; + FoundRawHashes.reserve(PathCount); + + OutChunkCounts.reserve(PathCount); + OutAbsoluteChunkOrders.reserve(ChunkOrdersCount); + + uint32_t OrderIndexOffset = 0; + for (uint32_t PathIndex = 0; PathIndex < OutPaths.size(); PathIndex++) + { + const IoHash& PathRawHash = OutRawHashes[PathIndex]; + uint32_t LegacyChunkCount = LegacyChunkCounts[PathIndex]; + + if (FoundRawHashes.insert(PathRawHash).second) + { + OutSequenceRawHashes.push_back(PathRawHash); + OutChunkCounts.push_back(LegacyChunkCount); + std::span AbsoluteChunkOrder = + std::span(LegacyAbsoluteChunkOrders).subspan(OrderIndexOffset, LegacyChunkCount); + OutAbsoluteChunkOrders.insert(OutAbsoluteChunkOrders.end(), AbsoluteChunkOrder.begin(), AbsoluteChunkOrder.end()); + } + OrderIndexOffset += LegacyChunkCounts[PathIndex]; + } + } + if (CbObjectView ChunkContentView = BuildPartManifest["chunkedContent"sv].AsObjectView(); ChunkContentView) + { + compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkContentView, OutSequenceRawHashes); + compactbinary_helpers::ReadArray("chunkcounts"sv, ChunkContentView, OutChunkCounts); + if (OutChunkCounts.size() != OutSequenceRawHashes.size()) + { + throw std::runtime_error(fmt::format("Number of chunk count entries does not match number of paths")); + } + compactbinary_helpers::ReadArray("chunkorders"sv, ChunkContentView, OutAbsoluteChunkOrders); + } + + CbObjectView ChunkAttachmentsView = BuildPartManifest["chunkAttachments"sv].AsObjectView(); + { + compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, ChunkAttachmentsView, OutLooseChunkHashes); + compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkAttachmentsView, OutLooseChunkRawSizes); + if (OutLooseChunkHashes.size() != OutLooseChunkRawSizes.size()) + { + throw std::runtime_error( + fmt::format("Number of attachment chunk hashes does not match number of attachemnt chunk raw sizes")); + } + } + + CbObjectView BlocksView = BuildPartManifest["blockAttachments"sv].AsObjectView(); + { + compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, BlocksView, OutBlockRawHashes); + } + } + + bool ReadStateObject(CbObjectView StateView, + Oid& OutBuildId, + std::vector& BuildPartsIds, + std::vector& BuildPartsNames, + std::vector& OutPartContents, + FolderContent& OutLocalFolderState) + { + try + { + CbObjectView BuildView = StateView["builds"sv].AsArrayView().CreateViewIterator().AsObjectView(); + OutBuildId = BuildView["buildId"sv].AsObjectId(); + for (CbFieldView PartView : BuildView["parts"sv].AsArrayView()) + { + CbObjectView PartObjectView = PartView.AsObjectView(); + BuildPartsIds.push_back(PartObjectView["partId"sv].AsObjectId()); + BuildPartsNames.push_back(std::string(PartObjectView["partName"sv].AsString())); + OutPartContents.push_back(LoadChunkedFolderContentToCompactBinary(PartObjectView["content"sv].AsObjectView())); + } + OutLocalFolderState = LoadFolderContentToCompactBinary(StateView["localFolderState"sv].AsObjectView()); + return true; + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE("Unable to read local state: ", Ex.what()); + return false; + } + } + + CbObject CreateStateObject(const Oid& BuildId, + std::vector> AllBuildParts, + std::span PartContents, + const FolderContent& LocalFolderState) + { + CbObjectWriter CurrentStateWriter; + CurrentStateWriter.BeginArray("builds"sv); + { + CurrentStateWriter.BeginObject(); + { + CurrentStateWriter.AddObjectId("buildId"sv, BuildId); + CurrentStateWriter.BeginArray("parts"sv); + for (size_t PartIndex = 0; PartIndex < AllBuildParts.size(); PartIndex++) + { + const Oid BuildPartId = AllBuildParts[PartIndex].first; + CurrentStateWriter.BeginObject(); + { + CurrentStateWriter.AddObjectId("partId"sv, BuildPartId); + CurrentStateWriter.AddString("partName"sv, AllBuildParts[PartIndex].second); + CurrentStateWriter.BeginObject("content"); + { + SaveChunkedFolderContentToCompactBinary(PartContents[PartIndex], CurrentStateWriter); + } + CurrentStateWriter.EndObject(); + } + CurrentStateWriter.EndObject(); + } + CurrentStateWriter.EndArray(); // parts + } + CurrentStateWriter.EndObject(); + } + CurrentStateWriter.EndArray(); // builds + + CurrentStateWriter.BeginObject("localFolderState"sv); + { + SaveFolderContentToCompactBinary(LocalFolderState, CurrentStateWriter); + } + CurrentStateWriter.EndObject(); // localFolderState + + return CurrentStateWriter.Save(); + } + + class BufferedOpenFile + { + public: + BufferedOpenFile(const std::filesystem::path Path) : Source(Path, BasicFile::Mode::kRead), SourceSize(Source.FileSize()) {} + BufferedOpenFile() = delete; + BufferedOpenFile(const BufferedOpenFile&) = delete; + BufferedOpenFile(BufferedOpenFile&&) = delete; + BufferedOpenFile& operator=(BufferedOpenFile&&) = delete; + BufferedOpenFile& operator=(const BufferedOpenFile&) = delete; + + const uint64_t BlockSize = 256u * 1024u; + CompositeBuffer GetRange(uint64_t Offset, uint64_t Size) + { + ZEN_ASSERT((CacheBlockIndex == (uint64_t)-1) || Cache); + auto _ = MakeGuard([&]() { ZEN_ASSERT((CacheBlockIndex == (uint64_t)-1) || Cache); }); + + ZEN_ASSERT((Offset + Size) <= SourceSize); + const uint64_t BlockIndexStart = Offset / BlockSize; + const uint64_t BlockIndexEnd = (Offset + Size) / BlockSize; + + std::vector BufferRanges; + BufferRanges.reserve(BlockIndexEnd - BlockIndexStart + 1); + + uint64_t ReadOffset = Offset; + for (uint64_t BlockIndex = BlockIndexStart; BlockIndex <= BlockIndexEnd; BlockIndex++) + { + const uint64_t BlockStartOffset = BlockIndex * BlockSize; + if (CacheBlockIndex != BlockIndex) + { + uint64_t CacheSize = Min(BlockSize, SourceSize - BlockStartOffset); + ZEN_ASSERT(CacheSize > 0); + Cache = IoBuffer(CacheSize); + Source.Read(Cache.GetMutableView().GetData(), CacheSize, BlockStartOffset); + CacheBlockIndex = BlockIndex; + } + + const uint64_t BytesRead = ReadOffset - Offset; + ZEN_ASSERT(BlockStartOffset <= ReadOffset); + const uint64_t OffsetIntoBlock = ReadOffset - BlockStartOffset; + ZEN_ASSERT(OffsetIntoBlock < Cache.GetSize()); + const uint64_t BlockBytes = Min(Cache.GetSize() - OffsetIntoBlock, Size - BytesRead); + BufferRanges.emplace_back(SharedBuffer(IoBuffer(Cache, OffsetIntoBlock, BlockBytes))); + ReadOffset += BlockBytes; + } + CompositeBuffer Result(std::move(BufferRanges)); + ZEN_ASSERT(Result.GetSize() == Size); + return Result; + } + + private: + BasicFile Source; + const uint64_t SourceSize; + uint64_t CacheBlockIndex = (uint64_t)-1; + IoBuffer Cache; + }; + + class ReadFileCache + { + public: + // A buffered file reader that provides CompositeBuffer where the buffers are owned and the memory never overwritten + ReadFileCache(DiskStatistics& DiskStats, + const std::filesystem::path& Path, + const std::vector& Paths, + const std::vector& RawSizes, + size_t MaxOpenFileCount) + : m_Path(Path) + , m_Paths(Paths) + , m_RawSizes(RawSizes) + , m_DiskStats(DiskStats) + { + m_OpenFiles.reserve(MaxOpenFileCount); + } + ~ReadFileCache() + { + m_DiskStats.CurrentOpenFileCount -= m_OpenFiles.size(); + m_OpenFiles.clear(); + } + + CompositeBuffer GetRange(uint32_t PathIndex, uint64_t Offset, uint64_t Size) + { + auto CacheIt = + std::find_if(m_OpenFiles.begin(), m_OpenFiles.end(), [PathIndex](const auto& Lhs) { return Lhs.first == PathIndex; }); + if (CacheIt != m_OpenFiles.end()) + { + if (CacheIt != m_OpenFiles.begin()) + { + auto CachedFile(std::move(CacheIt->second)); + m_OpenFiles.erase(CacheIt); + m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(PathIndex, std::move(CachedFile))); + } + CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); + m_DiskStats.ReadByteCount += Result.GetSize(); + return Result; + } + const std::filesystem::path AttachmentPath = (m_Path / m_Paths[PathIndex]).make_preferred(); + if (Size == m_RawSizes[PathIndex]) + { + IoBuffer Result = IoBufferBuilder::MakeFromFile(AttachmentPath); + m_DiskStats.OpenReadCount++; + m_DiskStats.ReadByteCount += Result.GetSize(); + return CompositeBuffer(SharedBuffer(Result)); + } + if (m_OpenFiles.size() == m_OpenFiles.capacity()) + { + m_OpenFiles.pop_back(); + m_DiskStats.CurrentOpenFileCount--; + } + m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(PathIndex, std::make_unique(AttachmentPath))); + CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); + m_DiskStats.ReadByteCount += Result.GetSize(); + m_DiskStats.OpenReadCount++; + m_DiskStats.CurrentOpenFileCount++; + return Result; + } + + private: + const std::filesystem::path m_Path; + const std::vector& m_Paths; + const std::vector& m_RawSizes; + std::vector>> m_OpenFiles; + DiskStatistics& m_DiskStats; + }; + + CompositeBuffer ValidateBlob(BuildStorage& Storage, + const Oid& BuildId, + const IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize) + { + IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlobHash); + if (!Payload) + { + throw std::runtime_error(fmt::format("Blob {} could not be found", BlobHash)); + } + if (Payload.GetContentType() != ZenContentType::kCompressedBinary) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) has unexpected content type '{}'", + BlobHash, + Payload.GetSize(), + ToString(Payload.GetContentType()))); + } + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) compressed header is invalid", BlobHash, Payload.GetSize())); + } + if (RawHash != BlobHash) + { + throw std::runtime_error( + fmt::format("Blob {} ({} bytes) compressed header has a mismatching raw hash {}", BlobHash, Payload.GetSize(), RawHash)); + } + SharedBuffer Decompressed = Compressed.Decompress(); + if (!Decompressed) + { + throw std::runtime_error( + fmt::format("Blob {} ({} bytes) failed to decompress - header information mismatch", BlobHash, Payload.GetSize())); + } + IoHash ValidateRawHash = IoHash::HashBuffer(Decompressed); + if (ValidateRawHash != BlobHash) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) decompressed hash {} does not match header information", + BlobHash, + Payload.GetSize(), + ValidateRawHash)); + } + CompositeBuffer DecompressedComposite = Compressed.DecompressToComposite(); + if (!DecompressedComposite) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to decompress to composite", BlobHash, Payload.GetSize())); + } + OutCompressedSize = Payload.GetSize(); + OutDecompressedSize = RawSize; + return DecompressedComposite; + } + + ChunkBlockDescription ValidateChunkBlock(BuildStorage& Storage, + const Oid& BuildId, + const IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize) + { + CompositeBuffer BlockBuffer = ValidateBlob(Storage, BuildId, BlobHash, OutCompressedSize, OutDecompressedSize); + return GetChunkBlockDescription(BlockBuffer.Flatten(), BlobHash); + } + + CompositeBuffer FetchChunk(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const IoHash& ChunkHash, + ReadFileCache& OpenFileCache) + { + auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(It != Lookup.ChunkHashToChunkIndex.end()); + uint32_t ChunkIndex = It->second; + std::span ChunkLocations = GetChunkLocations(Lookup, ChunkIndex); + ZEN_ASSERT(!ChunkLocations.empty()); + CompositeBuffer Chunk = + OpenFileCache.GetRange(ChunkLocations[0].PathIndex, ChunkLocations[0].Offset, Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == ChunkHash); + return Chunk; + }; + + CompressedBuffer GenerateBlock(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const std::vector& ChunksInBlock, + ChunkBlockDescription& OutBlockDescription, + DiskStatistics& DiskStats) + { + ReadFileCache OpenFileCache(DiskStats, Path, Content.Paths, Content.RawSizes, 4); + + std::vector> BlockContent; + BlockContent.reserve(ChunksInBlock.size()); + for (uint32_t ChunkIndex : ChunksInBlock) + { + BlockContent.emplace_back(std::make_pair( + Content.ChunkedContent.ChunkHashes[ChunkIndex], + [&Content, &Lookup, &OpenFileCache, ChunkIndex](const IoHash& ChunkHash) -> std::pair { + CompositeBuffer Chunk = FetchChunk(Content, Lookup, ChunkHash, OpenFileCache); + if (!Chunk) + { + ZEN_ASSERT(false); + } + uint64_t RawSize = Chunk.GetSize(); + return {RawSize, CompressedBuffer::Compress(Chunk, OodleCompressor::Mermaid, OodleCompressionLevel::None)}; + })); + } + + return GenerateChunkBlock(std::move(BlockContent), OutBlockDescription); + }; + + void ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + uint64_t MaxBlockSize, + std::vector& ChunkIndexes, + std::vector>& OutBlocks) + { + std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&Content, &Lookup](uint32_t Lhs, uint32_t Rhs) { + const ChunkedContentLookup::ChunkLocation& LhsLocation = GetChunkLocations(Lookup, Lhs)[0]; + const ChunkedContentLookup::ChunkLocation& RhsLocation = GetChunkLocations(Lookup, Rhs)[0]; + if (LhsLocation.PathIndex < RhsLocation.PathIndex) + { + return true; + } + else if (LhsLocation.PathIndex > RhsLocation.PathIndex) + { + return false; + } + return LhsLocation.Offset < RhsLocation.Offset; + }); + + uint64_t MaxBlockSizeLowThreshold = MaxBlockSize - (MaxBlockSize / 16); + + uint64_t BlockSize = 0; + + uint32_t ChunkIndexStart = 0; + for (uint32_t ChunkIndexOffset = 0; ChunkIndexOffset < ChunkIndexes.size();) + { + const uint32_t ChunkIndex = ChunkIndexes[ChunkIndexOffset]; + const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + + if ((BlockSize + ChunkSize) > MaxBlockSize) + { + // Within the span of MaxBlockSizeLowThreshold and MaxBlockSize, see if there is a break + // between source paths for chunks. Break the block at the last such break if any. + ZEN_ASSERT(ChunkIndexOffset > ChunkIndexStart); + + const uint32_t ChunkPathIndex = Lookup.ChunkLocations[Lookup.ChunkLocationOffset[ChunkIndex]].PathIndex; + + uint64_t ScanBlockSize = BlockSize; + + uint32_t ScanChunkIndexOffset = ChunkIndexOffset - 1; + while (ScanChunkIndexOffset > (ChunkIndexStart + 2)) + { + const uint32_t TestChunkIndex = ChunkIndexes[ScanChunkIndexOffset]; + const uint64_t TestChunkSize = Content.ChunkedContent.ChunkRawSizes[TestChunkIndex]; + if ((ScanBlockSize - TestChunkSize) < MaxBlockSizeLowThreshold) + { + break; + } + + const uint32_t TestPathIndex = Lookup.ChunkLocations[Lookup.ChunkLocationOffset[TestChunkIndex]].PathIndex; + if (ChunkPathIndex != TestPathIndex) + { + ChunkIndexOffset = ScanChunkIndexOffset + 1; + break; + } + + ScanBlockSize -= TestChunkSize; + ScanChunkIndexOffset--; + } + + std::vector ChunksInBlock; + ChunksInBlock.reserve(ChunkIndexOffset - ChunkIndexStart); + for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexOffset; AddIndexOffset++) + { + const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset]; + ChunksInBlock.push_back(AddChunkIndex); + } + OutBlocks.emplace_back(std::move(ChunksInBlock)); + BlockSize = 0; + ChunkIndexStart = ChunkIndexOffset; + } + else + { + ChunkIndexOffset++; + BlockSize += ChunkSize; + } + } + if (ChunkIndexStart < ChunkIndexes.size()) + { + std::vector ChunksInBlock; + ChunksInBlock.reserve(ChunkIndexes.size() - ChunkIndexStart); + for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexes.size(); AddIndexOffset++) + { + const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset]; + ChunksInBlock.push_back(AddChunkIndex); + } + OutBlocks.emplace_back(std::move(ChunksInBlock)); + } + } + + CompositeBuffer CompressChunk(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex, + const std::filesystem::path& TempFolderPath) + { + const IoHash& ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; + const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + + const ChunkedContentLookup::ChunkLocation& Source = GetChunkLocations(Lookup, ChunkIndex)[0]; + + IoBuffer RawSource = + IoBufferBuilder::MakeFromFile((Path / Content.Paths[Source.PathIndex]).make_preferred(), Source.Offset, ChunkSize); + if (!RawSource) + { + throw std::runtime_error(fmt::format("Failed fetching chunk {}", ChunkHash)); + } + if (RawSource.GetSize() != ChunkSize) + { + throw std::runtime_error(fmt::format("Fetched chunk {} has invalid size", ChunkHash)); + } + ZEN_ASSERT_SLOW(IoHash::HashBuffer(RawSource) == ChunkHash); + + CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(RawSource))); + if (!CompressedBlob) + { + throw std::runtime_error(fmt::format("Failed decompressing chunk {}", ChunkHash)); + } + if (TempFolderPath.empty()) + { + return CompressedBlob.GetCompressed().MakeOwned(); + } + else + { + CompositeBuffer TempPayload = WriteToTempFileIfNeeded(CompressedBlob.GetCompressed(), TempFolderPath, ChunkHash); + return CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)).GetCompressed(); + } + } + + struct GeneratedBlocks + { + std::vector BlockDescriptions; + std::vector BlockSizes; + std::vector BlockBuffers; + std::vector BlockMetaDatas; + std::vector MetaDataHasBeenUploaded; + tsl::robin_map BlockHashToBlockIndex; + }; + + void GenerateBuildBlocks(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + BuildStorage& Storage, + const Oid& BuildId, + std::atomic& AbortFlag, + const std::vector>& NewBlockChunks, + GeneratedBlocks& OutBlocks, + DiskStatistics& DiskStats, + UploadStatistics& UploadStats, + GenerateBlocksStatistics& GenerateBlocksStats) + { + const std::size_t NewBlockCount = NewBlockChunks.size(); + if (NewBlockCount > 0) + { + ProgressBar ProgressBar(UsePlainProgress); + + OutBlocks.BlockDescriptions.resize(NewBlockCount); + OutBlocks.BlockSizes.resize(NewBlockCount); + OutBlocks.BlockBuffers.resize(NewBlockCount); + OutBlocks.BlockMetaDatas.resize(NewBlockCount); + OutBlocks.MetaDataHasBeenUploaded.resize(NewBlockCount, false); + OutBlocks.BlockHashToBlockIndex.reserve(NewBlockCount); + + RwLock Lock; + + WorkerThreadPool& GenerateBlobsPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool();// + WorkerThreadPool& UploadBlocksPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool();// + + FilteredRate FilteredGeneratedBytesPerSecond; + FilteredRate FilteredUploadedBytesPerSecond; + + ParallellWork Work(AbortFlag); + + std::atomic PendingUploadCount(0); + + for (size_t BlockIndex = 0; BlockIndex < NewBlockCount; BlockIndex++) + { + if (Work.IsAborted()) + { + break; + } + const std::vector& ChunksInBlock = NewBlockChunks[BlockIndex]; + Work.ScheduleWork( + GenerateBlobsPool, + [&, BlockIndex](std::atomic& AbortFlag) { + if (!AbortFlag) + { + FilteredGeneratedBytesPerSecond.Start(); + // TODO: Convert ScheduleWork body to function + + CompressedBuffer CompressedBlock = + GenerateBlock(Path, Content, Lookup, ChunksInBlock, OutBlocks.BlockDescriptions[BlockIndex], DiskStats); + ZEN_CONSOLE_VERBOSE("Generated block {} ({}) containing {} chunks", + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(CompressedBlock.GetCompressedSize()), + OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + + OutBlocks.BlockSizes[BlockIndex] = CompressedBlock.GetCompressedSize(); + + CompositeBuffer Payload = WriteToTempFileIfNeeded(CompressedBlock.GetCompressed(), + Path / ZenTempBlockFolderName, + OutBlocks.BlockDescriptions[BlockIndex].BlockHash); + { + CbObjectWriter Writer; + Writer.AddString("createdBy", "zen"); + OutBlocks.BlockMetaDatas[BlockIndex] = Writer.Save(); + } + GenerateBlocksStats.GeneratedBlockByteCount += OutBlocks.BlockSizes[BlockIndex]; + GenerateBlocksStats.GeneratedBlockCount++; + + Lock.WithExclusiveLock([&]() { + OutBlocks.BlockHashToBlockIndex.insert_or_assign(OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + BlockIndex); + }); + + if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) + { + FilteredGeneratedBytesPerSecond.Stop(); + } + + if (!AbortFlag) + { + PendingUploadCount++; + Work.ScheduleWork( + UploadBlocksPool, + [&, BlockIndex, Payload = std::move(Payload)](std::atomic& AbortFlag) { + if (!AbortFlag) + { + if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) + { + FilteredUploadedBytesPerSecond.Stop(); + OutBlocks.BlockBuffers[BlockIndex] = std::move(Payload); + } + else + { + FilteredUploadedBytesPerSecond.Start(); + // TODO: Convert ScheduleWork body to function + + PendingUploadCount--; + + const CbObject BlockMetaData = + BuildChunkBlockDescription(OutBlocks.BlockDescriptions[BlockIndex], + OutBlocks.BlockMetaDatas[BlockIndex]); + + const IoHash& BlockHash = OutBlocks.BlockDescriptions[BlockIndex].BlockHash; + Storage.PutBuildBlob(BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); + UploadStats.BlocksBytes += Payload.GetSize(); + ZEN_CONSOLE_VERBOSE("Uploaded block {} ({}) containing {} chunks", + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(Payload.GetSize()), + OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + + Storage.PutBlockMetadata(BuildId, + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + BlockMetaData); + ZEN_CONSOLE_VERBOSE("Uploaded block {} metadata ({})", + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(BlockMetaData.GetSize())); + + OutBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + UploadStats.BlockCount++; + if (UploadStats.BlockCount == NewBlockCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + } + } + }, + [&](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed uploading block. Reason: {}", Ex.what()); + AbortFlag = true; + }); + } + } + }, + [&](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed generating block. Reason: {}", Ex.what()); + AbortFlag = true; + }); + } + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + + FilteredGeneratedBytesPerSecond.Update(GenerateBlocksStats.GeneratedBlockByteCount.load()); + FilteredUploadedBytesPerSecond.Update(UploadStats.BlocksBytes.load()); + + std::string Details = fmt::format("Generated {}/{} ({}, {}B/s) and uploaded {}/{} ({}, {}bits/s) blocks", + GenerateBlocksStats.GeneratedBlockCount.load(), + NewBlockCount, + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceNum(FilteredGeneratedBytesPerSecond.GetCurrent()), + UploadStats.BlockCount.load(), + NewBlockCount, + NiceBytes(UploadStats.BlocksBytes.load()), + NiceNum(FilteredUploadedBytesPerSecond.GetCurrent() * 8)); + + ProgressBar.UpdateState( + {.Task = "Generating blocks", + .Details = Details, + .TotalCount = gsl::narrow(NewBlockCount), + .RemainingCount = gsl::narrow(NewBlockCount - GenerateBlocksStats.GeneratedBlockCount.load())}, + false); + }); + + ProgressBar.Finish(); + + GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTime(); + UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTime(); + } + } + + void UploadPartBlobs(BuildStorage& Storage, + const Oid& BuildId, + const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::span RawHashes, + const std::vector>& NewBlockChunks, + GeneratedBlocks& NewBlocks, + std::span LooseChunkIndexes, + const std::uint64_t LargeAttachmentSize, + std::atomic& AbortFlag, + DiskStatistics& DiskStats, + UploadStatistics& UploadStats, + GenerateBlocksStatistics& GenerateBlocksStats, + LooseChunksStatistics& LooseChunksStats) + { + { + ProgressBar ProgressBar(UsePlainProgress); + + WorkerThreadPool& ReadChunkPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + WorkerThreadPool& UploadChunkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + + FilteredRate FilteredGenerateBlockBytesPerSecond; + FilteredRate FilteredCompressedBytesPerSecond; + FilteredRate FilteredUploadedBytesPerSecond; + + ParallellWork Work(AbortFlag); + + std::atomic UploadedBlockSize = 0; + std::atomic UploadedBlockCount = 0; + std::atomic UploadedChunkSize = 0; + std::atomic UploadedChunkCount = 0; + + tsl::robin_map ChunkIndexToLooseChunkOrderIndex; + ChunkIndexToLooseChunkOrderIndex.reserve(LooseChunkIndexes.size()); + for (uint32_t OrderIndex = 0; OrderIndex < LooseChunkIndexes.size(); OrderIndex++) + { + ChunkIndexToLooseChunkOrderIndex.insert_or_assign(LooseChunkIndexes[OrderIndex], OrderIndex); + } + + std::vector FoundChunkHashes; + FoundChunkHashes.reserve(RawHashes.size()); + + std::vector BlockIndexes; + std::vector LooseChunkOrderIndexes; + + uint64_t TotalChunksSize = 0; + uint64_t TotalBlocksSize = 0; + for (const IoHash& RawHash : RawHashes) + { + if (auto It = NewBlocks.BlockHashToBlockIndex.find(RawHash); It != NewBlocks.BlockHashToBlockIndex.end()) + { + BlockIndexes.push_back(It->second); + TotalBlocksSize += NewBlocks.BlockSizes[It->second]; + } + if (auto ChunkIndexIt = Lookup.ChunkHashToChunkIndex.find(RawHash); ChunkIndexIt != Lookup.ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = ChunkIndexIt->second; + if (auto LooseOrderIndexIt = ChunkIndexToLooseChunkOrderIndex.find(ChunkIndex); + LooseOrderIndexIt != ChunkIndexToLooseChunkOrderIndex.end()) + { + LooseChunkOrderIndexes.push_back(LooseOrderIndexIt->second); + TotalChunksSize += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + } + } + uint64_t TotalSize = TotalChunksSize + TotalBlocksSize; + + const size_t UploadBlockCount = BlockIndexes.size(); + const uint32_t UploadChunkCount = gsl::narrow(LooseChunkOrderIndexes.size()); + + auto AsyncUploadBlock = [&](const size_t BlockIndex, const IoHash BlockHash, CompositeBuffer&& Payload) { + Work.ScheduleWork( + UploadChunkPool, + [&, BlockIndex, BlockHash, Payload = CompositeBuffer(std::move(Payload))](std::atomic& AbortFlag) { + if (!AbortFlag) + { + FilteredUploadedBytesPerSecond.Start(); + const CbObject BlockMetaData = + BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); + + Storage.PutBuildBlob(BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); + ZEN_CONSOLE_VERBOSE("Uploaded block {} ({}) containing {} chunks", + NewBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(Payload.GetSize()), + NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + UploadedBlockSize += Payload.GetSize(); + UploadStats.BlocksBytes += Payload.GetSize(); + + Storage.PutBlockMetadata(BuildId, BlockHash, BlockMetaData); + ZEN_CONSOLE_VERBOSE("Uploaded block {} metadata ({})", + NewBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(BlockMetaData.GetSize())); + + NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + + UploadStats.BlockCount++; + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + + UploadedBlockCount++; + if (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + } + }, + [&](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed uploading block. Reason: {}", Ex.what()); + AbortFlag = true; + }); + }; + + auto AsyncUploadLooseChunk = [&](const IoHash& RawHash, CompositeBuffer&& Payload) { + Work.ScheduleWork( + UploadChunkPool, + [&, RawHash, Payload = CompositeBuffer(std::move(Payload))](std::atomic& AbortFlag) { + if (!AbortFlag) + { + const uint64_t PayloadSize = Payload.GetSize(); + if (PayloadSize >= LargeAttachmentSize) + { + UploadStats.MultipartAttachmentCount++; + std::vector> MultipartWork = Storage.PutLargeBuildBlob( + BuildId, + RawHash, + ZenContentType::kCompressedBinary, + PayloadSize, + [Payload = std::move(Payload), &FilteredUploadedBytesPerSecond](uint64_t Offset, + uint64_t Size) -> IoBuffer { + FilteredUploadedBytesPerSecond.Start(); + + IoBuffer PartPayload = Payload.Mid(Offset, Size).Flatten().AsIoBuffer(); + PartPayload.SetContentType(ZenContentType::kBinary); + return PartPayload; + }, + [&](uint64_t SentBytes, bool IsComplete) { + UploadStats.ChunksBytes += SentBytes; + UploadedChunkSize += SentBytes; + if (IsComplete) + { + UploadStats.ChunkCount++; + UploadedChunkCount++; + if (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + } + }); + for (auto& WorkPart : MultipartWork) + { + Work.ScheduleWork( + UploadChunkPool, + [Work = std::move(WorkPart)](std::atomic& AbortFlag) { + if (!AbortFlag) + { + Work(); + } + }, + [&, RawHash](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed uploading multipart blob {}. Reason: {}", RawHash, Ex.what()); + AbortFlag = true; + }); + } + ZEN_CONSOLE_VERBOSE("Uploaded multipart chunk {} ({})", RawHash, NiceBytes(PayloadSize)); + } + else + { + Storage.PutBuildBlob(BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); + ZEN_CONSOLE_VERBOSE("Uploaded chunk {} ({})", RawHash, NiceBytes(PayloadSize)); + UploadStats.ChunksBytes += Payload.GetSize(); + UploadStats.ChunkCount++; + UploadedChunkSize += Payload.GetSize(); + UploadedChunkCount++; + if (UploadedChunkCount == UploadChunkCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + } + } + }, + [&](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed uploading chunk. Reason: {}", Ex.what()); + AbortFlag = true; + }); + }; + + std::vector GenerateBlockIndexes; + + std::atomic GeneratedBlockCount = 0; + std::atomic GeneratedBlockByteCount = 0; + + // Start upload of any pre-built blocks + for (const size_t BlockIndex : BlockIndexes) + { + if (CompositeBuffer BlockPayload = std::move(NewBlocks.BlockBuffers[BlockIndex]); BlockPayload) + { + const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; + FoundChunkHashes.push_back(BlockHash); + if (!AbortFlag) + { + AsyncUploadBlock(BlockIndex, BlockHash, std::move(BlockPayload)); + } + // GeneratedBlockCount++; + } + else + { + GenerateBlockIndexes.push_back(BlockIndex); + } + } + + std::vector CompressLooseChunkOrderIndexes; + + // Start upload of any pre-compressed loose chunks + for (const uint32_t LooseChunkOrderIndex : LooseChunkOrderIndexes) + { + CompressLooseChunkOrderIndexes.push_back(LooseChunkOrderIndex); + } + + // Start generation of any non-prebuilt blocks and schedule upload + for (const size_t BlockIndex : GenerateBlockIndexes) + { + const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; + FoundChunkHashes.push_back(BlockHash); + if (!AbortFlag) + { + Work.ScheduleWork( + ReadChunkPool, + [&, BlockIndex](std::atomic& AbortFlag) { + if (!AbortFlag) + { + FilteredGenerateBlockBytesPerSecond.Start(); + ChunkBlockDescription BlockDescription; + CompressedBuffer CompressedBlock = + GenerateBlock(Path, Content, Lookup, NewBlockChunks[BlockIndex], BlockDescription, DiskStats); + if (!CompressedBlock) + { + throw std::runtime_error(fmt::format("Failed generating block {}", BlockHash)); + } + ZEN_ASSERT(BlockDescription.BlockHash == BlockHash); + + CompositeBuffer Payload = WriteToTempFileIfNeeded(CompressedBlock.GetCompressed(), + Path / ZenTempBlockFolderName, + BlockDescription.BlockHash); + + GenerateBlocksStats.GeneratedBlockByteCount += NewBlocks.BlockSizes[BlockIndex]; + GenerateBlocksStats.GeneratedBlockCount++; + GeneratedBlockByteCount += NewBlocks.BlockSizes[BlockIndex]; + GeneratedBlockCount++; + if (GeneratedBlockCount == GenerateBlockIndexes.size()) + { + FilteredGenerateBlockBytesPerSecond.Stop(); + } + if (!AbortFlag) + { + AsyncUploadBlock(BlockIndex, BlockHash, std::move(Payload)); + } + ZEN_CONSOLE_VERBOSE("Regenerated block {} ({}) containing {} chunks", + NewBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(CompressedBlock.GetCompressedSize()), + NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + } + }, + [&](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed generating block. Reason: {}", Ex.what()); + AbortFlag = true; + }); + } + } + + std::atomic CompressedLooseChunkCount = 0; + std::atomic CompressedLooseChunkByteCount = 0; + + // Start compression of any non-precompressed loose chunks and schedule upload + for (const uint32_t CompressLooseChunkOrderIndex : CompressLooseChunkOrderIndexes) + { + const uint32_t ChunkIndex = LooseChunkIndexes[CompressLooseChunkOrderIndex]; + Work.ScheduleWork( + ReadChunkPool, + [&, ChunkIndex](std::atomic& AbortFlag) { + if (!AbortFlag) + { + FilteredCompressedBytesPerSecond.Start(); + CompositeBuffer Payload = CompressChunk(Path, Content, Lookup, ChunkIndex, Path / ZenTempChunkFolderName); + ZEN_CONSOLE_VERBOSE("Compressed chunk {} ({} -> {})", + Content.ChunkedContent.ChunkHashes[ChunkIndex], + NiceBytes(Content.ChunkedContent.ChunkRawSizes[ChunkIndex]), + NiceBytes(Payload.GetSize())); + UploadStats.ReadFromDiskBytes += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + LooseChunksStats.CompressedChunkBytes += Payload.GetSize(); + LooseChunksStats.CompressedChunkCount++; + CompressedLooseChunkByteCount += Payload.GetSize(); + CompressedLooseChunkCount++; + if (CompressedLooseChunkCount == CompressLooseChunkOrderIndexes.size()) + { + FilteredCompressedBytesPerSecond.Stop(); + } + if (!AbortFlag) + { + AsyncUploadLooseChunk(Content.ChunkedContent.ChunkHashes[ChunkIndex], std::move(Payload)); + } + } + }, + [&, ChunkIndex](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed compressing part blob {}. Reason: {}", + Content.ChunkedContent.ChunkHashes[ChunkIndex], + Ex.what()); + AbortFlag = true; + }); + } + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + FilteredCompressedBytesPerSecond.Update(CompressedLooseChunkByteCount.load()); + FilteredGenerateBlockBytesPerSecond.Update(GeneratedBlockByteCount.load()); + FilteredUploadedBytesPerSecond.Update(UploadedChunkSize.load() + UploadedBlockSize.load()); + uint64_t UploadedSize = UploadedChunkSize.load() + UploadedBlockSize.load(); + ProgressBar.UpdateState({.Task = "Uploading blobs ", + .Details = fmt::format("Compressed {}/{} chunks. " + "Uploaded {}/{} blobs ({}/{} {}bits/s)", + CompressedLooseChunkCount.load(), + CompressLooseChunkOrderIndexes.size(), + + UploadedBlockCount.load() + UploadedChunkCount.load(), + UploadBlockCount + UploadChunkCount, + + NiceBytes(UploadedChunkSize.load() + UploadedBlockSize.load()), + NiceBytes(TotalSize), + NiceNum(FilteredUploadedBytesPerSecond.GetCurrent())), + .TotalCount = gsl::narrow(TotalSize), + .RemainingCount = gsl::narrow(TotalSize - UploadedSize)}, + false); + }); + + ProgressBar.Finish(); + UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTime(); + GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGenerateBlockBytesPerSecond.GetElapsedTime(); + LooseChunksStats.CompressChunksElapsedWallTimeUS = FilteredCompressedBytesPerSecond.GetElapsedTime(); + } + } + + std::vector FindReuseBlocks(const std::vector& KnownBlocks, + std::span ChunkHashes, + std::span ChunkIndexes, + uint8_t MinPercentLimit, + std::vector& OutUnusedChunkIndexes, + FindBlocksStatistics& FindBlocksStats) + { + // Find all blocks with a usage level higher than MinPercentLimit + // Pick out the blocks with usage higher or equal to MinPercentLimit + // Sort them with highest size usage - most usage first + // Make a list of all chunks and mark them as not found + // For each block, recalculate the block has usage percent based on the chunks marked as not found + // If the block still reaches MinPercentLimit, keep it and remove the matching chunks from the not found list + // Repeat for following all remaining block that initially matched MinPercentLimit + + std::vector FilteredReuseBlockIndexes; + + uint32_t ChunkCount = gsl::narrow(ChunkHashes.size()); + std::vector ChunkFound(ChunkCount, false); + + if (ChunkCount > 0) + { + if (!KnownBlocks.empty()) + { + Stopwatch ReuseTimer; + + tsl::robin_map ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(ChunkIndexes.size()); + for (uint32_t ChunkIndex : ChunkIndexes) + { + ChunkHashToChunkIndex.insert_or_assign(ChunkHashes[ChunkIndex], ChunkIndex); + } + + std::vector BlockSizes(KnownBlocks.size(), 0); + std::vector BlockUseSize(KnownBlocks.size(), 0); + + std::vector ReuseBlockIndexes; + + for (size_t KnownBlockIndex = 0; KnownBlockIndex < KnownBlocks.size(); KnownBlockIndex++) + { + const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + if (KnownBlock.BlockHash != IoHash::Zero && + KnownBlock.ChunkRawHashes.size() == KnownBlock.ChunkCompressedLengths.size()) + { + size_t BlockAttachmentCount = KnownBlock.ChunkRawHashes.size(); + if (BlockAttachmentCount == 0) + { + continue; + } + size_t ReuseSize = 0; + size_t BlockSize = 0; + size_t FoundAttachmentCount = 0; + size_t BlockChunkCount = KnownBlock.ChunkRawHashes.size(); + for (size_t BlockChunkIndex = 0; BlockChunkIndex < BlockChunkCount; BlockChunkIndex++) + { + const IoHash& BlockChunkHash = KnownBlock.ChunkRawHashes[BlockChunkIndex]; + const uint32_t BlockChunkSize = KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; + BlockSize += BlockChunkSize; + if (ChunkHashToChunkIndex.contains(BlockChunkHash)) + { + ReuseSize += BlockChunkSize; + FoundAttachmentCount++; + } + } + + size_t ReusePercent = (ReuseSize * 100) / BlockSize; + + if (ReusePercent >= MinPercentLimit) + { + ZEN_CONSOLE_VERBOSE("Reusing block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundAttachmentCount, + ReusePercent); + ReuseBlockIndexes.push_back(KnownBlockIndex); + + BlockSizes[KnownBlockIndex] = BlockSize; + BlockUseSize[KnownBlockIndex] = ReuseSize; + } + else if (FoundAttachmentCount > 0) + { + ZEN_CONSOLE_VERBOSE("Skipping block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundAttachmentCount, + ReusePercent); + FindBlocksStats.RejectedBlockCount++; + FindBlocksStats.RejectedChunkCount += FoundAttachmentCount; + FindBlocksStats.RejectedByteCount += ReuseSize; + } + } + } + + if (!ReuseBlockIndexes.empty()) + { + std::sort(ReuseBlockIndexes.begin(), ReuseBlockIndexes.end(), [&](size_t Lhs, size_t Rhs) { + return BlockUseSize[Lhs] > BlockUseSize[Rhs]; + }); + + for (size_t KnownBlockIndex : ReuseBlockIndexes) + { + std::vector FoundChunkIndexes; + size_t BlockSize = 0; + size_t AdjustedReuseSize = 0; + const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for (size_t BlockChunkIndex = 0; BlockChunkIndex < KnownBlock.ChunkRawHashes.size(); BlockChunkIndex++) + { + const IoHash& BlockChunkHash = KnownBlock.ChunkRawHashes[BlockChunkIndex]; + const uint32_t BlockChunkSize = KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; + BlockSize += BlockChunkSize; + if (auto It = ChunkHashToChunkIndex.find(BlockChunkHash); It != ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = It->second; + if (!ChunkFound[ChunkIndex]) + { + FoundChunkIndexes.push_back(ChunkIndex); + AdjustedReuseSize += KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; + } + } + } + + size_t ReusePercent = (AdjustedReuseSize * 100) / BlockSize; + + if (ReusePercent >= MinPercentLimit) + { + ZEN_CONSOLE_VERBOSE("Reusing block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundChunkIndexes.size(), + ReusePercent); + FilteredReuseBlockIndexes.push_back(KnownBlockIndex); + + for (uint32_t ChunkIndex : FoundChunkIndexes) + { + ChunkFound[ChunkIndex] = true; + } + FindBlocksStats.AcceptedChunkCount += FoundChunkIndexes.size(); + FindBlocksStats.AcceptedByteCount += AdjustedReuseSize; + FindBlocksStats.AcceptedReduntantChunkCount += KnownBlock.ChunkRawHashes.size() - FoundChunkIndexes.size(); + FindBlocksStats.AcceptedReduntantByteCount += BlockSize - AdjustedReuseSize; + } + else + { + ZEN_CONSOLE_VERBOSE("Skipping block {}. filtered usage level: {}%", KnownBlock.BlockHash, ReusePercent); + FindBlocksStats.RejectedBlockCount++; + FindBlocksStats.RejectedChunkCount += FoundChunkIndexes.size(); + FindBlocksStats.RejectedByteCount += AdjustedReuseSize; + } + } + } + } + OutUnusedChunkIndexes.reserve(ChunkIndexes.size() - FindBlocksStats.AcceptedChunkCount); + for (uint32_t ChunkIndex : ChunkIndexes) + { + if (!ChunkFound[ChunkIndex]) + { + OutUnusedChunkIndexes.push_back(ChunkIndex); + } + } + } + return FilteredReuseBlockIndexes; + }; + + bool UploadFolder(BuildStorage& Storage, + const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& Path, + const std::filesystem::path& ManifestPath, + const uint8_t BlockReuseMinPercentLimit, + bool AllowMultiparts, + const CbObject& MetaData, + bool CreateBuild, + bool IgnoreExistingBlocks) + { + Stopwatch ProcessTimer; + + std::atomic AbortFlag = false; + + const std::filesystem::path ZenTempFolder = Path / ZenTempFolderName; + CreateDirectories(ZenTempFolder); + CleanDirectory(ZenTempFolder, {}); + auto _ = MakeGuard([&]() { + CleanDirectory(ZenTempFolder, {}); + std::filesystem::remove(ZenTempFolder); + }); + CreateDirectories(Path / ZenTempBlockFolderName); + CreateDirectories(Path / ZenTempChunkFolderName); + + CbObject ChunkerParameters; + + ChunkedFolderContent LocalContent; + + GetFolderContentStatistics LocalFolderScanStats; + ChunkingStatistics ChunkingStats; + { + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + auto IsAcceptedFile = [ExcludeExtensions = + DefaultExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> bool { + for (const std::string_view& ExcludeExtension : ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; + }; + + auto ParseManifest = [](const std::filesystem::path& Path, + const std::filesystem::path& ManifestPath) -> std::vector { + std::vector AssetPaths; + std::filesystem::path AbsoluteManifestPath = ManifestPath.is_absolute() ? ManifestPath : Path / ManifestPath; + IoBuffer ManifestContent = ReadFile(AbsoluteManifestPath).Flatten(); + std::string_view ManifestString((const char*)ManifestContent.GetView().GetData(), ManifestContent.GetSize()); + std::string_view::size_type Offset = 0; + while (Offset < ManifestContent.GetSize()) + { + size_t PathBreakOffset = ManifestString.find_first_of("\t\r\n", Offset); + if (PathBreakOffset == std::string_view::npos) + { + PathBreakOffset = ManifestContent.GetSize(); + } + std::string_view AssetPath = ManifestString.substr(Offset, PathBreakOffset - Offset); + if (!AssetPath.empty()) + { + AssetPaths.emplace_back(std::filesystem::path(AssetPath)); + } + Offset = PathBreakOffset; + size_t EolOffset = ManifestString.find_first_of("\r\n", Offset); + if (EolOffset == std::string_view::npos) + { + break; + } + Offset = EolOffset; + size_t LineBreakOffset = ManifestString.find_first_not_of("\t\r\n", Offset); + if (LineBreakOffset == std::string_view::npos) + { + break; + } + Offset = LineBreakOffset; + } + return AssetPaths; + }; + + Stopwatch ScanTimer; + FolderContent Content; + if (ManifestPath.empty()) + { + std::filesystem::path ExcludeManifestPath = Path / ZenExcludeManifestName; + tsl::robin_set ExcludeAssetPaths; + if (std::filesystem::is_regular_file(ExcludeManifestPath)) + { + std::vector AssetPaths = ParseManifest(Path, ExcludeManifestPath); + ExcludeAssetPaths.reserve(AssetPaths.size()); + for (const std::filesystem::path& AssetPath : AssetPaths) + { + ExcludeAssetPaths.insert(AssetPath.generic_string()); + } + } + Content = GetFolderContent( + LocalFolderScanStats, + Path, + std::move(IsAcceptedFolder), + [&IsAcceptedFile, + &ExcludeAssetPaths](const std::string_view& RelativePath, uint64_t Size, uint32_t Attributes) -> bool { + if (RelativePath == ZenExcludeManifestName) + { + return false; + } + if (!IsAcceptedFile(RelativePath, Size, Attributes)) + { + return false; + } + if (ExcludeAssetPaths.contains(std::filesystem::path(RelativePath).generic_string())) + { + return false; + } + return true; + }, + GetMediumWorkerPool(EWorkloadType::Burst), + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { + ZEN_DEBUG("Found {} files in '{}'...", LocalFolderScanStats.AcceptedFileCount.load(), Path); + }, + AbortFlag); + } + else + { + Stopwatch ManifestParseTimer; + std::vector AssetPaths = ParseManifest(Path, ManifestPath); + for (const std::filesystem::path& AssetPath : AssetPaths) + { + Content.Paths.push_back(AssetPath); + Content.RawSizes.push_back(std::filesystem::file_size(Path / AssetPath)); +#if ZEN_PLATFORM_WINDOWS + Content.Attributes.push_back(GetFileAttributes(Path / AssetPath)); +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + Content.Attributes.push_back(GetFileMode(Path / AssetPath)); +#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); + LocalFolderScanStats.AcceptedFileCount++; + } + if (ManifestPath.is_relative()) + { + Content.Paths.push_back(ManifestPath); + Content.RawSizes.push_back(std::filesystem::file_size(ManifestPath)); +#if ZEN_PLATFORM_WINDOWS + Content.Attributes.push_back(GetFileAttributes(ManifestPath)); +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + Content.Attributes.push_back(GetFileMode(ManifestPath)); +#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + + LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); + LocalFolderScanStats.AcceptedFileCount++; + } + LocalFolderScanStats.FoundFileByteCount.store(LocalFolderScanStats.AcceptedFileByteCount); + LocalFolderScanStats.FoundFileCount.store(LocalFolderScanStats.AcceptedFileCount); + LocalFolderScanStats.ElapsedWallTimeUS = ManifestParseTimer.GetElapsedTimeUs(); + } + + std::unique_ptr ChunkController = CreateBasicChunkingController(); + { + CbObjectWriter ChunkParametersWriter; + ChunkParametersWriter.AddString("name"sv, ChunkController->GetName()); + ChunkParametersWriter.AddObject("parameters"sv, ChunkController->GetParameters()); + ChunkerParameters = ChunkParametersWriter.Save(); + } + + std::uint64_t TotalRawSize = 0; + for (uint64_t RawSize : Content.RawSizes) + { + TotalRawSize += RawSize; + } + { + ProgressBar ProgressBar(UsePlainProgress); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + LocalContent = ChunkFolderContent( + ChunkingStats, + GetMediumWorkerPool(EWorkloadType::Burst), + Path, + Content, + *ChunkController, + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = fmt::format("{}/{} ({}/{}, {}B/s) files, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + Content.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(TotalRawSize), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())), + .TotalCount = TotalRawSize, + .RemainingCount = TotalRawSize - ChunkingStats.BytesHashed.load()}, + false); + }, + AbortFlag); + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + } + + if (AbortFlag) + { + return true; + } + + ZEN_CONSOLE("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec", + LocalContent.Paths.size(), + NiceBytes(TotalRawSize), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load()), + Path, + NiceTimeSpanMs(ScanTimer.GetElapsedTimeMs()), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); + } + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); + std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + + if (CreateBuild) + { + Stopwatch PutBuildTimer; + CbObject PutBuildResult = Storage.PutBuild(BuildId, MetaData); + ZEN_CONSOLE("PutBuild took {}. Payload size: {}", + NiceLatencyNs(PutBuildTimer.GetElapsedTimeUs() * 1000), + NiceBytes(MetaData.GetSize())); + PreferredMultipartChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(PreferredMultipartChunkSize); + } + else + { + Stopwatch GetBuildTimer; + CbObject Build = Storage.GetBuild(BuildId); + ZEN_CONSOLE("GetBuild took {}. Payload size: {}", + NiceLatencyNs(GetBuildTimer.GetElapsedTimeUs() * 1000), + NiceBytes(Build.GetSize())); + if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + PreferredMultipartChunkSize = ChunkSize; + } + else if (AllowMultiparts) + { + ZEN_WARN("PreferredMultipartChunkSize is unknown. Defaulting to '{}'", NiceBytes(PreferredMultipartChunkSize)); + } + } + + const std::uint64_t LargeAttachmentSize = AllowMultiparts ? PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + + FindBlocksStatistics FindBlocksStats; + GenerateBlocksStatistics GenerateBlocksStats; + LooseChunksStatistics LooseChunksStats; + + std::vector KnownBlocks; + std::vector ReuseBlockIndexes; + std::vector NewBlockChunkIndexes; + Stopwatch BlockArrangeTimer; + + std::vector LooseChunkIndexes; + { + bool EnableBlocks = true; + std::vector BlockChunkIndexes; + for (uint32_t ChunkIndex = 0; ChunkIndex < LocalContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) + { + const uint64_t ChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + if (!EnableBlocks || ChunkRawSize == 0 || ChunkRawSize > DefaultChunksBlockParams.MaxChunkEmbedSize) + { + LooseChunkIndexes.push_back(ChunkIndex); + LooseChunksStats.ChunkByteCount += ChunkRawSize; + } + else + { + BlockChunkIndexes.push_back(ChunkIndex); + FindBlocksStats.PotentialChunkByteCount += ChunkRawSize; + } + } + FindBlocksStats.PotentialChunkCount = BlockChunkIndexes.size(); + LooseChunksStats.ChunkCount = LooseChunkIndexes.size(); + + if (IgnoreExistingBlocks) + { + ZEN_CONSOLE("Ignoring any existing blocks in store"); + NewBlockChunkIndexes = std::move(BlockChunkIndexes); + } + else + { + Stopwatch KnownBlocksTimer; + KnownBlocks = Storage.FindBlocks(BuildId); + FindBlocksStats.FindBlockTimeMS = KnownBlocksTimer.GetElapsedTimeMs(); + FindBlocksStats.FoundBlockCount = KnownBlocks.size(); + + ReuseBlockIndexes = FindReuseBlocks(KnownBlocks, + LocalContent.ChunkedContent.ChunkHashes, + BlockChunkIndexes, + BlockReuseMinPercentLimit, + NewBlockChunkIndexes, + FindBlocksStats); + FindBlocksStats.AcceptedBlockCount = ReuseBlockIndexes.size(); + + for (const ChunkBlockDescription& Description : KnownBlocks) + { + for (uint32_t ChunkRawLength : Description.ChunkRawLengths) + { + FindBlocksStats.FoundBlockByteCount += ChunkRawLength; + } + FindBlocksStats.FoundBlockChunkCount += Description.ChunkRawHashes.size(); + } + } + } + + std::vector> NewBlockChunks; + ArrangeChunksIntoBlocks(LocalContent, LocalLookup, DefaultChunksBlockParams.MaxBlockSize, NewBlockChunkIndexes, NewBlockChunks); + + FindBlocksStats.NewBlocksCount = NewBlockChunks.size(); + for (uint32_t ChunkIndex : NewBlockChunkIndexes) + { + FindBlocksStats.NewBlocksChunkByteCount += LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + FindBlocksStats.NewBlocksChunkCount = NewBlockChunkIndexes.size(); + + const double AcceptedByteCountPercent = FindBlocksStats.PotentialChunkByteCount > 0 + ? (100.0 * FindBlocksStats.AcceptedByteCount / FindBlocksStats.PotentialChunkByteCount) + : 0.0; + + const double AcceptedReduntantByteCountPercent = + FindBlocksStats.AcceptedByteCount > 0 ? (100.0 * FindBlocksStats.AcceptedReduntantByteCount) / + (FindBlocksStats.AcceptedByteCount + FindBlocksStats.AcceptedReduntantByteCount) + : 0.0; + ZEN_CONSOLE( + "Found {} chunks in {} ({}) blocks eligeble for reuse in {}\n" + " Reusing {} ({}) matching chunks in {} blocks ({:.1f}%)\n" + " Accepting {} ({}) redundant chunks ({:.1f}%)\n" + " Rejected {} ({}) chunks in {} blocks\n" + " Arranged {} ({}) chunks in {} new blocks\n" + " Keeping {} ({}) chunks as loose chunks\n" + " Discovery completed in {}", + FindBlocksStats.FoundBlockChunkCount, + FindBlocksStats.FoundBlockCount, + NiceBytes(FindBlocksStats.FoundBlockByteCount), + NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS), + + FindBlocksStats.AcceptedChunkCount, + NiceBytes(FindBlocksStats.AcceptedByteCount), + FindBlocksStats.AcceptedBlockCount, + AcceptedByteCountPercent, + + FindBlocksStats.AcceptedReduntantChunkCount, + NiceBytes(FindBlocksStats.AcceptedReduntantByteCount), + AcceptedReduntantByteCountPercent, + + FindBlocksStats.RejectedChunkCount, + NiceBytes(FindBlocksStats.RejectedByteCount), + FindBlocksStats.RejectedBlockCount, + + FindBlocksStats.NewBlocksChunkCount, + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount), + FindBlocksStats.NewBlocksCount, + + LooseChunksStats.ChunkCount, + NiceBytes(LooseChunksStats.ChunkByteCount), + + NiceTimeSpanMs(BlockArrangeTimer.GetElapsedTimeMs())); + + DiskStatistics DiskStats; + UploadStatistics UploadStats; + GeneratedBlocks NewBlocks; + + if (!NewBlockChunks.empty()) + { + Stopwatch GenerateBuildBlocksTimer; + auto __ = MakeGuard([&]() { + uint64_t BlockGenerateTimeUs = GenerateBuildBlocksTimer.GetElapsedTimeUs(); + ZEN_CONSOLE("Generated {} ({}) and uploaded {} ({}) blocks in {}. Generate speed: {}B/sec. Transfer speed {}bits/sec.", + GenerateBlocksStats.GeneratedBlockCount.load(), + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount), + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes.load()), + NiceTimeSpanMs(BlockGenerateTimeUs / 1000), + NiceNum(GetBytesPerSecond(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, + GenerateBlocksStats.GeneratedBlockByteCount)), + NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.BlocksBytes * 8))); + }); + GenerateBuildBlocks(Path, + LocalContent, + LocalLookup, + Storage, + BuildId, + AbortFlag, + NewBlockChunks, + NewBlocks, + DiskStats, + UploadStats, + GenerateBlocksStats); + } + + if (AbortFlag) + { + return true; + } + + CbObject PartManifest; + { + CbObjectWriter PartManifestWriter; + Stopwatch ManifestGenerationTimer; + auto __ = MakeGuard([&]() { + ZEN_CONSOLE("Generated build part manifest in {} ({})", + NiceTimeSpanMs(ManifestGenerationTimer.GetElapsedTimeMs()), + NiceBytes(PartManifestWriter.GetSaveSize())); + }); + PartManifestWriter.AddObject("chunker"sv, ChunkerParameters); + + std::vector AllChunkBlockHashes; + std::vector AllChunkBlockDescriptions; + AllChunkBlockHashes.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); + AllChunkBlockDescriptions.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); + for (size_t ReuseBlockIndex : ReuseBlockIndexes) + { + AllChunkBlockDescriptions.push_back(KnownBlocks[ReuseBlockIndex]); + AllChunkBlockHashes.push_back(KnownBlocks[ReuseBlockIndex].BlockHash); + } + AllChunkBlockDescriptions.insert(AllChunkBlockDescriptions.end(), + NewBlocks.BlockDescriptions.begin(), + NewBlocks.BlockDescriptions.end()); + for (const ChunkBlockDescription& BlockDescription : NewBlocks.BlockDescriptions) + { + AllChunkBlockHashes.push_back(BlockDescription.BlockHash); + } +#if EXTRA_VERIFY + tsl::robin_map ChunkHashToAbsoluteChunkIndex; + std::vector AbsoluteChunkHashes; + AbsoluteChunkHashes.reserve(LocalContent.ChunkedContent.ChunkHashes.size()); + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + ChunkHashToAbsoluteChunkIndex.insert({LocalContent.ChunkedContent.ChunkHashes[ChunkIndex], AbsoluteChunkHashes.size()}); + AbsoluteChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + } + for (const ChunkBlockDescription& Block : AllChunkBlockDescriptions) + { + for (const IoHash& ChunkHash : Block.ChunkHashes) + { + ChunkHashToAbsoluteChunkIndex.insert({ChunkHash, AbsoluteChunkHashes.size()}); + AbsoluteChunkHashes.push_back(ChunkHash); + } + } + for (const IoHash& ChunkHash : LocalContent.ChunkedContent.ChunkHashes) + { + ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(ChunkHash)] == ChunkHash); + ZEN_ASSERT(LocalContent.ChunkedContent.ChunkHashes[LocalLookup.ChunkHashToChunkIndex.at(ChunkHash)] == ChunkHash); + } + for (const uint32_t ChunkIndex : LocalContent.ChunkedContent.ChunkOrders) + { + ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex])] == + LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + ZEN_ASSERT(LocalLookup.ChunkHashToChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]) == ChunkIndex); + } +#endif // EXTRA_VERIFY + std::vector AbsoluteChunkOrders = CalculateAbsoluteChunkOrders(LocalContent.ChunkedContent.ChunkHashes, + LocalContent.ChunkedContent.ChunkOrders, + LocalLookup.ChunkHashToChunkIndex, + LooseChunkIndexes, + AllChunkBlockDescriptions); + +#if EXTRA_VERIFY + for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); ChunkOrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndex]; + uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[ChunkOrderIndex]; + const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + } +#endif // EXTRA_VERIFY + + WriteBuildContentToCompactBinary(PartManifestWriter, + LocalContent.Platform, + LocalContent.Paths, + LocalContent.RawHashes, + LocalContent.RawSizes, + LocalContent.Attributes, + LocalContent.ChunkedContent.SequenceRawHashes, + LocalContent.ChunkedContent.ChunkCounts, + LocalContent.ChunkedContent.ChunkHashes, + LocalContent.ChunkedContent.ChunkRawSizes, + AbsoluteChunkOrders, + LooseChunkIndexes, + AllChunkBlockHashes); + +#if EXTRA_VERIFY + { + ChunkedFolderContent VerifyFolderContent; + + std::vector OutAbsoluteChunkOrders; + std::vector OutLooseChunkHashes; + std::vector OutLooseChunkRawSizes; + std::vector OutBlockRawHashes; + + ReadBuildContentFromCompactBinary(PartManifestWriter.Save(), + VerifyFolderContent.Platform, + VerifyFolderContent.Paths, + VerifyFolderContent.RawHashes, + VerifyFolderContent.RawSizes, + VerifyFolderContent.Attributes, + VerifyFolderContent.ChunkedContent.SequenceRawHashes, + VerifyFolderContent.ChunkedContent.ChunkCounts, + OutAbsoluteChunkOrders, + OutLooseChunkHashes, + OutLooseChunkRawSizes, + OutBlockRawHashes); + ZEN_ASSERT(OutBlockRawHashes == AllChunkBlockHashes); + + for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = OutAbsoluteChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + } + + CalculateLocalChunkOrders(OutAbsoluteChunkOrders, + OutLooseChunkHashes, + OutLooseChunkRawSizes, + AllChunkBlockDescriptions, + VerifyFolderContent.ChunkedContent.ChunkHashes, + VerifyFolderContent.ChunkedContent.ChunkRawSizes, + VerifyFolderContent.ChunkedContent.ChunkOrders); + + ZEN_ASSERT(LocalContent.Paths == VerifyFolderContent.Paths); + ZEN_ASSERT(LocalContent.RawHashes == VerifyFolderContent.RawHashes); + ZEN_ASSERT(LocalContent.RawSizes == VerifyFolderContent.RawSizes); + ZEN_ASSERT(LocalContent.Attributes == VerifyFolderContent.Attributes); + ZEN_ASSERT(LocalContent.ChunkedContent.SequenceRawHashes == VerifyFolderContent.ChunkedContent.SequenceRawHashes); + ZEN_ASSERT(LocalContent.ChunkedContent.ChunkCounts == VerifyFolderContent.ChunkedContent.ChunkCounts); + + for (uint32_t OrderIndex = 0; OrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = VerifyFolderContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = VerifyFolderContent.ChunkedContent.ChunkHashes[VerifyChunkIndex]; + uint64_t VerifyChunkRawSize = VerifyFolderContent.ChunkedContent.ChunkRawSizes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize); + } + } +#endif // EXTRA_VERIFY + PartManifest = PartManifestWriter.Save(); + } + + Stopwatch PutBuildPartResultTimer; + std::pair> PutBuildPartResult = Storage.PutBuildPart(BuildId, BuildPartId, BuildPartName, PartManifest); + ZEN_CONSOLE("PutBuildPart took {}, payload size {}. {} attachments are missing.", + NiceLatencyNs(PutBuildPartResultTimer.GetElapsedTimeUs() * 1000), + NiceBytes(PartManifest.GetSize()), + PutBuildPartResult.second.size()); + IoHash PartHash = PutBuildPartResult.first; + + auto UploadAttachments = [&](std::span RawHashes) { + if (!AbortFlag) + { + ZEN_CONSOLE_VERBOSE("Uploading attachments: {}", FormatArray(RawHashes, "\n "sv)); + + UploadStatistics TempUploadStats; + GenerateBlocksStatistics TempGenerateBlocksStats; + LooseChunksStatistics TempLooseChunksStats; + + Stopwatch TempUploadTimer; + auto __ = MakeGuard([&]() { + uint64_t TempChunkUploadTimeUs = TempUploadTimer.GetElapsedTimeUs(); + ZEN_CONSOLE( + "Generated {} ({} {}B/s) and uploaded {} ({}) blocks. " + "Compressed {} ({} {}B/s) and uploaded {} ({}) chunks. " + "Transferred {} ({}B/s) in {}", + TempGenerateBlocksStats.GeneratedBlockCount.load(), + NiceBytes(TempGenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceNum(GetBytesPerSecond(TempGenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, + TempGenerateBlocksStats.GeneratedBlockByteCount)), + TempUploadStats.BlockCount.load(), + NiceBytes(TempUploadStats.BlocksBytes), + + TempLooseChunksStats.CompressedChunkCount.load(), + NiceBytes(TempLooseChunksStats.CompressedChunkBytes.load()), + NiceNum(GetBytesPerSecond(TempLooseChunksStats.CompressChunksElapsedWallTimeUS, + TempLooseChunksStats.CompressedChunkBytes)), + TempUploadStats.ChunkCount.load(), + NiceBytes(TempUploadStats.ChunksBytes), + + NiceBytes(TempUploadStats.BlocksBytes + TempUploadStats.ChunksBytes), + NiceNum(GetBytesPerSecond(TempUploadStats.ElapsedWallTimeUS, TempUploadStats.ChunksBytes * 8)), + NiceTimeSpanMs(TempChunkUploadTimeUs / 1000)); + }); + UploadPartBlobs(Storage, + BuildId, + Path, + LocalContent, + LocalLookup, + RawHashes, + NewBlockChunks, + NewBlocks, + LooseChunkIndexes, + LargeAttachmentSize, + AbortFlag, + DiskStats, + TempUploadStats, + TempGenerateBlocksStats, + TempLooseChunksStats); + UploadStats += TempUploadStats; + LooseChunksStats += TempLooseChunksStats; + GenerateBlocksStats += TempGenerateBlocksStats; + } + }; + if (IgnoreExistingBlocks) + { + ZEN_CONSOLE_VERBOSE("PutBuildPart uploading all attachments, needs are: {}", + FormatArray(PutBuildPartResult.second, "\n "sv)); + + std::vector ForceUploadChunkHashes; + ForceUploadChunkHashes.reserve(LooseChunkIndexes.size()); + + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + ForceUploadChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + } + + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockBuffers.size(); BlockIndex++) + { + if (NewBlocks.BlockBuffers[BlockIndex]) + { + // Block was not uploaded during generation + ForceUploadChunkHashes.push_back(NewBlocks.BlockDescriptions[BlockIndex].BlockHash); + } + } + UploadAttachments(ForceUploadChunkHashes); + } + else if (!PutBuildPartResult.second.empty()) + { + ZEN_CONSOLE_VERBOSE("PutBuildPart needs attachments: {}", FormatArray(PutBuildPartResult.second, "\n "sv)); + UploadAttachments(PutBuildPartResult.second); + } + + while (true) + { + Stopwatch FinalizeBuildPartTimer; + std::vector Needs = Storage.FinalizeBuildPart(BuildId, BuildPartId, PartHash); + ZEN_CONSOLE("FinalizeBuildPart took {}. {} attachments are missing.", + NiceLatencyNs(FinalizeBuildPartTimer.GetElapsedTimeUs() * 1000), + Needs.size()); + if (Needs.empty()) + { + break; + } + if (AbortFlag) + { + return true; + } + ZEN_CONSOLE_VERBOSE("FinalizeBuildPart needs attachments: {}", FormatArray(Needs, "\n "sv)); + UploadAttachments(Needs); + if (AbortFlag) + { + return true; + } + } + + if (AbortFlag) + { + return true; + } + + if (CreateBuild) + { + Stopwatch FinalizeBuildTimer; + Storage.FinalizeBuild(BuildId); + ZEN_CONSOLE("FinalizeBuild took {}", NiceLatencyNs(FinalizeBuildTimer.GetElapsedTimeUs() * 1000)); + } + + if (!NewBlocks.BlockDescriptions.empty()) + { + uint64_t UploadBlockMetadataCount = 0; + std::vector BlockHashes; + BlockHashes.reserve(NewBlocks.BlockDescriptions.size()); + Stopwatch UploadBlockMetadataTimer; + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockDescriptions.size(); BlockIndex++) + { + const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; + if (!NewBlocks.MetaDataHasBeenUploaded[BlockIndex]) + { + const CbObject BlockMetaData = + BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); + Storage.PutBlockMetadata(BuildId, BlockHash, BlockMetaData); + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + UploadBlockMetadataCount++; + } + BlockHashes.push_back(BlockHash); + } + if (UploadBlockMetadataCount > 0) + { + uint64_t ElapsedUS = UploadBlockMetadataTimer.GetElapsedTimeUs(); + UploadStats.ElapsedWallTimeUS += ElapsedUS; + ZEN_CONSOLE("Uploaded metadata for {} blocks in {}", UploadBlockMetadataCount, NiceTimeSpanMs(ElapsedUS / 1000)); + } + + std::vector VerifyBlockDescriptions = Storage.GetBlockMetadata(BuildId, BlockHashes); + if (VerifyBlockDescriptions.size() != BlockHashes.size()) + { + ZEN_CONSOLE("Uploaded blocks could not all be found, {} blocks are missing", + BlockHashes.size() - VerifyBlockDescriptions.size()); + return true; + } + } + + const double DeltaByteCountPercent = + ChunkingStats.BytesHashed > 0 + ? (100.0 * (FindBlocksStats.NewBlocksChunkByteCount + LooseChunksStats.CompressedChunkBytes)) / (ChunkingStats.BytesHashed) + : 0.0; + + const std::string LargeAttachmentStats = + (LargeAttachmentSize != (uint64_t)-1) ? fmt::format(" ({} as multipart)", UploadStats.MultipartAttachmentCount.load()) : ""; + + ZEN_CONSOLE_VERBOSE( + "Folder scanning stats:" + "\n FoundFileCount: {}" + "\n FoundFileByteCount: {}" + "\n AcceptedFileCount: {}" + "\n AcceptedFileByteCount: {}" + "\n ElapsedWallTimeUS: {}", + LocalFolderScanStats.FoundFileCount.load(), + NiceBytes(LocalFolderScanStats.FoundFileByteCount.load()), + LocalFolderScanStats.AcceptedFileCount.load(), + NiceBytes(LocalFolderScanStats.AcceptedFileByteCount.load()), + NiceLatencyNs(LocalFolderScanStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Chunking stats:" + "\n FilesProcessed: {}" + "\n FilesChunked: {}" + "\n BytesHashed: {}" + "\n UniqueChunksFound: {}" + "\n UniqueSequencesFound: {}" + "\n UniqueBytesFound: {}" + "\n ElapsedWallTimeUS: {}", + ChunkingStats.FilesProcessed.load(), + ChunkingStats.FilesChunked.load(), + NiceBytes(ChunkingStats.BytesHashed.load()), + ChunkingStats.UniqueChunksFound.load(), + ChunkingStats.UniqueSequencesFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load()), + NiceLatencyNs(ChunkingStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Find block stats:" + "\n FindBlockTimeMS: {}" + "\n PotentialChunkCount: {}" + "\n PotentialChunkByteCount: {}" + "\n FoundBlockCount: {}" + "\n FoundBlockChunkCount: {}" + "\n FoundBlockByteCount: {}" + "\n AcceptedBlockCount: {}" + "\n AcceptedChunkCount: {}" + "\n AcceptedByteCount: {}" + "\n RejectedBlockCount: {}" + "\n RejectedChunkCount: {}" + "\n RejectedByteCount: {}" + "\n AcceptedReduntantChunkCount: {}" + "\n AcceptedReduntantByteCount: {}" + "\n NewBlocksCount: {}" + "\n NewBlocksChunkCount: {}" + "\n NewBlocksChunkByteCount: {}", + NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS), + FindBlocksStats.PotentialChunkCount, + NiceBytes(FindBlocksStats.PotentialChunkByteCount), + FindBlocksStats.FoundBlockCount, + FindBlocksStats.FoundBlockChunkCount, + NiceBytes(FindBlocksStats.FoundBlockByteCount), + FindBlocksStats.AcceptedBlockCount, + FindBlocksStats.AcceptedChunkCount, + NiceBytes(FindBlocksStats.AcceptedByteCount), + FindBlocksStats.RejectedBlockCount, + FindBlocksStats.RejectedChunkCount, + NiceBytes(FindBlocksStats.RejectedByteCount), + FindBlocksStats.AcceptedReduntantChunkCount, + NiceBytes(FindBlocksStats.AcceptedReduntantByteCount), + FindBlocksStats.NewBlocksCount, + FindBlocksStats.NewBlocksChunkCount, + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount)); + + ZEN_CONSOLE_VERBOSE( + "Generate blocks stats:" + "\n GeneratedBlockByteCount: {}" + "\n GeneratedBlockCount: {}" + "\n GenerateBlocksElapsedWallTimeUS: {}", + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), + GenerateBlocksStats.GeneratedBlockCount.load(), + NiceLatencyNs(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Generate blocks stats:" + "\n ChunkCount: {}" + "\n ChunkByteCount: {}" + "\n CompressedChunkCount: {}" + "\n CompressChunksElapsedWallTimeUS: {}", + LooseChunksStats.ChunkCount, + NiceBytes(LooseChunksStats.ChunkByteCount), + LooseChunksStats.CompressedChunkCount.load(), + NiceBytes(LooseChunksStats.CompressedChunkBytes.load()), + NiceLatencyNs(LooseChunksStats.CompressChunksElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Disk stats:" + "\n OpenReadCount: {}" + "\n OpenWriteCount: {}" + "\n ReadCount: {}" + "\n ReadByteCount: {}" + "\n WriteCount: {}" + "\n WriteByteCount: {}" + "\n CurrentOpenFileCount: {}", + DiskStats.OpenReadCount.load(), + DiskStats.OpenWriteCount.load(), + DiskStats.ReadCount.load(), + NiceBytes(DiskStats.ReadByteCount.load()), + DiskStats.WriteCount.load(), + NiceBytes(DiskStats.WriteByteCount.load()), + DiskStats.CurrentOpenFileCount.load()); + + ZEN_CONSOLE_VERBOSE( + "Upload stats:" + "\n BlockCount: {}" + "\n BlocksBytes: {}" + "\n ChunkCount: {}" + "\n ChunksBytes: {}" + "\n ReadFromDiskBytes: {}" + "\n MultipartAttachmentCount: {}" + "\n ElapsedWallTimeUS: {}", + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes.load()), + UploadStats.ChunkCount.load(), + NiceBytes(UploadStats.ChunksBytes.load()), + NiceBytes(UploadStats.ReadFromDiskBytes.load()), + UploadStats.MultipartAttachmentCount.load(), + NiceLatencyNs(UploadStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE( + "Uploaded {}\n" + " Delta: {}/{} ({:.1f}%)\n" + " Blocks: {} ({})\n" + " Chunks: {} ({}){}\n" + " Rate: {}bits/sec", + NiceBytes(UploadStats.BlocksBytes + UploadStats.ChunksBytes), + + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount + LooseChunksStats.CompressedChunkBytes), + NiceBytes(ChunkingStats.BytesHashed), + DeltaByteCountPercent, + + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes), + UploadStats.ChunkCount.load(), + NiceBytes(UploadStats.ChunksBytes), + LargeAttachmentStats, + + NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, (UploadStats.ChunksBytes + UploadStats.BlocksBytes * 8)))); + + ZEN_CONSOLE("Uploaded ({}) build {} part {} ({}) in {}", + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount + LooseChunksStats.CompressedChunkBytes), + BuildId, + BuildPartName, + BuildPartId, + NiceTimeSpanMs(ProcessTimer.GetElapsedTimeMs())); + return false; + } + + void VerifyFolder(const ChunkedFolderContent& Content, const std::filesystem::path& Path, std::atomic& AbortFlag) + { + ProgressBar ProgressBar(UsePlainProgress); + std::atomic FilesVerified(0); + std::atomic FilesFailed(0); + std::atomic ReadBytes(0); + + WorkerThreadPool& VerifyPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + + ParallellWork Work(AbortFlag); + + const uint32_t PathCount = gsl::narrow(Content.Paths.size()); + + RwLock ErrorLock; + std::vector Errors; + + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + const ChunkedContentLookup Lookup = BuildChunkedContentLookup(Content); + + for (uint32_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (Work.IsAborted()) + { + break; + } + + Work.ScheduleWork( + VerifyPool, + [&, PathIndex](std::atomic& AbortFlag) { + if (!AbortFlag) + { + // TODO: Convert ScheduleWork body to function + + const std::filesystem::path TargetPath = (Path / Content.Paths[PathIndex]).make_preferred(); + if (IsAcceptedFolder(TargetPath.parent_path().generic_string())) + { + const uint64_t ExpectedSize = Content.RawSizes[PathIndex]; + if (!std::filesystem::exists(TargetPath)) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("File {} with expected size {} does not exist", TargetPath, ExpectedSize)); + }); + FilesFailed++; + } + else + { + std::error_code Ec; + uint64_t SizeOnDisk = gsl::narrow(std::filesystem::file_size(TargetPath, Ec)); + if (Ec) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back( + fmt::format("Failed to get size of file {}: {} ({})", TargetPath, Ec.message(), Ec.value())); + }); + FilesFailed++; + } + else if (SizeOnDisk < ExpectedSize) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("Size of file {} is smaller than expected. Expected: {}, Found: {}", + TargetPath, + ExpectedSize, + SizeOnDisk)); + }); + FilesFailed++; + } + else if (SizeOnDisk > ExpectedSize) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("Size of file {} is bigger than expected. Expected: {}, Found: {}", + TargetPath, + ExpectedSize, + SizeOnDisk)); + }); + FilesFailed++; + } + else if (SizeOnDisk > 0) + { + const IoHash& ExpectedRawHash = Content.RawHashes[PathIndex]; + IoBuffer Buffer = IoBufferBuilder::MakeFromFile(TargetPath); + IoHash RawHash = IoHash::HashBuffer(Buffer); + if (RawHash != ExpectedRawHash) + { + uint64_t FileOffset = 0; + const uint32_t SequenceRawHashesIndex = Lookup.RawHashToSequenceRawHashIndex.at(ExpectedRawHash); + const uint32_t OrderOffset = Lookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashesIndex]; + for (uint32_t OrderIndex = OrderOffset; + OrderIndex < OrderOffset + Content.ChunkedContent.ChunkCounts[SequenceRawHashesIndex]; + OrderIndex++) + { + uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; + uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + IoHash ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; + IoBuffer FileChunk = IoBuffer(Buffer, FileOffset, ChunkSize); + if (IoHash::HashBuffer(FileChunk) != ChunkHash) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format( + "WARNING: Hash of file {} does not match expected hash. Expected: {}, Found: {}. " + "Mismatch at chunk {}", + TargetPath, + ExpectedRawHash, + RawHash, + OrderIndex - OrderOffset)); + }); + break; + } + FileOffset += ChunkSize; + } + FilesFailed++; + } + ReadBytes += SizeOnDisk; + } + } + } + FilesVerified++; + } + }, + [&, PathIndex](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_UNUSED(AbortFlag); + + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("Failed verifying file '{}'. Reason: {}", + (Path / Content.Paths[PathIndex]).make_preferred(), + Ex.what())); + }); + FilesFailed++; + }); + } + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + ProgressBar.UpdateState({.Task = "Verifying files ", + .Details = fmt::format("Verified {} files out of {}. Verfied: {}. Failed files: {}", + FilesVerified.load(), + PathCount, + NiceBytes(ReadBytes.load()), + FilesFailed.load()), + .TotalCount = gsl::narrow(PathCount), + .RemainingCount = gsl::narrow(PathCount - FilesVerified.load())}, + false); + }); + ProgressBar.Finish(); + for (const std::string& Error : Errors) + { + ZEN_CONSOLE("{}", Error); + } + if (!Errors.empty()) + { + throw std::runtime_error(fmt::format("Verify failed with {} errors", Errors.size())); + } + } + + class WriteFileCache + { + public: + WriteFileCache() {} + ~WriteFileCache() { Flush(); } + + template + void WriteToFile(uint32_t TargetIndex, + std::function&& GetTargetPath, + const TBufferType& Buffer, + uint64_t FileOffset, + uint64_t TargetFinalSize) + { + if (!SeenTargetIndexes.empty() && SeenTargetIndexes.back() == TargetIndex) + { + ZEN_ASSERT(OpenFileWriter); + OpenFileWriter->Write(Buffer, FileOffset); + } + else + { + Flush(); + const std::filesystem::path& TargetPath = GetTargetPath(TargetIndex); + CreateDirectories(TargetPath.parent_path()); + uint32_t Tries = 5; + std::unique_ptr NewOutputFile( + std::make_unique(TargetPath, BasicFile::Mode::kWrite, [&Tries, TargetPath](std::error_code& Ec) { + if (Tries < 3) + { + ZEN_CONSOLE("Failed opening file '{}': {}{}", TargetPath, Ec.message(), Tries > 1 ? " Retrying"sv : ""sv); + } + if (Tries > 1) + { + Sleep(100); + } + return --Tries > 0; + })); + + const bool CacheWriter = TargetFinalSize > Buffer.GetSize(); + if (CacheWriter) + { + ZEN_ASSERT(std::find(SeenTargetIndexes.begin(), SeenTargetIndexes.end(), TargetIndex) == SeenTargetIndexes.end()); + + OutputFile = std::move(NewOutputFile); + OpenFileWriter = std::make_unique(*OutputFile, Min(TargetFinalSize, 256u * 1024u)); + OpenFileWriter->Write(Buffer, FileOffset); + SeenTargetIndexes.push_back(TargetIndex); + } + else + { + NewOutputFile->Write(Buffer, FileOffset); + } + } + } + + void Flush() + { + OpenFileWriter = {}; + OutputFile = {}; + } + std::vector SeenTargetIndexes; + std::unique_ptr OutputFile; + std::unique_ptr OpenFileWriter; + }; + + std::vector GetRemainingChunkTargets( + const std::vector& RemotePathIndexWantsCopyFromCacheFlags, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex) + { + std::span ChunkSources = GetChunkLocations(Lookup, ChunkIndex); + std::vector ChunkTargetPtrs; + if (!ChunkSources.empty()) + { + ChunkTargetPtrs.reserve(ChunkSources.size()); + for (const ChunkedContentLookup::ChunkLocation& Source : ChunkSources) + { + if (!RemotePathIndexWantsCopyFromCacheFlags[Source.PathIndex]) + { + ChunkTargetPtrs.push_back(&Source); + } + } + } + return ChunkTargetPtrs; + }; + + bool WriteBlockToDisk(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const std::vector& RemotePathIndexWantsCopyFromCacheFlags, + const CompositeBuffer& DecompressedBlockBuffer, + const ChunkedContentLookup& Lookup, + std::atomic* RemoteChunkIndexNeedsCopyFromSourceFlags, + uint32_t& OutChunksComplete, + uint64_t& OutBytesWritten) + { + std::vector ChunkBuffers; + struct WriteOpData + { + const ChunkedContentLookup::ChunkLocation* Target; + size_t ChunkBufferIndex; + }; + std::vector WriteOps; + + SharedBuffer BlockBuffer = DecompressedBlockBuffer.Flatten(); + uint64_t HeaderSize = 0; + if (IterateChunkBlock( + BlockBuffer, + [&](CompressedBuffer&& Chunk, const IoHash& ChunkHash) { + if (auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); It != Lookup.ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = It->second; + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(RemotePathIndexWantsCopyFromCacheFlags, Lookup, ChunkIndex); + + if (!ChunkTargetPtrs.empty()) + { + bool NeedsWrite = true; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[ChunkIndex].compare_exchange_strong(NeedsWrite, false)) + { + CompositeBuffer Decompressed = Chunk.DecompressToComposite(); + if (!Decompressed) + { + throw std::runtime_error(fmt::format("Decompression of build blob {} failed", ChunkHash)); + } + ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed)); + ZEN_ASSERT(Decompressed.GetSize() == Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + for (const ChunkedContentLookup::ChunkLocation* Target : ChunkTargetPtrs) + { + WriteOps.push_back(WriteOpData{.Target = Target, .ChunkBufferIndex = ChunkBuffers.size()}); + } + ChunkBuffers.emplace_back(std::move(Decompressed)); + } + } + } + }, + HeaderSize)) + { + if (!WriteOps.empty()) + { + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOpData& Lhs, const WriteOpData& Rhs) { + if (Lhs.Target->PathIndex < Rhs.Target->PathIndex) + { + return true; + } + if (Lhs.Target->PathIndex > Rhs.Target->PathIndex) + { + return false; + } + return Lhs.Target->Offset < Rhs.Target->Offset; + }); + + WriteFileCache OpenFileCache; + for (const WriteOpData& WriteOp : WriteOps) + { + const CompositeBuffer& Chunk = ChunkBuffers[WriteOp.ChunkBufferIndex]; + const uint32_t PathIndex = WriteOp.Target->PathIndex; + const uint64_t ChunkSize = Chunk.GetSize(); + const uint64_t FileOffset = WriteOp.Target->Offset; + ZEN_ASSERT(FileOffset + ChunkSize <= Content.RawSizes[PathIndex]); + + OpenFileCache.WriteToFile( + PathIndex, + [&Path, &Content](uint32_t TargetIndex) { return (Path / Content.Paths[TargetIndex]).make_preferred(); }, + Chunk, + FileOffset, + Content.RawSizes[PathIndex]); + OutBytesWritten += ChunkSize; + } + OutChunksComplete += gsl::narrow(ChunkBuffers.size()); + } + return true; + } + return false; + } + + SharedBuffer Decompress(const IoBuffer& CompressedChunk, const IoHash& ChunkHash, const uint64_t ChunkRawSize) + { + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(CompressedChunk), RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Invalid build blob format for chunk {}", ChunkHash)); + } + if (RawHash != ChunkHash) + { + throw std::runtime_error(fmt::format("Mismatching build blob {}, but compressed header rawhash is {}", ChunkHash, RawHash)); + } + if (RawSize != ChunkRawSize) + { + throw std::runtime_error( + fmt::format("Mismatching build blob {}, expected raw size {} but recevied raw size {}", ChunkHash, ChunkRawSize, RawSize)); + } + if (!Compressed) + { + throw std::runtime_error(fmt::format("Invalid build blob {}, not a compressed buffer", ChunkHash)); + } + + SharedBuffer Decompressed = Compressed.Decompress(); + + if (!Decompressed) + { + throw std::runtime_error(fmt::format("Decompression of build blob {} failed", ChunkHash)); + } + return Decompressed; + } + + void WriteChunkToDisk(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + std::span ChunkTargets, + const CompositeBuffer& ChunkData, + WriteFileCache& OpenFileCache, + uint64_t& OutBytesWritten) + { + for (const ChunkedContentLookup::ChunkLocation* TargetPtr : ChunkTargets) + { + const auto& Target = *TargetPtr; + const uint64_t FileOffset = Target.Offset; + + OpenFileCache.WriteToFile( + Target.PathIndex, + [&Path, &Content](uint32_t TargetIndex) { return (Path / Content.Paths[TargetIndex]).make_preferred(); }, + ChunkData, + FileOffset, + Content.RawSizes[Target.PathIndex]); + OutBytesWritten += ChunkData.GetSize(); + } + } + + void DownloadLargeBlob(BuildStorage& Storage, + const std::filesystem::path& Path, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& RemoteLookup, + const Oid& BuildId, + const IoHash& ChunkHash, + const std::uint64_t PreferredMultipartChunkSize, + const std::vector& ChunkTargetPtrs, + ParallellWork& Work, + WorkerThreadPool& WritePool, + WorkerThreadPool& NetworkPool, + std::atomic& AbortFlag, + std::atomic& BytesWritten, + std::atomic& WriteToDiskBytes, + std::atomic& BytesDownloaded, + std::atomic& LooseChunksBytes, + std::atomic& DownloadedChunks, + std::atomic& ChunksComplete, + std::atomic& MultipartAttachmentCount) + { + struct WorkloadData + { + TemporaryFile TempFile; + }; + std::shared_ptr Workload(std::make_shared()); + + std::error_code Ec; + Workload->TempFile.CreateTemporary(Path / ZenTempChunkFolderName, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed opening temporary file '{}': {} ({})", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); + } + std::vector> WorkItems = Storage.GetLargeBuildBlob( + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + [&Path, + &RemoteContent, + &RemoteLookup, + &Work, + &WritePool, + Workload, + ChunkHash, + &BytesDownloaded, + &LooseChunksBytes, + &BytesWritten, + &WriteToDiskBytes, + &DownloadedChunks, + &ChunksComplete, + ChunkTargetPtrs = std::vector(ChunkTargetPtrs), + &AbortFlag](uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining) { + BytesDownloaded += Chunk.GetSize(); + LooseChunksBytes += Chunk.GetSize(); + + if (!AbortFlag.load()) + { + Workload->TempFile.Write(Chunk.GetView(), Offset); + if (Chunk.GetSize() == BytesRemaining) + { + DownloadedChunks++; + + Work.ScheduleWork( + WritePool, + [&Path, + &RemoteContent, + &RemoteLookup, + ChunkHash, + Workload, + Offset, + BytesRemaining, + &ChunksComplete, + &BytesWritten, + &WriteToDiskBytes, + ChunkTargetPtrs](std::atomic& AbortFlag) { + if (!AbortFlag) + { + uint64_t CompressedSize = Workload->TempFile.FileSize(); + void* FileHandle = Workload->TempFile.Detach(); + IoBuffer CompressedPart = IoBuffer(IoBuffer::File, + FileHandle, + 0, + CompressedSize, + /*IsWholeFile*/ true); + if (!CompressedPart) + { + throw std::runtime_error( + fmt::format("Multipart build blob {} is not a compressed buffer", ChunkHash)); + } + CompressedPart.SetDeleteOnClose(true); + + uint64_t TotalBytesWritten = 0; + + uint32_t ChunkIndex = RemoteLookup.ChunkHashToChunkIndex.at(ChunkHash); + + SharedBuffer Chunk = + Decompress(CompressedPart, ChunkHash, RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + + // ZEN_ASSERT_SLOW(ChunkHash == + // IoHash::HashBuffer(Chunk.AsIoBuffer())); + + { + WriteFileCache OpenFileCache; + + WriteChunkToDisk(Path, + RemoteContent, + ChunkTargetPtrs, + CompositeBuffer(Chunk), + OpenFileCache, + TotalBytesWritten); + ChunksComplete++; + BytesWritten += TotalBytesWritten; + WriteToDiskBytes += TotalBytesWritten; + } + } + }, + [&, ChunkHash](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed writing chunk {}. Reason: {}", ChunkHash, Ex.what()); + AbortFlag = true; + }); + } + } + }); + if (!WorkItems.empty()) + { + MultipartAttachmentCount++; + } + for (auto& WorkItem : WorkItems) + { + Work.ScheduleWork( + NetworkPool, + [WorkItem = std::move(WorkItem)](std::atomic& AbortFlag) { + if (!AbortFlag) + { + WorkItem(); + } + }, + [&, ChunkHash](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed uploading multipart blob {}. Reason: {}", ChunkHash, Ex.what()); + AbortFlag = true; + }); + } + } + + bool UpdateFolder(BuildStorage& Storage, + const Oid& BuildId, + const std::filesystem::path& Path, + const std::uint64_t LargeAttachmentSize, + const std::uint64_t PreferredMultipartChunkSize, + const ChunkedFolderContent& LocalContent, + const ChunkedFolderContent& RemoteContent, + const std::vector& BlockDescriptions, + const std::vector& LooseChunkHashes, + bool WipeTargetFolder, + std::atomic& AbortFlag, + FolderContent& OutLocalFolderState) + { + std::atomic DownloadedBlocks = 0; + std::atomic BlockBytes = 0; + std::atomic DownloadedChunks = 0; + std::atomic LooseChunksBytes = 0; + std::atomic WriteToDiskBytes = 0; + std::atomic MultipartAttachmentCount = 0; + + DiskStatistics DiskStats; + + Stopwatch IndexTimer; + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); + + const ChunkedContentLookup RemoteLookup = BuildChunkedContentLookup(RemoteContent); + + ZEN_CONSOLE("Indexed local and remote content in {}", NiceTimeSpanMs(IndexTimer.GetElapsedTimeMs())); + + const std::filesystem::path CacheFolderPath = Path / ZenTempReuseFolderName; + + tsl::robin_map LocalRawHashToPathIndex; + + if (!WipeTargetFolder) + { + Stopwatch CacheTimer; + + for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) + { + if (LocalContent.RawSizes[LocalPathIndex] > 0) + { + const uint32_t SequenceRawHashIndex = + LocalLookup.RawHashToSequenceRawHashIndex.at(LocalContent.RawHashes[LocalPathIndex]); + uint32_t ChunkCount = LocalContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + if (ChunkCount > 0) + { + const IoHash LocalRawHash = LocalContent.RawHashes[LocalPathIndex]; + if (!LocalRawHashToPathIndex.contains(LocalRawHash)) + { + LocalRawHashToPathIndex.insert_or_assign(LocalRawHash, LocalPathIndex); + } + } + } + } + + { + std::vector IncludeLocalFiles(LocalContent.Paths.size(), false); + + for (const IoHash& ChunkHash : RemoteContent.ChunkedContent.ChunkHashes) + { + if (auto It = LocalLookup.ChunkHashToChunkIndex.find(ChunkHash); It != LocalLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t LocalChunkIndex = It->second; + std::span LocalChunkTargetRange = + GetChunkLocations(LocalLookup, LocalChunkIndex); + if (!LocalChunkTargetRange.empty()) + { + std::uint32_t LocalPathIndex = LocalChunkTargetRange[0].PathIndex; + IncludeLocalFiles[LocalPathIndex] = true; + } + } + } + for (const IoHash& RawHash : RemoteContent.RawHashes) + { + if (auto It = LocalRawHashToPathIndex.find(RawHash); It != LocalRawHashToPathIndex.end()) + { + uint32_t LocalPathIndex = It->second; + IncludeLocalFiles[LocalPathIndex] = true; + } + } + + for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) + { + if (!IncludeLocalFiles[LocalPathIndex]) + { + LocalRawHashToPathIndex.erase(LocalContent.RawHashes[LocalPathIndex]); + } + } + } + + uint64_t CachedBytes = 0; + CreateDirectories(CacheFolderPath); + for (auto& CachedLocalFile : LocalRawHashToPathIndex) + { + const IoHash& LocalRawHash = CachedLocalFile.first; + const uint32_t LocalPathIndex = CachedLocalFile.second; + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + const std::filesystem::path CacheFilePath = (CacheFolderPath / LocalRawHash.ToHexString()).make_preferred(); + + SetFileReadOnly(LocalFilePath, false); + + std::filesystem::rename(LocalFilePath, CacheFilePath); + CachedBytes += std::filesystem::file_size(CacheFilePath); + } + + ZEN_CONSOLE("Cached {} ({}) local files in {}", + LocalRawHashToPathIndex.size(), + NiceBytes(CachedBytes), + NiceTimeSpanMs(CacheTimer.GetElapsedTimeMs())); + } + + if (AbortFlag) + { + return true; + } + + CleanDirectory(Path, DefaultExcludeFolders); + + Stopwatch CacheMappingTimer; + + std::atomic BytesWritten = 0; + uint64_t CacheMappedBytesForReuse = 0; + + std::vector RemotePathIndexWantsCopyFromCacheFlags(RemoteContent.Paths.size(), false); + std::vector> RemoteChunkIndexWantsCopyFromCacheFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); + // Guard if he same chunks is in multiple blocks (can happen due to block reuse, cache reuse blocks writes directly) + std::vector> RemoteChunkIndexNeedsCopyFromSourceFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); + + struct CacheCopyData + { + std::filesystem::path OriginalSourceFileName; + IoHash LocalFileRawHash; + uint64_t LocalFileRawSize = 0; + std::vector RemotePathIndexes; + std::vector ChunkSourcePtrs; + struct ChunkTarget + { + uint32_t ChunkSourceCount; + uint64_t ChunkRawSize; + uint64_t LocalFileOffset; + }; + std::vector ChunkTargets; + }; + + tsl::robin_map RawHashToCacheCopyDataIndex; + std::vector CacheCopyDatas; + uint32_t ChunkCountToWrite = 0; + + // Pick up all whole files to copy and/or move + for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) + { + const IoHash& RemoteRawHash = RemoteContent.RawHashes[RemotePathIndex]; + if (auto It = LocalRawHashToPathIndex.find(RemoteRawHash); It != LocalRawHashToPathIndex.end()) + { + if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(RemoteRawHash); CopySourceIt != RawHashToCacheCopyDataIndex.end()) + { + CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; + Data.RemotePathIndexes.push_back(RemotePathIndex); + } + else + { + const uint32_t LocalPathIndex = It->second; + ZEN_ASSERT(LocalContent.RawSizes[LocalPathIndex] == RemoteContent.RawSizes[RemotePathIndex]); + ZEN_ASSERT(LocalContent.RawHashes[LocalPathIndex] == RemoteContent.RawHashes[RemotePathIndex]); + RawHashToCacheCopyDataIndex.insert_or_assign(RemoteRawHash, CacheCopyDatas.size()); + CacheCopyDatas.push_back(CacheCopyData{.OriginalSourceFileName = LocalContent.Paths[LocalPathIndex], + .LocalFileRawHash = RemoteRawHash, + .LocalFileRawSize = LocalContent.RawSizes[LocalPathIndex], + .RemotePathIndexes = {RemotePathIndex}}); + CacheMappedBytesForReuse += RemoteContent.RawSizes[RemotePathIndex]; + ChunkCountToWrite++; + } + RemotePathIndexWantsCopyFromCacheFlags[RemotePathIndex] = true; + } + } + + // Pick up all chunks in cached files and make sure we block moving of cache files if we need part of them + for (auto& CachedLocalFile : LocalRawHashToPathIndex) + { + const IoHash& LocalFileRawHash = CachedLocalFile.first; + const uint32_t LocalPathIndex = CachedLocalFile.second; + const uint32_t LocalSequenceRawHashIndex = LocalLookup.RawHashToSequenceRawHashIndex.at(LocalFileRawHash); + const uint32_t LocalOrderOffset = + LocalLookup.SequenceRawHashIndexChunkOrderOffset[LocalSequenceRawHashIndex]; // CachedLocalFile.second.ChunkOrderOffset; + + { + uint64_t SourceOffset = 0; + const uint32_t LocalChunkCount = LocalContent.ChunkedContent.ChunkCounts[LocalSequenceRawHashIndex]; + for (uint32_t LocalOrderIndex = 0; LocalOrderIndex < LocalChunkCount; LocalOrderIndex++) + { + const uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[LocalOrderOffset + LocalOrderIndex]; + const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + const uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; + if (auto RemoteChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(LocalChunkHash); + RemoteChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = RemoteChunkIt->second; + if (!RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex]) + { + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(RemotePathIndexWantsCopyFromCacheFlags, RemoteLookup, RemoteChunkIndex); + + if (!ChunkTargetPtrs.empty()) + { + CacheCopyData::ChunkTarget Target = {.ChunkSourceCount = gsl::narrow(ChunkTargetPtrs.size()), + .ChunkRawSize = LocalChunkRawSize, + .LocalFileOffset = SourceOffset}; + if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(LocalFileRawHash); + CopySourceIt != RawHashToCacheCopyDataIndex.end()) + { + CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; + Data.ChunkSourcePtrs.insert(Data.ChunkSourcePtrs.end(), ChunkTargetPtrs.begin(), ChunkTargetPtrs.end()); + Data.ChunkTargets.push_back(Target); + } + else + { + RawHashToCacheCopyDataIndex.insert_or_assign(LocalFileRawHash, CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.OriginalSourceFileName = LocalContent.Paths[LocalPathIndex], + .LocalFileRawHash = LocalFileRawHash, + .LocalFileRawSize = LocalContent.RawSizes[LocalPathIndex], + .RemotePathIndexes = {}, + .ChunkSourcePtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector{Target}}); + } + CacheMappedBytesForReuse += LocalChunkRawSize; + } + RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex] = true; + } + } + SourceOffset += LocalChunkRawSize; + } + } + } + + for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) + { + if (RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex]) + { + ChunkCountToWrite++; + } + else + { + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(RemotePathIndexWantsCopyFromCacheFlags, RemoteLookup, RemoteChunkIndex); + if (!ChunkTargetPtrs.empty()) + { + RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex] = true; + ChunkCountToWrite++; + } + } + } + std::atomic ChunkCountWritten = 0; + + ZEN_CONSOLE("Mapped {} cached data for reuse in {}", + NiceBytes(CacheMappedBytesForReuse), + NiceTimeSpanMs(CacheMappingTimer.GetElapsedTimeMs())); + + auto CopyChunksFromCacheFile = [](const std::filesystem::path& Path, + BufferedOpenFile& SourceFile, + WriteFileCache& OpenFileCache, + const ChunkedFolderContent& RemoteContent, + const uint64_t LocalFileSourceOffset, + const uint64_t LocalChunkRawSize, + std::span ChunkTargetPtrs, + uint64_t& OutBytesWritten) { + CompositeBuffer Chunk = SourceFile.GetRange(LocalFileSourceOffset, LocalChunkRawSize); + uint64_t TotalBytesWritten = 0; + + WriteChunkToDisk(Path, RemoteContent, ChunkTargetPtrs, Chunk, OpenFileCache, TotalBytesWritten); + OutBytesWritten += TotalBytesWritten; + }; + + auto CloneFullFileFromCache = [](const std::filesystem::path& Path, + const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const IoHash& FileRawHash, + const uint64_t FileRawSize, + std::span FullCloneRemotePathIndexes, + bool CanMove, + uint64_t& OutBytesWritten) { + const std::filesystem::path CacheFilePath = (CacheFolderPath / FileRawHash.ToHexString()).make_preferred(); + + size_t CopyCount = FullCloneRemotePathIndexes.size(); + if (CanMove) + { + // If every reference to this chunk has are full files we can move the cache file to the last target + CopyCount--; + } + + for (uint32_t RemotePathIndex : FullCloneRemotePathIndexes) + { + const std::filesystem::path TargetPath = (Path / RemoteContent.Paths[RemotePathIndex]).make_preferred(); + CreateDirectories(TargetPath.parent_path()); + + if (CopyCount == 0) + { + std::filesystem::rename(CacheFilePath, TargetPath); + } + else + { + CopyFile(CacheFilePath, TargetPath, {.EnableClone = false}); + ZEN_ASSERT(CopyCount > 0); + CopyCount--; + } + OutBytesWritten += FileRawSize; + } + }; + + WorkerThreadPool& NetworkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + WorkerThreadPool& WritePool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + + ProgressBar WriteProgressBar(UsePlainProgress); + ParallellWork Work(AbortFlag); + + std::atomic BytesDownloaded = 0; + + for (size_t CopyDataIndex = 0; CopyDataIndex < CacheCopyDatas.size(); CopyDataIndex++) + { + if (AbortFlag) + { + break; + } + + Work.ScheduleWork( + WritePool, // GetSyncWorkerPool(),// + [&, CopyDataIndex](std::atomic& AbortFlag) { + if (!AbortFlag) + { + const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; + const std::filesystem::path CacheFilePath = + (CacheFolderPath / CopyData.LocalFileRawHash.ToHexString()).make_preferred(); + + if (!CopyData.ChunkSourcePtrs.empty()) + { + uint64_t CacheLocalFileBytesRead = 0; + + size_t TargetStart = 0; + const std::span AllTargets(CopyData.ChunkSourcePtrs); + + struct WriteOp + { + const ChunkedContentLookup::ChunkLocation* Target; + uint64_t LocalFileOffset; + uint64_t ChunkSize; + }; + + std::vector WriteOps; + WriteOps.reserve(CopyData.ChunkSourcePtrs.size()); + + for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) + { + std::span TargetRange = + AllTargets.subspan(TargetStart, ChunkTarget.ChunkSourceCount); + for (const ChunkedContentLookup::ChunkLocation* Target : TargetRange) + { + WriteOps.push_back(WriteOp{.Target = Target, + .LocalFileOffset = ChunkTarget.LocalFileOffset, + .ChunkSize = ChunkTarget.ChunkRawSize}); + } + TargetStart += ChunkTarget.ChunkSourceCount; + } + + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { + if (Lhs.Target->PathIndex < Rhs.Target->PathIndex) + { + return true; + } + else if (Lhs.Target->PathIndex > Rhs.Target->PathIndex) + { + return false; + } + if (Lhs.Target->Offset < Rhs.Target->Offset) + { + return true; + } + return false; + }); + + BufferedOpenFile SourceFile(CacheFilePath); + WriteFileCache OpenFileCache; + for (const WriteOp& Op : WriteOps) + { + const uint32_t RemotePathIndex = Op.Target->PathIndex; + const uint64_t ChunkSize = Op.ChunkSize; + CompositeBuffer ChunkSource = SourceFile.GetRange(Op.LocalFileOffset, ChunkSize); + + ZEN_ASSERT(Op.Target->Offset + ChunkSource.GetSize() <= RemoteContent.RawSizes[RemotePathIndex]); + + OpenFileCache.WriteToFile( + RemotePathIndex, + [&Path, &RemoteContent](uint32_t TargetIndex) { + return (Path / RemoteContent.Paths[TargetIndex]).make_preferred(); + }, + ChunkSource, + Op.Target->Offset, + RemoteContent.RawSizes[RemotePathIndex]); + BytesWritten += ChunkSize; + WriteToDiskBytes += ChunkSize; + CacheLocalFileBytesRead += ChunkSize; // TODO: This should be the sum of unique chunk sizes? + } + ChunkCountWritten += gsl::narrow(CopyData.ChunkTargets.size()); + ZEN_DEBUG("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), CopyData.OriginalSourceFileName); + } + + if (CopyData.RemotePathIndexes.empty()) + { + std::filesystem::remove(CacheFilePath); + } + else + { + uint64_t LocalBytesWritten = 0; + CloneFullFileFromCache(Path, + CacheFolderPath, + RemoteContent, + CopyData.LocalFileRawHash, + CopyData.LocalFileRawSize, + CopyData.RemotePathIndexes, + true, + LocalBytesWritten); + // CacheLocalFileBytesRead += CopyData.LocalFileRawSize; + BytesWritten += LocalBytesWritten; + WriteToDiskBytes += LocalBytesWritten; + ChunkCountWritten++; + + ZEN_DEBUG("Used full cached file {} ({}) for {} ({}) targets", + CopyData.OriginalSourceFileName, + NiceBytes(CopyData.LocalFileRawSize), + CopyData.RemotePathIndexes.size(), + NiceBytes(LocalBytesWritten)); + } + } + }, + [&, CopyDataIndex](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed reading cached file {}. Reason: {}", + CacheCopyDatas[CopyDataIndex].OriginalSourceFileName, + Ex.what()); + AbortFlag = true; + }); + } + + for (const IoHash ChunkHash : LooseChunkHashes) + { + if (AbortFlag) + { + break; + } + + uint32_t RemoteChunkIndex = RemoteLookup.ChunkHashToChunkIndex.at(ChunkHash); + if (RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex]) + { + ZEN_DEBUG("Skipping chunk {} due to cache reuse", ChunkHash); + continue; + } + bool NeedsCopy = true; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false)) + { + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(RemotePathIndexWantsCopyFromCacheFlags, RemoteLookup, RemoteChunkIndex); + + if (ChunkTargetPtrs.empty()) + { + ZEN_DEBUG("Skipping chunk {} due to cache reuse", ChunkHash); + } + else + { + Work.ScheduleWork( + NetworkPool, + [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs](std::atomic& AbortFlag) { + if (!AbortFlag) + { + if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) + { + DownloadLargeBlob(Storage, + Path, + RemoteContent, + RemoteLookup, + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + ChunkTargetPtrs, + Work, + WritePool, + NetworkPool, + AbortFlag, + BytesWritten, + WriteToDiskBytes, + BytesDownloaded, + LooseChunksBytes, + DownloadedChunks, + ChunkCountWritten, + MultipartAttachmentCount); + } + else + { + IoBuffer CompressedPart = Storage.GetBuildBlob(BuildId, ChunkHash); + if (!CompressedPart) + { + throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash)); + } + BytesDownloaded += CompressedPart.GetSize(); + LooseChunksBytes += CompressedPart.GetSize(); + DownloadedChunks++; + + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, + [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs, CompressedPart = std::move(CompressedPart)]( + std::atomic& AbortFlag) { + if (!AbortFlag) + { + uint64_t TotalBytesWritten = 0; + SharedBuffer Chunk = + Decompress(CompressedPart, + ChunkHash, + RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex]); + WriteFileCache OpenFileCache; + + WriteChunkToDisk(Path, + RemoteContent, + ChunkTargetPtrs, + CompositeBuffer(Chunk), + OpenFileCache, + TotalBytesWritten); + ChunkCountWritten++; + BytesWritten += TotalBytesWritten; + WriteToDiskBytes += TotalBytesWritten; + } + }, + [&, ChunkHash](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed writing chunk {}. Reason: {}", ChunkHash, Ex.what()); + AbortFlag = true; + }); + } + } + } + }, + [&, ChunkHash](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed downloading chunk {}. Reason: {}", ChunkHash, Ex.what()); + AbortFlag = true; + }); + } + } + } + + size_t BlockCount = BlockDescriptions.size(); + std::atomic BlocksComplete = 0; + + auto IsBlockNeeded = [&RemoteContent, &RemoteLookup, &RemoteChunkIndexNeedsCopyFromSourceFlags]( + const ChunkBlockDescription& BlockDescription) -> bool { + for (const IoHash& ChunkHash : BlockDescription.ChunkRawHashes) + { + if (auto It = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = It->second; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]) + { + return true; + } + } + } + return false; + }; + + for (size_t BlockIndex = 0; BlockIndex < BlockCount; BlockIndex++) + { + if (Work.IsAborted()) + { + break; + } + Work.ScheduleWork( + WritePool, + [&, BlockIndex](std::atomic& AbortFlag) { + if (!AbortFlag) + { + if (IsBlockNeeded(BlockDescriptions[BlockIndex])) + { + Work.ScheduleWork( + NetworkPool, + [&, BlockIndex](std::atomic& AbortFlag) { + IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescriptions[BlockIndex].BlockHash); + if (!BlockBuffer) + { + throw std::runtime_error( + fmt::format("Block {} is missing", BlockDescriptions[BlockIndex].BlockHash)); + } + BytesDownloaded += BlockBuffer.GetSize(); + BlockBytes += BlockBuffer.GetSize(); + DownloadedBlocks++; + + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, + [&, BlockIndex, BlockBuffer = std::move(BlockBuffer)](std::atomic& AbortFlag) { + if (!AbortFlag) + { + IoHash BlockRawHash; + uint64_t BlockRawSize; + CompressedBuffer CompressedBlockBuffer = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(BlockBuffer)), + BlockRawHash, + BlockRawSize); + if (!CompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", + BlockDescriptions[BlockIndex].BlockHash)); + } + + if (BlockRawHash != BlockDescriptions[BlockIndex].BlockHash) + { + throw std::runtime_error( + fmt::format("Block {} header has a mismatching raw hash {}", + BlockDescriptions[BlockIndex].BlockHash, + BlockRawHash)); + } + + CompositeBuffer DecompressedBlockBuffer = CompressedBlockBuffer.DecompressToComposite(); + if (!DecompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} failed to decompress", + BlockDescriptions[BlockIndex].BlockHash)); + } + + ZEN_ASSERT_SLOW(BlockDescriptions[BlockIndex].BlockHash == + IoHash::HashBuffer(DecompressedBlockBuffer)); + + uint64_t BytesWrittenToDisk = 0; + uint32_t ChunksReadFromBlock = 0; + if (WriteBlockToDisk(Path, + RemoteContent, + RemotePathIndexWantsCopyFromCacheFlags, + DecompressedBlockBuffer, + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags.data(), + ChunksReadFromBlock, + BytesWrittenToDisk)) + { + BytesWritten += BytesWrittenToDisk; + WriteToDiskBytes += BytesWrittenToDisk; + ChunkCountWritten += ChunksReadFromBlock; + } + else + { + throw std::runtime_error( + fmt::format("Block {} is malformed", BlockDescriptions[BlockIndex].BlockHash)); + } + BlocksComplete++; + } + }, + [&, BlockIndex](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed writing block {}. Reason: {}", + BlockDescriptions[BlockIndex].BlockHash, + Ex.what()); + AbortFlag = true; + }); + } + }, + [&, BlockIndex](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed downloading block {}. Reason: {}", + BlockDescriptions[BlockIndex].BlockHash, + Ex.what()); + AbortFlag = true; + }); + } + else + { + ZEN_DEBUG("Skipping block {} due to cache reuse", BlockDescriptions[BlockIndex].BlockHash); + BlocksComplete++; + } + } + }, + [&, BlockIndex](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed determning if block {} is needed. Reason: {}", BlockDescriptions[BlockIndex].BlockHash, Ex.what()); + AbortFlag = true; + }); + } + for (uint32_t PathIndex = 0; PathIndex < RemoteContent.Paths.size(); PathIndex++) + { + if (Work.IsAborted()) + { + break; + } + if (RemoteContent.RawSizes[PathIndex] == 0) + { + Work.ScheduleWork( + WritePool, + [&, PathIndex](std::atomic& AbortFlag) { + if (!AbortFlag) + { + const std::filesystem::path TargetPath = (Path / RemoteContent.Paths[PathIndex]).make_preferred(); + CreateDirectories(TargetPath.parent_path()); + BasicFile OutputFile; + OutputFile.Open(TargetPath, BasicFile::Mode::kTruncate); + } + }, + [&, PathIndex](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed creating file at {}. Reason: {}", RemoteContent.Paths[PathIndex], Ex.what()); + AbortFlag = true; + }); + } + } + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + ZEN_ASSERT(ChunkCountToWrite >= ChunkCountWritten.load()); + WriteProgressBar.UpdateState( + {.Task = "Writing chunks ", + .Details = fmt::format("Written {} chunks out of {}. {} ouf of {} blocks complete. Downloaded: {}. Written: {}", + ChunkCountWritten.load(), + ChunkCountToWrite, + BlocksComplete.load(), + BlockCount, + NiceBytes(BytesDownloaded.load()), + NiceBytes(BytesWritten.load())), + .TotalCount = gsl::narrow(ChunkCountToWrite), + .RemainingCount = gsl::narrow(ChunkCountToWrite - ChunkCountWritten.load())}, + false); + }); + WriteProgressBar.Finish(); + + { + ProgressBar PremissionsProgressBar(false); + if (!RemoteContent.Attributes.empty()) + { + auto SetNativeFileAttributes = + [](const std::filesystem::path FilePath, SourcePlatform SourcePlatform, uint32_t Attributes) -> uint32_t { +#if ZEN_PLATFORM_WINDOWS + if (SourcePlatform == SourcePlatform::Windows) + { + SetFileAttributes(FilePath, Attributes); + return Attributes; + } + else + { + uint32_t CurrentAttributes = GetFileAttributes(FilePath); + uint32_t NewAttributes = MakeFileAttributeReadOnly(CurrentAttributes, IsFileModeReadOnly(Attributes)); + if (CurrentAttributes != NewAttributes) + { + SetFileAttributes(FilePath, NewAttributes); + } + return NewAttributes; + } +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + if (SourcePlatform != SourcePlatform::Windows) + { + SetFileMode(FilePath, Attributes); + return Attributes; + } + else + { + uint32_t CurrentMode = GetFileMode(FilePath); + uint32_t NewMode = MakeFileModeReadOnly(CurrentMode, IsFileAttributeReadOnly(Attributes)); + if (CurrentMode != NewMode) + { + SetFileMode(FilePath, NewMode); + } + return NewMode; + } +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + }; + + OutLocalFolderState.Paths.reserve(RemoteContent.Paths.size()); + OutLocalFolderState.RawSizes.reserve(RemoteContent.Paths.size()); + OutLocalFolderState.Attributes.reserve(RemoteContent.Paths.size()); + OutLocalFolderState.ModificationTicks.reserve(RemoteContent.Paths.size()); + for (uint32_t PathIndex = 0; PathIndex < RemoteContent.Paths.size(); PathIndex++) + { + const std::filesystem::path LocalFilePath = (Path / RemoteContent.Paths[PathIndex]); + const uint32_t CurrentPlatformAttributes = + SetNativeFileAttributes(LocalFilePath, RemoteContent.Platform, RemoteContent.Attributes[PathIndex]); + + OutLocalFolderState.Paths.push_back(RemoteContent.Paths[PathIndex]); + OutLocalFolderState.RawSizes.push_back(RemoteContent.RawSizes[PathIndex]); + OutLocalFolderState.Attributes.push_back(CurrentPlatformAttributes); + OutLocalFolderState.ModificationTicks.push_back(GetModificationTickFromPath(LocalFilePath)); + + PremissionsProgressBar.UpdateState( + {.Task = "Set permissions ", + .Details = fmt::format("Updated {} files out of {}", PathIndex, RemoteContent.Paths.size()), + .TotalCount = RemoteContent.Paths.size(), + .RemainingCount = (RemoteContent.Paths.size() - PathIndex)}, + false); + } + } + PremissionsProgressBar.Finish(); + } + + return false; + } + + std::vector> ResolveBuildPartNames(BuildStorage& Storage, + const Oid& BuildId, + const std::vector& BuildPartIds, + std::span BuildPartNames, + std::uint64_t& OutPreferredMultipartChunkSize) + { + std::vector> Result; + { + Stopwatch GetBuildTimer; + + std::vector> AvailableParts; + + CbObject BuildObject = Storage.GetBuild(BuildId); + ZEN_CONSOLE("GetBuild took {}. Payload size: {}", + NiceLatencyNs(GetBuildTimer.GetElapsedTimeUs() * 1000), + NiceBytes(BuildObject.GetSize())); + + CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); + if (!PartsObject) + { + throw std::runtime_error("Build object does not have a 'parts' object"); + } + + OutPreferredMultipartChunkSize = BuildObject["chunkSize"sv].AsUInt64(OutPreferredMultipartChunkSize); + + for (CbFieldView PartView : PartsObject) + { + const std::string BuildPartName = std::string(PartView.GetName()); + const Oid BuildPartId = PartView.AsObjectId(); + if (BuildPartId == Oid::Zero) + { + ExtendableStringBuilder<128> SB; + for (CbFieldView ScanPartView : PartsObject) + { + SB.Append(fmt::format("\n {}: {}", ScanPartView.GetName(), ScanPartView.AsObjectId())); + } + throw std::runtime_error( + fmt::format("Build object parts does not have a '{}' object id{}", BuildPartName, SB.ToView())); + } + AvailableParts.push_back({BuildPartId, BuildPartName}); + } + + if (BuildPartIds.empty() && BuildPartNames.empty()) + { + Result = AvailableParts; + } + else + { + for (const std::string& BuildPartName : BuildPartNames) + { + if (auto It = std::find_if(AvailableParts.begin(), + AvailableParts.end(), + [&BuildPartName](const auto& Part) { return Part.second == BuildPartName; }); + It != AvailableParts.end()) + { + Result.push_back(*It); + } + else + { + throw std::runtime_error(fmt::format("Build {} object does not have a part named '{}'", BuildId, BuildPartName)); + } + } + for (const Oid& BuildPartId : BuildPartIds) + { + if (auto It = std::find_if(AvailableParts.begin(), + AvailableParts.end(), + [&BuildPartId](const auto& Part) { return Part.first == BuildPartId; }); + It != AvailableParts.end()) + { + Result.push_back(*It); + } + else + { + throw std::runtime_error(fmt::format("Build {} object does not have a part with id '{}'", BuildId, BuildPartId)); + } + } + } + + if (Result.empty()) + { + throw std::runtime_error(fmt::format("Build object does not have any parts", BuildId)); + } + } + return Result; + } + + ChunkedFolderContent GetRemoteContent(BuildStorage& Storage, + const Oid& BuildId, + const std::vector>& BuildParts, + std::unique_ptr& OutChunkController, + std::vector& OutPartContents, + std::vector& OutBlockDescriptions, + std::vector& OutLooseChunkHashes) + { + Stopwatch GetBuildPartTimer; + CbObject BuildPartManifest = Storage.GetBuildPart(BuildId, BuildParts[0].first); + ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. Payload size: {}", + BuildParts[0].first, + BuildParts[0].second, + NiceLatencyNs(GetBuildPartTimer.GetElapsedTimeUs() * 1000), + NiceBytes(BuildPartManifest.GetSize())); + + { + CbObjectView Chunker = BuildPartManifest["chunker"sv].AsObjectView(); + std::string_view ChunkerName = Chunker["name"sv].AsString(); + CbObjectView Parameters = Chunker["parameters"sv].AsObjectView(); + OutChunkController = CreateChunkingController(ChunkerName, Parameters); + } + + auto ParseBuildPartManifest = [](BuildStorage& Storage, + const Oid& BuildId, + const Oid& BuildPartId, + CbObject BuildPartManifest, + ChunkedFolderContent& OutRemoteContent, + std::vector& OutBlockDescriptions, + std::vector& OutLooseChunkHashes) { + std::vector AbsoluteChunkOrders; + std::vector LooseChunkRawSizes; + std::vector BlockRawHashes; + + ReadBuildContentFromCompactBinary(BuildPartManifest, + OutRemoteContent.Platform, + OutRemoteContent.Paths, + OutRemoteContent.RawHashes, + OutRemoteContent.RawSizes, + OutRemoteContent.Attributes, + OutRemoteContent.ChunkedContent.SequenceRawHashes, + OutRemoteContent.ChunkedContent.ChunkCounts, + AbsoluteChunkOrders, + OutLooseChunkHashes, + LooseChunkRawSizes, + BlockRawHashes); + + // TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them + + Stopwatch GetBlockMetadataTimer; + OutBlockDescriptions = Storage.GetBlockMetadata(BuildId, BlockRawHashes); + ZEN_CONSOLE("GetBlockMetadata for {} took {}. Found {} blocks", + BuildPartId, + NiceLatencyNs(GetBlockMetadataTimer.GetElapsedTimeUs() * 1000), + OutBlockDescriptions.size()); + + if (OutBlockDescriptions.size() != BlockRawHashes.size()) + { + bool AttemptFallback = false; + std::string ErrorDescription = + fmt::format("All required blocks could not be found, {} blocks does not have metadata in this context.", + BlockRawHashes.size() - OutBlockDescriptions.size()); + if (AttemptFallback) + { + ZEN_CONSOLE("{} Attemping fallback options.", ErrorDescription); + std::vector AugmentedBlockDescriptions; + AugmentedBlockDescriptions.reserve(BlockRawHashes.size()); + std::vector FoundBlocks = Storage.FindBlocks(BuildId); + + for (const IoHash& BlockHash : BlockRawHashes) + { + if (auto It = std::find_if( + OutBlockDescriptions.begin(), + OutBlockDescriptions.end(), + [BlockHash](const ChunkBlockDescription& Description) { return Description.BlockHash == BlockHash; }); + It != OutBlockDescriptions.end()) + { + AugmentedBlockDescriptions.emplace_back(std::move(*It)); + } + else if (auto ListBlocksIt = std::find_if( + FoundBlocks.begin(), + FoundBlocks.end(), + [BlockHash](const ChunkBlockDescription& Description) { return Description.BlockHash == BlockHash; }); + ListBlocksIt != FoundBlocks.end()) + { + ZEN_CONSOLE("Found block {} via context find successfully", BlockHash); + AugmentedBlockDescriptions.emplace_back(std::move(*ListBlocksIt)); + } + else + { + IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockHash); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} could not be found", BlockHash)); + } + IoHash BlockRawHash; + uint64_t BlockRawSize; + CompressedBuffer CompressedBlockBuffer = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(BlockBuffer)), BlockRawHash, BlockRawSize); + if (!CompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", BlockHash)); + } + + if (BlockRawHash != BlockHash) + { + throw std::runtime_error( + fmt::format("Block {} header has a mismatching raw hash {}", BlockHash, BlockRawHash)); + } + + CompositeBuffer DecompressedBlockBuffer = CompressedBlockBuffer.DecompressToComposite(); + if (!DecompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} failed to decompress", BlockHash)); + } + + ChunkBlockDescription MissingChunkDescription = + GetChunkBlockDescription(DecompressedBlockBuffer.Flatten(), BlockHash); + AugmentedBlockDescriptions.emplace_back(std::move(MissingChunkDescription)); + } + } + OutBlockDescriptions.swap(AugmentedBlockDescriptions); + } + else + { + throw std::runtime_error(ErrorDescription); + } + } + + CalculateLocalChunkOrders(AbsoluteChunkOrders, + OutLooseChunkHashes, + LooseChunkRawSizes, + OutBlockDescriptions, + OutRemoteContent.ChunkedContent.ChunkHashes, + OutRemoteContent.ChunkedContent.ChunkRawSizes, + OutRemoteContent.ChunkedContent.ChunkOrders); + }; + + OutPartContents.resize(1); + ParseBuildPartManifest(Storage, + BuildId, + BuildParts[0].first, + BuildPartManifest, + OutPartContents[0], + OutBlockDescriptions, + OutLooseChunkHashes); + ChunkedFolderContent RemoteContent; + if (BuildParts.size() > 1) + { + std::vector OverlayBlockDescriptions; + std::vector OverlayLooseChunkHashes; + for (size_t PartIndex = 1; PartIndex < BuildParts.size(); PartIndex++) + { + const Oid& OverlayBuildPartId = BuildParts[PartIndex].first; + const std::string& OverlayBuildPartName = BuildParts[PartIndex].second; + Stopwatch GetOverlayBuildPartTimer; + CbObject OverlayBuildPartManifest = Storage.GetBuildPart(BuildId, OverlayBuildPartId); + ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. Payload size: {}", + OverlayBuildPartId, + OverlayBuildPartName, + NiceLatencyNs(GetOverlayBuildPartTimer.GetElapsedTimeUs() * 1000), + NiceBytes(OverlayBuildPartManifest.GetSize())); + + ChunkedFolderContent OverlayPartContent; + std::vector OverlayPartBlockDescriptions; + std::vector OverlayPartLooseChunkHashes; + + ParseBuildPartManifest(Storage, + BuildId, + OverlayBuildPartId, + OverlayBuildPartManifest, + OverlayPartContent, + OverlayPartBlockDescriptions, + OverlayPartLooseChunkHashes); + OutPartContents.push_back(OverlayPartContent); + OverlayBlockDescriptions.insert(OverlayBlockDescriptions.end(), + OverlayPartBlockDescriptions.begin(), + OverlayPartBlockDescriptions.end()); + OverlayLooseChunkHashes.insert(OverlayLooseChunkHashes.end(), + OverlayPartLooseChunkHashes.begin(), + OverlayPartLooseChunkHashes.end()); + } + + RemoteContent = + MergeChunkedFolderContents(OutPartContents[0], std::span(OutPartContents).subspan(1)); + { + tsl::robin_set AllBlockHashes; + for (const ChunkBlockDescription& Description : OutBlockDescriptions) + { + AllBlockHashes.insert(Description.BlockHash); + } + for (const ChunkBlockDescription& Description : OverlayBlockDescriptions) + { + if (!AllBlockHashes.contains(Description.BlockHash)) + { + AllBlockHashes.insert(Description.BlockHash); + OutBlockDescriptions.push_back(Description); + } + } + } + { + tsl::robin_set AllLooseChunkHashes(OutLooseChunkHashes.begin(), OutLooseChunkHashes.end()); + for (const IoHash& OverlayLooseChunkHash : OverlayLooseChunkHashes) + { + if (!AllLooseChunkHashes.contains(OverlayLooseChunkHash)) + { + AllLooseChunkHashes.insert(OverlayLooseChunkHash); + OutLooseChunkHashes.push_back(OverlayLooseChunkHash); + } + } + } + } + else + { + RemoteContent = OutPartContents[0]; + } + return RemoteContent; + } + + ChunkedFolderContent GetLocalContent(GetFolderContentStatistics& LocalFolderScanStats, + ChunkingStatistics& ChunkingStats, + const std::filesystem::path& Path, + ChunkingController& ChunkController, + std::atomic& AbortFlag) + { + ChunkedFolderContent LocalContent; + + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + auto IsAcceptedFile = [ExcludeExtensions = + DefaultExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> bool { + for (const std::string_view& ExcludeExtension : ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; + }; + + FolderContent CurrentLocalFolderContent = GetFolderContent( + LocalFolderScanStats, + Path, + std::move(IsAcceptedFolder), + std::move(IsAcceptedFile), + GetMediumWorkerPool(EWorkloadType::Burst), + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { ZEN_DEBUG("Found {} files in '{}'...", LocalFolderScanStats.AcceptedFileCount.load(), Path); }, + AbortFlag); + if (AbortFlag) + { + return {}; + } + + FolderContent LocalFolderState; + + bool ScanContent = true; + std::vector PathIndexesOufOfDate; + if (std::filesystem::is_regular_file(Path / ZenStateFilePath)) + { + try + { + Stopwatch ReadStateTimer; + CbObject CurrentStateObject = LoadCompactBinaryObject(Path / ZenStateFilePath).Object; + if (CurrentStateObject) + { + Oid CurrentBuildId; + std::vector SavedBuildPartIds; + std::vector SavedBuildPartsNames; + std::vector SavedPartContents; + if (ReadStateObject(CurrentStateObject, + CurrentBuildId, + SavedBuildPartIds, + SavedBuildPartsNames, + SavedPartContents, + LocalFolderState)) + { + if (!SavedPartContents.empty()) + { + if (SavedPartContents.size() == 1) + { + LocalContent = std::move(SavedPartContents[0]); + } + else + { + LocalContent = + MergeChunkedFolderContents(SavedPartContents[0], + std::span(SavedPartContents).subspan(1)); + } + + if (!LocalFolderState.AreKnownFilesEqual(CurrentLocalFolderContent)) + { + std::vector DeletedPaths; + FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, CurrentLocalFolderContent, DeletedPaths); + if (!DeletedPaths.empty()) + { + LocalContent = DeletePathsFromChunkedContent(LocalContent, DeletedPaths); + } + + ZEN_CONSOLE("Updating state, {} local files deleted and {} local files updated", + DeletedPaths.size(), + UpdatedContent.Paths.size()); + if (UpdatedContent.Paths.size() > 0) + { + uint64_t ByteCountToScan = 0; + for (const uint64_t RawSize : UpdatedContent.RawSizes) + { + ByteCountToScan += RawSize; + } + ProgressBar ProgressBar(false); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + ChunkedFolderContent UpdatedLocalContent = ChunkFolderContent( + ChunkingStats, + GetMediumWorkerPool(EWorkloadType::Burst), + Path, + UpdatedContent, + ChunkController, + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + + ProgressBar.UpdateState( + {.Task = "Scanning files ", + .Details = fmt::format("{}/{} ({}/{}, {}B/s) files, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + UpdatedContent.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(ByteCountToScan), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())), + .TotalCount = ByteCountToScan, + .RemainingCount = ByteCountToScan - ChunkingStats.BytesHashed.load()}, + false); + }, + AbortFlag); + if (AbortFlag) + { + return {}; + } + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + LocalContent = MergeChunkedFolderContents(LocalContent, {{UpdatedLocalContent}}); + } + } + else + { + ZEN_CONSOLE("Using cached local state"); + } + ZEN_CONSOLE("Read local state in {}", NiceLatencyNs(ReadStateTimer.GetElapsedTimeUs() * 1000)); + ScanContent = false; + } + } + } + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE("Failed reading state file, falling back to scannning. Reason: {}", Ex.what()); + } + } + + if (ScanContent) + { + uint64_t ByteCountToScan = 0; + for (const uint64_t RawSize : CurrentLocalFolderContent.RawSizes) + { + ByteCountToScan += RawSize; + } + ProgressBar ProgressBar(false); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + ChunkedFolderContent UpdatedLocalContent = ChunkFolderContent( + ChunkingStats, + GetMediumWorkerPool(EWorkloadType::Burst), + Path, + CurrentLocalFolderContent, + ChunkController, + UsePlainProgress ? 5000 : 200, + [&](bool, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = fmt::format("{}/{} ({}/{}, {}B/s) files, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + CurrentLocalFolderContent.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + ByteCountToScan, + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())), + .TotalCount = ByteCountToScan, + .RemainingCount = (ByteCountToScan - ChunkingStats.BytesHashed.load())}, + false); + }, + AbortFlag); + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + + if (AbortFlag) + { + return {}; + } + } + return LocalContent; + } + + bool DownloadFolder(BuildStorage& Storage, + const Oid& BuildId, + const std::vector& BuildPartIds, + std::span BuildPartNames, + const std::filesystem::path& Path, + bool AllowMultiparts, + bool WipeTargetFolder) + { + Stopwatch DownloadTimer; + std::atomic AbortFlag(false); + + const std::filesystem::path ZenTempFolder = Path / ZenTempFolderName; + CreateDirectories(ZenTempFolder); + auto _ = MakeGuard([&]() { + CleanDirectory(ZenTempFolder, {}); + std::filesystem::remove(ZenTempFolder); + }); + CreateDirectories(Path / ZenTempBlockFolderName); + CreateDirectories(Path / ZenTempChunkFolderName); + CreateDirectories(Path / ZenTempReuseFolderName); + + std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + + std::vector> AllBuildParts = + ResolveBuildPartNames(Storage, BuildId, BuildPartIds, BuildPartNames, PreferredMultipartChunkSize); + + std::vector PartContents; + + std::unique_ptr ChunkController; + + std::vector BlockDescriptions; + std::vector LooseChunkHashes; + + ChunkedFolderContent RemoteContent = + GetRemoteContent(Storage, BuildId, AllBuildParts, ChunkController, PartContents, BlockDescriptions, LooseChunkHashes); + + const std::uint64_t LargeAttachmentSize = AllowMultiparts ? PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + if (!ChunkController) + { + ZEN_CONSOLE("Warning: Unspecified chunking algorith, using default"); + ChunkController = CreateBasicChunkingController(); + } + + GetFolderContentStatistics LocalFolderScanStats; + ChunkingStatistics ChunkingStats; + ChunkedFolderContent LocalContent; + if (std::filesystem::is_directory(Path)) + { + if (!WipeTargetFolder) + { + LocalContent = GetLocalContent(LocalFolderScanStats, ChunkingStats, Path, *ChunkController, AbortFlag); + } + } + else + { + CreateDirectories(Path); + } + if (AbortFlag.load()) + { + return true; + } + + auto CompareContent = [](const ChunkedFolderContent& Lsh, const ChunkedFolderContent& Rhs) { + tsl::robin_map RhsPathToIndex; + const size_t RhsPathCount = Rhs.Paths.size(); + RhsPathToIndex.reserve(RhsPathCount); + for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + const size_t LhsPathCount = Lsh.Paths.size(); + for (size_t LhsPathIndex = 0; LhsPathIndex < LhsPathCount; LhsPathIndex++) + { + if (auto It = RhsPathToIndex.find(Lsh.Paths[LhsPathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const size_t RhsPathIndex = It->second; + if ((Lsh.RawHashes[LhsPathIndex] != Rhs.RawHashes[RhsPathIndex]) || + (!FolderContent::AreFileAttributesEqual(Lsh.Attributes[LhsPathIndex], Rhs.Attributes[RhsPathIndex]))) + { + return false; + } + } + else + { + return false; + } + } + return true; + }; + + if (CompareContent(RemoteContent, LocalContent)) + { + ZEN_CONSOLE("Local state is identical to build to download. All done. Completed in {}.", + NiceLatencyNs(DownloadTimer.GetElapsedTimeUs() * 1000)); + } + else + { + ExtendableStringBuilder<128> SB; + for (const std::pair& BuildPart : AllBuildParts) + { + SB.Append(fmt::format(" {} ({})", BuildPart.second, BuildPart.first)); + } + ZEN_CONSOLE("Downloading build {}, parts:{}", BuildId, SB.ToView()); + FolderContent LocalFolderState; + if (UpdateFolder(Storage, + BuildId, + Path, + LargeAttachmentSize, + PreferredMultipartChunkSize, + LocalContent, + RemoteContent, + BlockDescriptions, + LooseChunkHashes, + WipeTargetFolder, + AbortFlag, + LocalFolderState)) + { + AbortFlag = true; + } + if (!AbortFlag) + { + VerifyFolder(RemoteContent, Path, AbortFlag); + } + + Stopwatch WriteStateTimer; + CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderState); + + CreateDirectories((Path / ZenStateFilePath).parent_path()); + TemporaryFile::SafeWriteFile(Path / ZenStateFilePath, StateObject.GetView()); + ZEN_CONSOLE("Wrote local state in {}", NiceLatencyNs(WriteStateTimer.GetElapsedTimeUs() * 1000)); + +#if 0 + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(StateObject, SB); + WriteFile(Path / ZenStateFileJsonPath, IoBuffer(IoBuffer::Wrap, SB.Data(), SB.Size())); +#endif // 0 + + ZEN_CONSOLE("Downloaded build in {}.", NiceLatencyNs(DownloadTimer.GetElapsedTimeUs() * 1000)); + } + + return AbortFlag.load(); + } + + bool DiffFolders(const std::filesystem::path& BasePath, const std::filesystem::path& ComparePath, bool OnlyChunked) + { + std::atomic AbortFlag(false); + + ChunkedFolderContent BaseFolderContent; + ChunkedFolderContent CompareFolderContent; + + { + std::unique_ptr ChunkController = CreateBasicChunkingController(); + std::vector ExcludeExtensions = DefaultExcludeExtensions; + if (OnlyChunked) + { + ExcludeExtensions.insert(ExcludeExtensions.end(), + DefaultChunkingExcludeExtensions.begin(), + DefaultChunkingExcludeExtensions.end()); + } + + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + auto IsAcceptedFile = [ExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> bool { + for (const std::string_view& ExcludeExtension : ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; + }; + + GetFolderContentStatistics BaseGetFolderContentStats; + ChunkingStatistics BaseChunkingStats; + BaseFolderContent = ScanAndChunkFolder(BaseGetFolderContentStats, + BaseChunkingStats, + BasePath, + IsAcceptedFolder, + IsAcceptedFile, + *ChunkController, + AbortFlag); + + GetFolderContentStatistics CompareGetFolderContentStats; + ChunkingStatistics CompareChunkingStats; + CompareFolderContent = ScanAndChunkFolder(CompareGetFolderContentStats, + CompareChunkingStats, + ComparePath, + IsAcceptedFolder, + IsAcceptedFile, + *ChunkController, + AbortFlag); + } + + std::vector AddedHashes; + std::vector RemovedHashes; + uint64_t RemovedSize = 0; + uint64_t AddedSize = 0; + + tsl::robin_map BaseRawHashLookup; + for (size_t PathIndex = 0; PathIndex < BaseFolderContent.RawHashes.size(); PathIndex++) + { + const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex]; + BaseRawHashLookup.insert_or_assign(RawHash, PathIndex); + } + tsl::robin_map CompareRawHashLookup; + for (size_t PathIndex = 0; PathIndex < CompareFolderContent.RawHashes.size(); PathIndex++) + { + const IoHash& RawHash = CompareFolderContent.RawHashes[PathIndex]; + if (!BaseRawHashLookup.contains(RawHash)) + { + AddedHashes.push_back(RawHash); + AddedSize += CompareFolderContent.RawSizes[PathIndex]; + } + CompareRawHashLookup.insert_or_assign(RawHash, PathIndex); + } + for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++) + { + const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex]; + if (!CompareRawHashLookup.contains(RawHash)) + { + RemovedHashes.push_back(RawHash); + RemovedSize += BaseFolderContent.RawSizes[PathIndex]; + } + } + + uint64_t BaseTotalRawSize = 0; + for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++) + { + BaseTotalRawSize += BaseFolderContent.RawSizes[PathIndex]; + } + + double KeptPercent = BaseTotalRawSize > 0 ? (100.0 * (BaseTotalRawSize - RemovedSize)) / BaseTotalRawSize : 0; + + ZEN_CONSOLE("{} ({}) files removed, {} ({}) files added, {} ({} {:.1f}%) files kept", + RemovedHashes.size(), + NiceBytes(RemovedSize), + AddedHashes.size(), + NiceBytes(AddedSize), + BaseFolderContent.Paths.size() - RemovedHashes.size(), + NiceBytes(BaseTotalRawSize - RemovedSize), + KeptPercent); + + uint64_t CompareTotalRawSize = 0; + + uint64_t FoundChunkCount = 0; + uint64_t FoundChunkSize = 0; + uint64_t NewChunkCount = 0; + uint64_t NewChunkSize = 0; + const ChunkedContentLookup BaseFolderLookup = BuildChunkedContentLookup(BaseFolderContent); + for (uint32_t ChunkIndex = 0; ChunkIndex < CompareFolderContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) + { + const IoHash& ChunkHash = CompareFolderContent.ChunkedContent.ChunkHashes[ChunkIndex]; + if (BaseFolderLookup.ChunkHashToChunkIndex.contains(ChunkHash)) + { + FoundChunkCount++; + FoundChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + else + { + NewChunkCount++; + NewChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + CompareTotalRawSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + + double FoundPercent = CompareTotalRawSize > 0 ? (100.0 * FoundChunkSize) / CompareTotalRawSize : 0; + double NewPercent = CompareTotalRawSize > 0 ? (100.0 * NewChunkSize) / CompareTotalRawSize : 0; + + ZEN_CONSOLE("Found {} ({} {:.1f}%) out of {} ({}) chunks in {} ({}) base chunks. Added {} ({} {:.1f}%) chunks.", + FoundChunkCount, + NiceBytes(FoundChunkSize), + FoundPercent, + CompareFolderContent.ChunkedContent.ChunkHashes.size(), + NiceBytes(CompareTotalRawSize), + BaseFolderContent.ChunkedContent.ChunkHashes.size(), + NiceBytes(BaseTotalRawSize), + NewChunkCount, + NiceBytes(NewChunkSize), + NewPercent); + + return false; + } + +} // namespace + +////////////////////////////////////////////////////////////////////////////////////////////////////// + +BuildsCommand::BuildsCommand() +{ + m_Options.add_options()("h,help", "Print help"); + + auto AddAuthOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("", "", "system-dir", "Specify system root", cxxopts::value(m_SystemRootDir), ""); + + // Direct access token (may expire) + Ops.add_option("auth-token", + "", + "access-token", + "Cloud/Builds Storage access token", + cxxopts::value(m_AccessToken), + ""); + Ops.add_option("auth-token", + "", + "access-token-env", + "Name of environment variable that holds the cloud/builds Storage access token", + cxxopts::value(m_AccessTokenEnv)->default_value(DefaultAccessTokenEnvVariableName), + ""); + Ops.add_option("auth-token", + "", + "access-token-path", + "Path to json file that holds the cloud/builds Storage access token", + cxxopts::value(m_AccessTokenPath), + ""); + + // Auth manager token encryption + Ops.add_option("security", + "", + "encryption-aes-key", + "256 bit AES encryption key", + cxxopts::value(m_EncryptionKey), + ""); + Ops.add_option("security", + "", + "encryption-aes-iv", + "128 bit AES encryption initialization vector", + cxxopts::value(m_EncryptionIV), + ""); + + // OpenId acccess token + Ops.add_option("openid", + "", + "openid-provider-name", + "Open ID provider name", + cxxopts::value(m_OpenIdProviderName), + "Default"); + Ops.add_option("openid", "", "openid-provider-url", "Open ID provider url", cxxopts::value(m_OpenIdProviderUrl), ""); + Ops.add_option("openid", "", "openid-client-id", "Open ID client id", cxxopts::value(m_OpenIdClientId), ""); + Ops.add_option("openid", + "", + "openid-refresh-token", + "Open ID refresh token", + cxxopts::value(m_OpenIdRefreshToken), + ""); + + // OAuth acccess token + Ops.add_option("oauth", "", "oauth-url", "OAuth provier url", cxxopts::value(m_OAuthUrl)->default_value(""), ""); + Ops.add_option("oauth", + "", + "oauth-clientid", + "OAuth client id", + cxxopts::value(m_OAuthClientId)->default_value(""), + ""); + Ops.add_option("oauth", + "", + "oauth-clientsecret", + "OAuth client secret", + cxxopts::value(m_OAuthClientSecret)->default_value(""), + ""); + }; + + auto AddCloudOptions = [this, &AddAuthOptions](cxxopts::Options& Ops) { + AddAuthOptions(Ops); + + Ops.add_option("cloud build", "", "url", "Cloud Builds URL", cxxopts::value(m_BuildsUrl), ""); + Ops.add_option("cloud build", + "", + "assume-http2", + "Assume that the builds endpoint is a HTTP/2 endpoint skipping HTTP/1.1 upgrade handshake", + cxxopts::value(m_AssumeHttp2), + ""); + + Ops.add_option("cloud build", "", "namespace", "Builds Storage namespace", cxxopts::value(m_Namespace), ""); + Ops.add_option("cloud build", "", "bucket", "Builds Storage bucket", cxxopts::value(m_Bucket), ""); + }; + + auto AddFileOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("filestorage", "", "storage-path", "Builds Storage Path", cxxopts::value(m_StoragePath), ""); + Ops.add_option("filestorage", + "", + "json-metadata", + "Write build, part and block metadata as .json files in addition to .cb files", + cxxopts::value(m_WriteMetadataAsJson), + ""); + }; + + auto AddOutputOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("output", "", "plain-progress", "Show progress using plain output", cxxopts::value(m_PlainProgress), ""); + Ops.add_option("output", "", "verbose", "Enable verbose console output", cxxopts::value(m_Verbose), ""); + }; + + m_Options.add_option("", "v", "verb", "Verb for build - list, upload, download, diff", cxxopts::value(m_Verb), ""); + m_Options.parse_positional({"verb"}); + m_Options.positional_help("verb"); + + // list + AddCloudOptions(m_ListOptions); + AddFileOptions(m_ListOptions); + AddOutputOptions(m_ListOptions); + m_ListOptions.add_options()("h,help", "Print help"); + + // upload + AddCloudOptions(m_UploadOptions); + AddFileOptions(m_UploadOptions); + AddOutputOptions(m_UploadOptions); + m_UploadOptions.add_options()("h,help", "Print help"); + m_UploadOptions.add_option("", "l", "local-path", "Root file system folder for build", cxxopts::value(m_Path), ""); + m_UploadOptions.add_option("", + "", + "create-build", + "Set to true to create the containing build, if unset a builds-id must be given and the build already exist", + cxxopts::value(m_CreateBuild), + ""); + m_UploadOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), ""); + m_UploadOptions.add_option("", + "", + "build-part-id", + "Build part Id, if not given it will be auto generated", + cxxopts::value(m_BuildPartId), + ""); + m_UploadOptions.add_option("", + "", + "build-part-name", + "Name of the build part, if not given it will be be named after the directory name at end of local-path", + cxxopts::value(m_BuildPartName), + ""); + m_UploadOptions.add_option("", + "", + "metadata-path", + "Path to json file that holds the metadata for the build. Requires the create-build option to be set", + cxxopts::value(m_BuildMetadataPath), + ""); + m_UploadOptions.add_option( + "", + "", + "metadata", + "Key-value pairs separated by ';' with build meta data. (key1=value1;key2=value2). Requires the create-build option to be set", + cxxopts::value(m_BuildMetadata), + ""); + m_UploadOptions.add_option("", "", "clean", "Ignore existing blocks", cxxopts::value(m_Clean), ""); + m_UploadOptions.add_option("", + "", + "block-min-reuse", + "Percent of an existing block that must be relevant for it to be resused. Defaults to 85.", + cxxopts::value(m_BlockReuseMinPercentLimit), + ""); + m_UploadOptions.add_option("", + "", + "allow-multipart", + "Allow large attachments to be transfered using multipart protocol. Defaults to true.", + cxxopts::value(m_AllowMultiparts), + ""); + m_UploadOptions.add_option("", + "", + "manifest-path", + "Path to a text file with one line of [TAB] per file to include.", + cxxopts::value(m_ManifestPath), + ""); + + m_UploadOptions.parse_positional({"local-path", "build-id"}); + m_UploadOptions.positional_help("local-path build-id"); + + // download + AddCloudOptions(m_DownloadOptions); + AddFileOptions(m_DownloadOptions); + AddOutputOptions(m_DownloadOptions); + m_DownloadOptions.add_options()("h,help", "Print help"); + m_DownloadOptions.add_option("", "l", "local-path", "Root file system folder for build", cxxopts::value(m_Path), ""); + m_DownloadOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), ""); + m_DownloadOptions.add_option( + "", + "", + "build-part-id", + "Build part Ids list separated by ',', if no build-part-ids or build-part-names are given all parts will be downloaded", + cxxopts::value(m_BuildPartIds), + ""); + m_DownloadOptions.add_option( + "", + "", + "build-part-name", + "Name of the build parts list separated by ',', if no build-part-ids or build-part-names are given all parts will be downloaded", + cxxopts::value(m_BuildPartNames), + ""); + m_DownloadOptions + .add_option("", "", "clean", "Delete all data in target folder before downloading", cxxopts::value(m_Clean), ""); + m_DownloadOptions.add_option("", + "", + "allow-multipart", + "Allow large attachments to be transfered using multipart protocol. Defaults to true.", + cxxopts::value(m_AllowMultiparts), + ""); + m_DownloadOptions.parse_positional({"local-path", "build-id", "build-part-name"}); + m_DownloadOptions.positional_help("local-path build-id build-part-name"); + + AddOutputOptions(m_DiffOptions); + m_DiffOptions.add_options()("h,help", "Print help"); + m_DiffOptions.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), ""); + m_DiffOptions.add_option("", "c", "compare-path", "Root file system folder used as diff", cxxopts::value(m_DiffPath), ""); + m_DiffOptions.add_option("", + "", + "only-chunked", + "Skip files from diff summation that are not processed with chunking", + cxxopts::value(m_OnlyChunked), + ""); + m_DiffOptions.parse_positional({"local-path", "compare-path"}); + m_DiffOptions.positional_help("local-path compare-path"); + + AddCloudOptions(m_TestOptions); + AddFileOptions(m_TestOptions); + AddOutputOptions(m_TestOptions); + m_TestOptions.add_options()("h,help", "Print help"); + m_TestOptions.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), ""); + m_TestOptions.parse_positional({"local-path"}); + m_TestOptions.positional_help("local-path"); + + AddCloudOptions(m_FetchBlobOptions); + AddFileOptions(m_FetchBlobOptions); + AddOutputOptions(m_FetchBlobOptions); + m_FetchBlobOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), ""); + m_FetchBlobOptions + .add_option("", "", "blob-hash", "IoHash in hex form identifying the blob to download", cxxopts::value(m_BlobHash), ""); + m_FetchBlobOptions.parse_positional({"build-id", "blob-hash"}); + m_FetchBlobOptions.positional_help("build-id blob-hash"); + + AddCloudOptions(m_ValidateBuildPartOptions); + AddFileOptions(m_ValidateBuildPartOptions); + AddOutputOptions(m_ValidateBuildPartOptions); + m_ValidateBuildPartOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), ""); + m_ValidateBuildPartOptions.add_option("", + "", + "build-part-id", + "Build part Id, if not given it will be auto generated", + cxxopts::value(m_BuildPartId), + ""); + m_ValidateBuildPartOptions.add_option( + "", + "", + "build-part-name", + "Name of the build part, if not given it will be be named after the directory name at end of local-path", + cxxopts::value(m_BuildPartName), + ""); + m_ValidateBuildPartOptions.parse_positional({"build-id", "build-part-id"}); + m_ValidateBuildPartOptions.positional_help("build-id build-part-id"); +} + +BuildsCommand::~BuildsCommand() = default; + +int +BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) +{ + ZEN_UNUSED(GlobalOptions); + + using namespace std::literals; + + std::vector SubCommandArguments; + cxxopts::Options* SubOption = nullptr; + int ParentCommandArgCount = GetSubCommand(m_Options, argc, argv, m_SubCommands, SubOption, SubCommandArguments); + if (!ParseOptions(ParentCommandArgCount, argv)) + { + return 0; + } + + if (SubOption == nullptr) + { + throw zen::OptionParseException("command verb is missing"); + } + + if (!ParseOptions(*SubOption, gsl::narrow(SubCommandArguments.size()), SubCommandArguments.data())) + { + return 0; + } + + auto ParseStorageOptions = [&]() { + if (!m_BuildsUrl.empty()) + { + if (!m_StoragePath.empty()) + { + throw zen::OptionParseException(fmt::format("url is not compatible with the storage-path option\n{}", m_Options.help())); + } + if (m_Namespace.empty() || m_Bucket.empty()) + { + throw zen::OptionParseException( + fmt::format("namespace and bucket options are required for url option\n{}", m_Options.help())); + } + } + }; + + std::unique_ptr Auth; + HttpClientSettings ClientSettings{.AssumeHttp2 = m_AssumeHttp2, .AllowResume = true, .RetryCount = 2}; + + auto CreateAuthMgr = [&]() { + if (!Auth) + { + std::filesystem::path DataRoot = m_SystemRootDir.empty() ? PickDefaultSystemRootDirectory() : m_SystemRootDir; + + if (m_EncryptionKey.empty()) + { + m_EncryptionKey = "abcdefghijklmnopqrstuvxyz0123456"; + ZEN_CONSOLE("Warning: Using default encryption key"); + } + + if (m_EncryptionIV.empty()) + { + m_EncryptionIV = "0123456789abcdef"; + ZEN_CONSOLE("Warning: Using default encryption initialization vector"); + } + + AuthConfig AuthMgrConfig = {.RootDirectory = DataRoot / "auth", + .EncryptionKey = AesKey256Bit::FromString(m_EncryptionKey), + .EncryptionIV = AesIV128Bit::FromString(m_EncryptionIV)}; + if (!AuthMgrConfig.EncryptionKey.IsValid()) + { + throw zen::OptionParseException("Invalid AES encryption key"); + } + if (!AuthMgrConfig.EncryptionIV.IsValid()) + { + throw zen::OptionParseException("Invalid AES initialization vector"); + } + Auth = AuthMgr::Create(AuthMgrConfig); + } + }; + + auto ParseAuthOptions = [&]() { + if (!m_OpenIdProviderUrl.empty() && !m_OpenIdClientId.empty()) + { + CreateAuthMgr(); + std::string ProviderName = m_OpenIdProviderName.empty() ? "Default" : m_OpenIdProviderName; + Auth->AddOpenIdProvider({.Name = ProviderName, .Url = m_OpenIdProviderUrl, .ClientId = m_OpenIdClientId}); + if (!m_OpenIdRefreshToken.empty()) + { + Auth->AddOpenIdToken({.ProviderName = ProviderName, .RefreshToken = m_OpenIdRefreshToken}); + } + } + + if (!m_AccessToken.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromStaticToken(m_AccessToken); + } + else if (!m_AccessTokenPath.empty()) + { + std::string ResolvedAccessToken = ReadAccessTokenFromFile(m_AccessTokenPath); + if (!ResolvedAccessToken.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromStaticToken(ResolvedAccessToken); + } + } + else if (!m_AccessTokenEnv.empty()) + { + std::string ResolvedAccessToken = GetEnvVariable(m_AccessTokenEnv); + if (!ResolvedAccessToken.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromStaticToken(ResolvedAccessToken); + } + } + else if (!m_OAuthUrl.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromOAuthClientCredentials( + {.Url = m_OAuthUrl, .ClientId = m_OAuthClientId, .ClientSecret = m_OAuthClientSecret}); + } + else if (!m_OpenIdProviderName.empty()) + { + CreateAuthMgr(); + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromOpenIdProvider(*Auth, m_OpenIdProviderName); + } + else + { + CreateAuthMgr(); + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromDefaultOpenIdProvider(*Auth); + } + + if (!m_BuildsUrl.empty() && !ClientSettings.AccessTokenProvider) + { + ZEN_CONSOLE("Warning: No auth provider given, attempting operation without credentials."); + } + }; + + auto ParseOutputOptions = [&]() { + IsVerbose = m_Verbose; + UsePlainProgress = IsVerbose || m_PlainProgress; + }; + ParseOutputOptions(); + + if (SubOption == &m_ListOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + CbObjectWriter QueryWriter; + QueryWriter.BeginObject("query"); + { + // QueryWriter.BeginObject("platform"); + // { + // QueryWriter.AddString("$eq", "Windows"); + // } + // QueryWriter.EndObject(); // changelist + } + QueryWriter.EndObject(); // query + + BuildStorage::Statistics StorageStats; + std::unique_ptr Storage; + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Querying builds in cloud endpoint '{}'. SessionId: '{}'. Namespace '{}', Bucket '{}'", + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, std::filesystem::path{}); + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Querying builds in folder '{}'.", m_StoragePath); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + CbObject Response = Storage->ListBuilds(QueryWriter.Save()); + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(Response.GetView(), SB); + ZEN_CONSOLE("{}", SB.ToView()); + return 0; + } + + if (SubOption == &m_UploadOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_UploadOptions.help())); + } + + if (m_CreateBuild) + { + if (m_BuildMetadataPath.empty() && m_BuildMetadata.empty()) + { + throw zen::OptionParseException(fmt::format("Options for builds target are missing\n{}", m_UploadOptions.help())); + } + if (!m_BuildMetadataPath.empty() && !m_BuildMetadata.empty()) + { + throw zen::OptionParseException(fmt::format("Conflicting options for builds target\n{}", m_UploadOptions.help())); + } + } + else + { + if (!m_BuildMetadataPath.empty()) + { + throw zen::OptionParseException( + fmt::format("metadata-path option is only valid if creating a build\n{}", m_UploadOptions.help())); + } + if (!m_BuildMetadata.empty()) + { + throw zen::OptionParseException( + fmt::format("metadata option is only valid if creating a build\n{}", m_UploadOptions.help())); + } + } + + if (m_BuildPartName.empty()) + { + m_BuildPartName = m_Path.filename().string(); + } + + const bool GeneratedBuildId = m_BuildId.empty(); + if (GeneratedBuildId) + { + m_BuildId = Oid::NewOid().ToString(); + } + else if (m_BuildId.length() != Oid::StringLength) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); + } + else if (Oid::FromHexString(m_BuildId) == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); + } + + const bool GeneratedBuildPartId = m_BuildPartId.empty(); + if (GeneratedBuildPartId) + { + m_BuildPartId = Oid::NewOid().ToString(); + } + else if (m_BuildPartId.length() != Oid::StringLength) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); + } + else if (Oid::FromHexString(m_BuildPartId) == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("Invalid build part id\n{}", m_UploadOptions.help())); + } + + BuildStorage::Statistics StorageStats; + const Oid BuildId = Oid::FromHexString(m_BuildId); + const Oid BuildPartId = Oid::FromHexString(m_BuildPartId); + std::unique_ptr Storage; + std::string StorageName; + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Uploading '{}' from '{}' to cloud endpoint '{}'. SessionId: '{}'. Namespace '{}', Bucket '{}', {}BuildId '{}'", + m_BuildPartName, + m_Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + GeneratedBuildId ? "Generated " : "", + BuildId); + CreateDirectories(m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Uploading '{}' from '{}' to folder '{}'. {}BuildId '{}'", + m_BuildPartName, + m_Path, + m_StoragePath, + GeneratedBuildId ? "Generated " : "", + BuildId); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, m_WriteMetadataAsJson); // , .0015, 0.00004 + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + CbObject MetaData; + if (m_CreateBuild) + { + if (!m_BuildMetadataPath.empty()) + { + std::filesystem::path MetadataPath(m_BuildMetadataPath); + IoBuffer MetaDataJson = ReadFile(MetadataPath).Flatten(); + std::string_view Json(reinterpret_cast(MetaDataJson.GetData()), MetaDataJson.GetSize()); + std::string JsonError; + MetaData = LoadCompactBinaryFromJson(Json, JsonError).AsObject(); + if (!JsonError.empty()) + { + throw std::runtime_error( + fmt::format("build metadata file '{}' is malformed. Reason: '{}'", m_BuildMetadataPath, JsonError)); + } + } + if (!m_BuildMetadata.empty()) + { + CbObjectWriter MetaDataWriter(1024); + ForEachStrTok(m_BuildMetadata, ';', [&](std::string_view Pair) { + size_t SplitPos = Pair.find('='); + if (SplitPos == std::string::npos || SplitPos == 0) + { + throw std::runtime_error(fmt::format("build metadata key-value pair '{}' is malformed", Pair)); + } + MetaDataWriter.AddString(Pair.substr(0, SplitPos), Pair.substr(SplitPos + 1)); + return true; + }); + MetaData = MetaDataWriter.Save(); + } + } + + bool Aborted = UploadFolder(*Storage, + BuildId, + BuildPartId, + m_BuildPartName, + m_Path, + m_ManifestPath, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData, + m_CreateBuild, + m_Clean); + if (Aborted) + { + ZEN_CONSOLE("Upload failed."); + } + + if (false) + { + ZEN_CONSOLE( + "{}:\n" + "Read: {}\n" + "Write: {}\n" + "Requests: {}\n" + "Avg Request Time: {}\n" + "Avg I/O Time: {}", + StorageName, + NiceBytes(StorageStats.TotalBytesRead.load()), + NiceBytes(StorageStats.TotalBytesWritten.load()), + StorageStats.TotalRequestCount.load(), + StorageStats.TotalExecutionTimeUs.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalExecutionTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0, + StorageStats.TotalRequestCount.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalRequestTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0); + } + return Aborted ? 11 : 0; + } + + if (SubOption == &m_DownloadOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + } + if (m_BuildId.empty()) + { + throw zen::OptionParseException(fmt::format("build-id is required\n{}", m_DownloadOptions.help())); + } + Oid BuildId = Oid::TryFromHexString(m_BuildId); + if (BuildId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("build-id is invalid\n{}", m_DownloadOptions.help())); + } + + if (!m_BuildPartName.empty() && !m_BuildPartId.empty()) + { + throw zen::OptionParseException(fmt::format("build-part-id conflicts with build-part-name\n{}", m_DownloadOptions.help())); + } + + std::vector BuildPartIds; + for (const std::string& BuildPartId : m_BuildPartIds) + { + BuildPartIds.push_back(Oid::TryFromHexString(BuildPartId)); + if (BuildPartIds.back() == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("build-part-id '{}' is invalid\n{}", BuildPartId, m_DownloadOptions.help())); + } + } + + BuildStorage::Statistics StorageStats; + std::unique_ptr Storage; + std::string StorageName; + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Downloading '{}' to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + BuildId, + m_Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + CreateDirectories(m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Downloading '{}' to '{}' from folder {}. BuildId '{}'", BuildId, m_Path, m_StoragePath, BuildId); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + bool Aborted = DownloadFolder(*Storage, BuildId, BuildPartIds, m_BuildPartNames, m_Path, m_AllowMultiparts, m_Clean); + if (Aborted) + { + ZEN_CONSOLE("Download failed."); + } + if (false) + { + ZEN_CONSOLE( + "{}:\n" + "Read: {}\n" + "Write: {}\n" + "Requests: {}\n" + "Avg Request Time: {}\n" + "Avg I/O Time: {}", + StorageName, + NiceBytes(StorageStats.TotalBytesRead.load()), + NiceBytes(StorageStats.TotalBytesWritten.load()), + StorageStats.TotalRequestCount.load(), + StorageStats.TotalExecutionTimeUs.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalExecutionTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0, + StorageStats.TotalRequestCount.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalRequestTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0); + } + + return Aborted ? 11 : 0; + } + if (SubOption == &m_DiffOptions) + { + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + } + if (m_DiffPath.empty()) + { + throw zen::OptionParseException(fmt::format("compare-path is required\n{}", m_DownloadOptions.help())); + } + bool Aborted = DiffFolders(m_Path, m_DiffPath, m_OnlyChunked); + return Aborted ? 11 : 0; + } + + if (SubOption == &m_TestOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + } + + m_BuildId = Oid::NewOid().ToString(); + m_BuildPartName = m_Path.filename().string(); + m_BuildPartId = Oid::NewOid().ToString(); + m_CreateBuild = true; + + BuildStorage::Statistics StorageStats; + const Oid BuildId = Oid::FromHexString(m_BuildId); + const Oid BuildPartId = Oid::FromHexString(m_BuildPartId); + std::unique_ptr Storage; + std::string StorageName; + + if (m_BuildsUrl.empty() && m_StoragePath.empty()) + { + m_StoragePath = GetRunningExecutablePath().parent_path() / ".tmpstore"; + CreateDirectories(m_StoragePath); + CleanDirectory(m_StoragePath); + } + auto _ = MakeGuard([&]() { + if (m_BuildsUrl.empty() && m_StoragePath.empty()) + { + DeleteDirectories(m_StoragePath); + } + }); + + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Using '{}' to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + m_BuildPartName.empty() ? m_BuildPartId : m_BuildPartName, + m_Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + CreateDirectories(m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Using '{}' to '{}' from folder {}. BuildId '{}'", + m_BuildPartName.empty() ? m_BuildPartId : m_BuildPartName, + m_Path, + m_StoragePath, + BuildId); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + auto MakeMetaData = [](const Oid& BuildId) -> CbObject { + CbObjectWriter BuildMetaDataWriter; + { + const uint32_t CL = BuildId.OidBits[2]; + BuildMetaDataWriter.AddString("name", fmt::format("++Test+Main-CL-{}", CL)); + BuildMetaDataWriter.AddString("branch", "ZenTestBuild"); + BuildMetaDataWriter.AddString("baselineBranch", "ZenTestBuild"); + BuildMetaDataWriter.AddString("platform", "Windows"); + BuildMetaDataWriter.AddString("project", "Test"); + BuildMetaDataWriter.AddInteger("changelist", CL); + BuildMetaDataWriter.AddString("buildType", "test-folder"); + } + return BuildMetaDataWriter.Save(); + }; + CbObject MetaData = MakeMetaData(Oid::TryFromHexString(m_BuildId)); + { + ExtendableStringBuilder<256> SB; + CompactBinaryToJson(MetaData, SB); + ZEN_CONSOLE("Upload Build {}, Part {} ({})\n{}", m_BuildId, BuildPartId, m_BuildPartName, SB.ToView()); + } + + bool Aborted = UploadFolder(*Storage, + BuildId, + BuildPartId, + m_BuildPartName, + m_Path, + {}, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData, + m_CreateBuild, + m_Clean); + if (Aborted) + { + ZEN_CONSOLE("Upload failed."); + return 11; + } + + const std::filesystem::path DownloadPath = m_Path.parent_path() / (m_BuildPartName + "_download"); + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}'", BuildId, BuildPartId, m_BuildPartName, DownloadPath); + Aborted = DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, true); + if (Aborted) + { + ZEN_CONSOLE("Download failed."); + return 11; + } + + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (identical target)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); + Aborted = DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); + if (Aborted) + { + ZEN_CONSOLE("Re-download failed. (identical target)"); + return 11; + } + + auto ScrambleDir = [](const std::filesystem::path& Path) { + ZEN_CONSOLE("\nScrambling '{}'", Path); + Stopwatch Timer; + DirectoryContent DownloadContent; + GetDirectoryContent( + Path, + DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, + DownloadContent); + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders, Path](const std::filesystem::path& AbsolutePath) -> bool { + std::string RelativePath = std::filesystem::relative(AbsolutePath, Path).generic_string(); + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + std::atomic AbortFlag = false; + ParallellWork Work(AbortFlag); + + uint32_t Randomizer = 0; + auto FileSizeIt = DownloadContent.FileSizes.begin(); + for (const std::filesystem::path& FilePath : DownloadContent.Files) + { + if (IsAcceptedFolder(FilePath)) + { + uint32_t Case = (Randomizer++) % 7; + switch (Case) + { + case 0: + { + uint64_t SourceSize = *FileSizeIt; + if (SourceSize > 0) + { + Work.ScheduleWork( + GetMediumWorkerPool(EWorkloadType::Burst), + [SourceSize, FilePath](std::atomic& AbortFlag) { + if (!AbortFlag) + { + IoBuffer Scrambled(SourceSize); + { + IoBuffer Source = IoBufferBuilder::MakeFromFile(FilePath); + Scrambled.GetMutableView().CopyFrom( + Source.GetView().Mid(SourceSize / 3, SourceSize / 3)); + Scrambled.GetMutableView() + .Mid(SourceSize / 3) + .CopyFrom(Source.GetView().Mid(0, SourceSize / 3)); + Scrambled.GetMutableView() + .Mid((SourceSize / 3) * 2) + .CopyFrom(Source.GetView().Mid(SourceSize / 2, SourceSize / 3)); + } + bool IsReadOnly = SetFileReadOnly(FilePath, false); + WriteFile(FilePath, Scrambled); + if (IsReadOnly) + { + SetFileReadOnly(FilePath, true); + } + } + }, + [FilePath](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed scrambling file {}. Reason: {}", FilePath, Ex.what()); + AbortFlag = true; + }); + } + } + break; + case 1: + std::filesystem::remove(FilePath); + break; + default: + break; + } + } + FileSizeIt++; + } + Work.Wait(5000, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted); + ZEN_CONSOLE("Scrambling files, {} remaining", PendingWork); + }); + ZEN_ASSERT(!AbortFlag.load()); + ZEN_CONSOLE("Scrambled files in {}", NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); + }; + + ScrambleDir(DownloadPath); + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled target)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); + Aborted = DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); + if (Aborted) + { + ZEN_CONSOLE("Re-download failed. (scrambled target)"); + return 11; + } + + ScrambleDir(DownloadPath); + + Oid BuildId2 = Oid::NewOid(); + Oid BuildPartId2 = Oid::NewOid(); + + CbObject MetaData2 = MakeMetaData(BuildId2); + { + ExtendableStringBuilder<256> SB; + CompactBinaryToJson(MetaData, SB); + ZEN_CONSOLE("\nUpload scrambled Build {}, Part {} ({})\n{}\n", BuildId2, BuildPartId2, m_BuildPartName, SB.ToView()); + } + + Aborted = UploadFolder(*Storage, + BuildId2, + BuildPartId2, + m_BuildPartName, + DownloadPath, + {}, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData2, + true, + false); + if (Aborted) + { + ZEN_CONSOLE("Upload of scrambled failed."); + return 11; + } + + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (original)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); + Aborted = DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); + if (Aborted) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); + Aborted = DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false); + if (Aborted) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); + Aborted = DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false); + if (Aborted) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + return 0; + } + + if (SubOption == &m_FetchBlobOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + if (m_BlobHash.empty()) + { + throw zen::OptionParseException(fmt::format("Blob hash string is missing\n{}", m_UploadOptions.help())); + } + + IoHash BlobHash; + if (!IoHash::TryParse(m_BlobHash, BlobHash)) + { + throw zen::OptionParseException(fmt::format("Blob hash string is invalid\n{}", m_UploadOptions.help())); + } + + if (m_BuildsUrl.empty() && m_StoragePath.empty()) + { + throw zen::OptionParseException(fmt::format("At least one storage option is required\n{}", m_UploadOptions.help())); + } + + BuildStorage::Statistics StorageStats; + const Oid BuildId = Oid::FromHexString(m_BuildId); + std::unique_ptr Storage; + std::string StorageName; + + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Using from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + CreateDirectories(m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Using folder {}. BuildId '{}'", m_StoragePath, BuildId); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateBlob(*Storage, BuildId, BlobHash, CompressedSize, DecompressedSize); + ZEN_CONSOLE("Blob '{}' has a compressed size {} and a decompressed size of {} bytes", BlobHash, CompressedSize, DecompressedSize); + return 0; + } + + if (SubOption == &m_ValidateBuildPartOptions) + { + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + + if (m_BuildsUrl.empty() && m_StoragePath.empty()) + { + throw zen::OptionParseException(fmt::format("At least one storage option is required\n{}", m_UploadOptions.help())); + } + + if (m_BuildId.empty()) + { + throw zen::OptionParseException(fmt::format("build-id is required\n{}", m_DownloadOptions.help())); + } + Oid BuildId = Oid::TryFromHexString(m_BuildId); + if (BuildId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("build-id is invalid\n{}", m_DownloadOptions.help())); + } + + if (!m_BuildPartName.empty() && !m_BuildPartId.empty()) + { + throw zen::OptionParseException(fmt::format("build-part-id conflicts with build-part-name\n{}", m_DownloadOptions.help())); + } + + BuildStorage::Statistics StorageStats; + std::unique_ptr Storage; + std::string StorageName; + + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Using from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + CreateDirectories(m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Using folder {}. BuildId '{}'", m_StoragePath, BuildId); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + Oid BuildPartId = Oid::TryFromHexString(m_BuildPartId); + CbObject Build = Storage->GetBuild(BuildId); + if (!m_BuildPartName.empty()) + { + BuildPartId = Build["parts"sv].AsObjectView()[m_BuildPartName].AsObjectId(); + if (BuildPartId == Oid::Zero) + { + throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", m_BuildId, m_BuildPartName)); + } + } + CbObject BuildPart = Storage->GetBuildPart(BuildId, BuildPartId); + ZEN_CONSOLE("Validating build part {}/{} ({})", BuildId, BuildPartId, NiceBytes(BuildPart.GetSize())); + std::vector ChunkAttachments; + for (CbFieldView LooseFileView : BuildPart["chunkAttachments"sv].AsObjectView()["rawHashes"sv]) + { + ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment()); + } + std::vector BlockAttachments; + for (CbFieldView BlocksView : BuildPart["blockAttachments"sv].AsObjectView()["rawHashes"sv]) + { + BlockAttachments.push_back(BlocksView.AsBinaryAttachment()); + } + + for (const IoHash& ChunkAttachment : ChunkAttachments) + { + uint64_t CompressedSize; + uint64_t DecompressedSize; + try + { + ValidateBlob(*Storage, BuildId, ChunkAttachment, CompressedSize, DecompressedSize); + ZEN_CONSOLE("Chunk attachment {} ({} -> {}) is valid", + ChunkAttachment, + NiceBytes(CompressedSize), + NiceBytes(DecompressedSize)); + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE("Failed validating chunk attachment {}: {}", ChunkAttachment, Ex.what()); + } + } + + for (const IoHash& BlockAttachment : BlockAttachments) + { + uint64_t CompressedSize; + uint64_t DecompressedSize; + try + { + ValidateChunkBlock(*Storage, BuildId, BlockAttachment, CompressedSize, DecompressedSize); + ZEN_CONSOLE("Block attachment {} ({} -> {}) is valid", + BlockAttachment, + NiceBytes(CompressedSize), + NiceBytes(DecompressedSize)); + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE("Failed validating block attachment {}: {}", BlockAttachment, Ex.what()); + } + } + + return 0; + } + + ZEN_ASSERT(false); +} + +} // namespace zen diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h new file mode 100644 index 000000000..fa223943b --- /dev/null +++ b/src/zen/cmds/builds_cmd.h @@ -0,0 +1,106 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "../zen.h" + +#include +#include +#include + +namespace zen { + +class BuildsCommand : public CacheStoreCommand +{ +public: + static constexpr char Name[] = "builds"; + static constexpr char Description[] = "Manage builds - list, upload, download, diff"; + + BuildsCommand(); + ~BuildsCommand(); + + virtual int Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) override; + virtual cxxopts::Options& Options() override { return m_Options; } + +private: + cxxopts::Options m_Options{Name, Description}; + + std::filesystem::path m_SystemRootDir; + + bool m_PlainProgress = false; + bool m_Verbose = false; + + // cloud builds + std::string m_BuildsUrl; + bool m_AssumeHttp2 = false; + std::string m_Namespace; + std::string m_Bucket; + + // file storage + std::filesystem::path m_StoragePath; + bool m_WriteMetadataAsJson = false; + + std::string m_BuildId; + bool m_CreateBuild = false; + std::string m_BuildMetadataPath; + std::string m_BuildMetadata; + std::string m_BuildPartName; // Defaults to name of leaf folder in m_Path + std::string m_BuildPartId; // Defaults to a generated id when creating part, looked up when downloading using m_BuildPartName + bool m_Clean = false; + uint8_t m_BlockReuseMinPercentLimit = 85; + bool m_AllowMultiparts = true; + std::filesystem::path m_ManifestPath; + + // Direct access token (may expire) + std::string m_AccessToken; + std::string m_AccessTokenEnv; + std::string m_AccessTokenPath; + + // Auth manager token encryption + std::string m_EncryptionKey; // 256 bit AES encryption key + std::string m_EncryptionIV; // 128 bit AES initialization vector + + // OpenId acccess token + std::string m_OpenIdProviderName; + std::string m_OpenIdProviderUrl; + std::string m_OpenIdClientId; + std::string m_OpenIdRefreshToken; + + // OAuth acccess token + std::string m_OAuthUrl; + std::string m_OAuthClientId; + std::string m_OAuthClientSecret; + + std::string m_Verb; // list, upload, download + + cxxopts::Options m_ListOptions{"list", "List available builds"}; + + std::filesystem::path m_Path; + + cxxopts::Options m_UploadOptions{"upload", "Upload a folder"}; + + cxxopts::Options m_DownloadOptions{"download", "Download a folder"}; + std::vector m_BuildPartNames; + std::vector m_BuildPartIds; + + cxxopts::Options m_DiffOptions{"diff", "Compare two local folders"}; + std::filesystem::path m_DiffPath; + bool m_OnlyChunked = false; + + cxxopts::Options m_TestOptions{"test", "Test upload and download with verify"}; + + cxxopts::Options m_FetchBlobOptions{"fetch-blob", "Fetch a blob from remote store"}; + std::string m_BlobHash; + + cxxopts::Options m_ValidateBuildPartOptions{"validate-part", "Fetch a build part and validate all referenced attachments"}; + + cxxopts::Options* m_SubCommands[7] = {&m_ListOptions, + &m_UploadOptions, + &m_DownloadOptions, + &m_DiffOptions, + &m_TestOptions, + &m_FetchBlobOptions, + &m_ValidateBuildPartOptions}; +}; + +} // namespace zen diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index 872ea8941..2e230ed53 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -7,6 +7,7 @@ #include "cmds/admin_cmd.h" #include "cmds/bench_cmd.h" +#include "cmds/builds_cmd.h" #include "cmds/cache_cmd.h" #include "cmds/copy_cmd.h" #include "cmds/dedup_cmd.h" @@ -396,6 +397,7 @@ main(int argc, char** argv) AttachCommand AttachCmd; BenchCommand BenchCmd; + BuildsCommand BuildsCmd; CacheDetailsCommand CacheDetailsCmd; CacheGetCommand CacheGetCmd; CacheGenerateCommand CacheGenerateCmd; @@ -451,6 +453,7 @@ main(int argc, char** argv) // clang-format off {"attach", &AttachCmd, "Add a sponsor process to a running zen service"}, {"bench", &BenchCmd, "Utility command for benchmarking"}, + {BuildsCommand::Name, &BuildsCmd, BuildsCommand::Description}, {"cache-details", &CacheDetailsCmd, "Details on cache"}, {"cache-info", &CacheInfoCmd, "Info on cache, namespace or bucket"}, {CacheGetCommand::Name, &CacheGetCmd, CacheGetCommand::Description}, diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp index 5716d1255..8279fb952 100644 --- a/src/zencore/filesystem.cpp +++ b/src/zencore/filesystem.cpp @@ -1469,6 +1469,36 @@ GetModificationTickFromHandle(void* NativeHandle, std::error_code& Ec) return 0; } +uint64_t +GetModificationTickFromPath(const std::filesystem::path& Filename) +{ + // PathFromHandle + void* Handle; +#if ZEN_PLATFORM_WINDOWS + Handle = CreateFileW(Filename.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, 0, nullptr); + if (Handle == INVALID_HANDLE_VALUE) + { + ThrowLastError(fmt::format("Failed to open file {} to check modification tick.", Filename)); + } + auto _ = MakeGuard([Handle]() { CloseHandle(Handle); }); +#else + int Fd = open(Filename.c_str(), O_RDONLY | O_CLOEXEC); + if (Fd <= 9) + { + ThrowLastError(fmt::format("Failed to open file {} to check modification tick.", Filename)); + } + Handle = (void*)uintptr_t(Fd); + auto _ = MakeGuard([Handle]() { close(int(uintptr_t(Handle))); }); +#endif + std::error_code Ec; + uint64_t ModificatonTick = GetModificationTickFromHandle(Handle, Ec); + if (Ec) + { + ThrowSystemError(Ec.value(), Ec.message()); + } + return ModificatonTick; +} + std::filesystem::path GetRunningExecutablePath() { @@ -1895,6 +1925,116 @@ PickDefaultSystemRootDirectory() #endif // ZEN_PLATFORM_WINDOWS } +#if ZEN_PLATFORM_WINDOWS + +uint32_t +GetFileAttributes(const std::filesystem::path& Filename) +{ + DWORD Attributes = ::GetFileAttributes(Filename.native().c_str()); + if (Attributes == INVALID_FILE_ATTRIBUTES) + { + ThrowLastError(fmt::format("failed to get attributes of file {}", Filename)); + } + return (uint32_t)Attributes; +} + +void +SetFileAttributes(const std::filesystem::path& Filename, uint32_t Attributes) +{ + if (::SetFileAttributes(Filename.native().c_str(), Attributes) == 0) + { + ThrowLastError(fmt::format("failed to set attributes of file {}", Filename)); + } +} + +#endif // ZEN_PLATFORM_WINDOWS + +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + +uint32_t +GetFileMode(const std::filesystem::path& Filename) +{ + struct stat Stat; + int err = stat(Filename.native().c_str(), &Stat); + if (err) + { + ThrowLastError(fmt::format("Failed to get mode of file {}", Filename)); + } + return (uint32_t)Stat.st_mode; +} + +void +SetFileMode(const std::filesystem::path& Filename, uint32_t Attributes) +{ + int err = chmod(Filename.native().c_str(), (mode_t)Attributes); + if (err) + { + ThrowLastError(fmt::format("Failed to set mode of file {}", Filename)); + } +} + +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + +#if ZEN_PLATFORM_WINDOWS +const uint32_t FileAttributesSystemReadOnlyFlag = FILE_ATTRIBUTE_READONLY; +#else +const uint32_t FileAttributesSystemReadOnlyFlag = 0x00000001; +#endif // ZEN_PLATFORM_WINDOWS + +const uint32_t FileModeWriteEnableFlags = 0222; + +bool +IsFileAttributeReadOnly(uint32_t FileAttributes) +{ +#if ZEN_PLATFORM_WINDOWS + return (FileAttributes & FileAttributesSystemReadOnlyFlag) != 0; +#else + return (FileAttributes & 0x00000001) != 0; +#endif // ZEN_PLATFORM_WINDOWS +} + +bool +IsFileModeReadOnly(uint32_t FileMode) +{ + return (FileMode & FileModeWriteEnableFlags) == 0; +} + +uint32_t +MakeFileAttributeReadOnly(uint32_t FileAttributes, bool ReadOnly) +{ + return ReadOnly ? (FileAttributes | FileAttributesSystemReadOnlyFlag) : (FileAttributes & ~FileAttributesSystemReadOnlyFlag); +} + +uint32_t +MakeFileModeReadOnly(uint32_t FileMode, bool ReadOnly) +{ + return ReadOnly ? (FileMode & ~FileModeWriteEnableFlags) : (FileMode | FileModeWriteEnableFlags); +} + +bool +SetFileReadOnly(const std::filesystem::path& Filename, bool ReadOnly) +{ +#if ZEN_PLATFORM_WINDOWS + uint32_t CurrentAttributes = GetFileAttributes(Filename); + uint32_t NewAttributes = MakeFileAttributeReadOnly(CurrentAttributes, ReadOnly); + if (CurrentAttributes != NewAttributes) + { + SetFileAttributes(Filename, NewAttributes); + return true; + } +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + uint32_t CurrentMode = GetFileMode(Filename); + uint32_t NewMode = MakeFileModeReadOnly(CurrentMode, ReadOnly); + if (CurrentMode != NewMode) + { + SetFileMode(Filename, NewMode); + return true; + } +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + return false; +} + ////////////////////////////////////////////////////////////////////////// // // Testing related code follows... diff --git a/src/zencore/include/zencore/filesystem.h b/src/zencore/include/zencore/filesystem.h index 250745e86..20f6dc56c 100644 --- a/src/zencore/include/zencore/filesystem.h +++ b/src/zencore/include/zencore/filesystem.h @@ -52,6 +52,10 @@ ZENCORE_API uint64_t FileSizeFromHandle(void* NativeHandle); */ ZENCORE_API uint64_t GetModificationTickFromHandle(void* NativeHandle, std::error_code& Ec); +/** Get a native time tick of last modification time + */ +ZENCORE_API uint64_t GetModificationTickFromPath(const std::filesystem::path& Filename); + ZENCORE_API std::filesystem::path GetRunningExecutablePath(); /** Set the max open file handle count to max allowed for the current process on Linux and MacOS @@ -271,6 +275,23 @@ std::error_code RotateDirectories(const std::filesystem::path& DirectoryName, st std::filesystem::path PickDefaultSystemRootDirectory(); +#if ZEN_PLATFORM_WINDOWS +uint32_t GetFileAttributes(const std::filesystem::path& Filename); +void SetFileAttributes(const std::filesystem::path& Filename, uint32_t Attributes); +#endif // ZEN_PLATFORM_WINDOWS + +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC +uint32_t GetFileMode(const std::filesystem::path& Filename); +void SetFileMode(const std::filesystem::path& Filename, uint32_t Attributes); +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + +bool IsFileAttributeReadOnly(uint32_t FileAttributes); +bool IsFileModeReadOnly(uint32_t FileMode); +uint32_t MakeFileAttributeReadOnly(uint32_t FileAttributes, bool ReadOnly); +uint32_t MakeFileModeReadOnly(uint32_t FileMode, bool ReadOnly); + +bool SetFileReadOnly(const std::filesystem::path& Filename, bool ReadOnly); + ////////////////////////////////////////////////////////////////////////// void filesystem_forcelink(); // internal diff --git a/src/zenserver/projectstore/buildsremoteprojectstore.cpp b/src/zenserver/projectstore/buildsremoteprojectstore.cpp index e4e91104c..fbb9bc344 100644 --- a/src/zenserver/projectstore/buildsremoteprojectstore.cpp +++ b/src/zenserver/projectstore/buildsremoteprojectstore.cpp @@ -344,7 +344,12 @@ public: m_BuildId); return Result; } - Result.Blocks = std::move(Blocks.value()); + Result.Blocks.reserve(Blocks.value().size()); + for (ChunkBlockDescription& BlockDescription : Blocks.value()) + { + Result.Blocks.push_back(ThinChunkBlockDescription{.BlockHash = BlockDescription.BlockHash, + .ChunkRawHashes = std::move(BlockDescription.ChunkRawHashes)}); + } return Result; } diff --git a/src/zenserver/projectstore/fileremoteprojectstore.cpp b/src/zenserver/projectstore/fileremoteprojectstore.cpp index 5a21a7540..98e292d91 100644 --- a/src/zenserver/projectstore/fileremoteprojectstore.cpp +++ b/src/zenserver/projectstore/fileremoteprojectstore.cpp @@ -192,8 +192,8 @@ public: return GetKnownBlocksResult{{.ErrorCode = static_cast(HttpResponseCode::NoContent), .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; } - std::vector KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); - GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; + std::vector KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); + GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; Result.Blocks = std::move(KnownBlocks); return Result; } diff --git a/src/zenserver/projectstore/jupiterremoteprojectstore.cpp b/src/zenserver/projectstore/jupiterremoteprojectstore.cpp index 2b6a437d1..e5839ad3b 100644 --- a/src/zenserver/projectstore/jupiterremoteprojectstore.cpp +++ b/src/zenserver/projectstore/jupiterremoteprojectstore.cpp @@ -193,7 +193,7 @@ public: return GetKnownBlocksResult{{.ErrorCode = static_cast(HttpResponseCode::NoContent), .ElapsedSeconds = LoadResult.ElapsedSeconds + ExistsResult.ElapsedSeconds}}; } - std::vector KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); + std::vector KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); GetKnownBlocksResult Result{ {.ElapsedSeconds = LoadResult.ElapsedSeconds + ExistsResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000.0}}; diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index f6f7eba99..53df12b14 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -8628,7 +8628,11 @@ TEST_CASE("project.store.block") } ChunkBlockDescription Block; CompressedBuffer BlockBuffer = GenerateChunkBlock(std::move(Chunks), Block); - CHECK(IterateChunkBlock(BlockBuffer.Decompress(), [](CompressedBuffer&&, const IoHash&) {})); + uint64_t HeaderSize; + CHECK(IterateChunkBlock( + BlockBuffer.Decompress(), + [](CompressedBuffer&&, const IoHash&) {}, + HeaderSize)); } TEST_CASE("project.store.iterateoplog") diff --git a/src/zenserver/projectstore/remoteprojectstore.cpp b/src/zenserver/projectstore/remoteprojectstore.cpp index b4b2c6fc4..a7263da83 100644 --- a/src/zenserver/projectstore/remoteprojectstore.cpp +++ b/src/zenserver/projectstore/remoteprojectstore.cpp @@ -516,21 +516,23 @@ namespace remotestore_impl { return; } - bool StoreChunksOK = IterateChunkBlock( - BlockPayload, - [&WantedChunks, &WriteAttachmentBuffers, &WriteRawHashes, &Info](CompressedBuffer&& Chunk, - const IoHash& AttachmentRawHash) { - if (WantedChunks.contains(AttachmentRawHash)) - { - WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer()); - IoHash RawHash; - uint64_t RawSize; - ZEN_ASSERT(CompressedBuffer::ValidateCompressedHeader(WriteAttachmentBuffers.back(), RawHash, RawSize)); - ZEN_ASSERT(RawHash == AttachmentRawHash); - WriteRawHashes.emplace_back(AttachmentRawHash); - WantedChunks.erase(AttachmentRawHash); - } - }); + uint64_t BlockHeaderSize = 0; + bool StoreChunksOK = IterateChunkBlock( + BlockPayload, + [&WantedChunks, &WriteAttachmentBuffers, &WriteRawHashes, &Info](CompressedBuffer&& Chunk, + const IoHash& AttachmentRawHash) { + if (WantedChunks.contains(AttachmentRawHash)) + { + WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer()); + IoHash RawHash; + uint64_t RawSize; + ZEN_ASSERT(CompressedBuffer::ValidateCompressedHeader(WriteAttachmentBuffers.back(), RawHash, RawSize)); + ZEN_ASSERT(RawHash == AttachmentRawHash); + WriteRawHashes.emplace_back(AttachmentRawHash); + WantedChunks.erase(AttachmentRawHash); + } + }, + BlockHeaderSize); if (!StoreChunksOK) { @@ -1101,11 +1103,11 @@ GetBlockHashesFromOplog(CbObjectView ContainerObject) return BlockHashes; } -std::vector +std::vector GetBlocksFromOplog(CbObjectView ContainerObject, std::span IncludeBlockHashes) { using namespace std::literals; - std::vector Result; + std::vector Result; CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); tsl::robin_set IncludeSet; IncludeSet.insert(IncludeBlockHashes.begin(), IncludeBlockHashes.end()); @@ -1128,7 +1130,7 @@ GetBlocksFromOplog(CbObjectView ContainerObject, std::span Include { ChunkHashes.push_back(ChunkField.AsHash()); } - Result.push_back({.BlockHash = BlockHash, .ChunkHashes = std::move(ChunkHashes)}); + Result.push_back(ThinChunkBlockDescription{.BlockHash = BlockHash, .ChunkRawHashes = std::move(ChunkHashes)}); } } return Result; @@ -1144,7 +1146,7 @@ BuildContainer(CidStore& ChunkStore, bool BuildBlocks, bool IgnoreMissingAttachments, bool AllowChunking, - const std::vector& KnownBlocks, + const std::vector& KnownBlocks, WorkerThreadPool& WorkerPool, const std::function& AsyncOnBlock, const std::function& OnLargeAttachment, @@ -1386,7 +1388,7 @@ BuildContainer(CidStore& ChunkStore, return {}; } - auto FindReuseBlocks = [](const std::vector& KnownBlocks, + auto FindReuseBlocks = [](const std::vector& KnownBlocks, const std::unordered_set& Attachments, JobContext* OptionalContext) -> std::vector { std::vector ReuseBlockIndexes; @@ -1399,14 +1401,14 @@ BuildContainer(CidStore& ChunkStore, for (size_t KnownBlockIndex = 0; KnownBlockIndex < KnownBlocks.size(); KnownBlockIndex++) { - const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; - size_t BlockAttachmentCount = KnownBlock.ChunkHashes.size(); + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + size_t BlockAttachmentCount = KnownBlock.ChunkRawHashes.size(); if (BlockAttachmentCount == 0) { continue; } size_t FoundAttachmentCount = 0; - for (const IoHash& KnownHash : KnownBlock.ChunkHashes) + for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { if (Attachments.contains(KnownHash)) { @@ -1447,8 +1449,8 @@ BuildContainer(CidStore& ChunkStore, std::vector ReusedBlockIndexes = FindReuseBlocks(KnownBlocks, FoundHashes, OptionalContext); for (size_t KnownBlockIndex : ReusedBlockIndexes) { - const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (const IoHash& KnownHash : KnownBlock.ChunkHashes) + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { if (UploadAttachments.erase(KnownHash) == 1) { @@ -1784,8 +1786,8 @@ BuildContainer(CidStore& ChunkStore, std::vector ReusedBlockFromChunking = FindReuseBlocks(KnownBlocks, ChunkedHashes, OptionalContext); for (size_t KnownBlockIndex : ReusedBlockIndexes) { - const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (const IoHash& KnownHash : KnownBlock.ChunkHashes) + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { if (ChunkedHashes.erase(KnownHash) == 1) { @@ -1803,7 +1805,7 @@ BuildContainer(CidStore& ChunkStore, Blocks.reserve(ReuseBlockCount); for (auto It = ReusedBlockIndexes.begin(); It != UniqueKnownBlocksEnd; It++) { - Blocks.push_back(KnownBlocks[*It]); + Blocks.push_back({KnownBlocks[*It]}); } remotestore_impl::ReportMessage(OptionalContext, fmt::format("Reused {} attachments from {} blocks", ReusedAttachmentCount, ReuseBlockCount)); @@ -1919,9 +1921,9 @@ BuildContainer(CidStore& ChunkStore, { // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index RwLock::SharedLockScope _(BlocksLock); - Blocks[BlockIndex].ChunkHashes.insert(Blocks[BlockIndex].ChunkHashes.end(), - BlockAttachmentHashes.begin(), - BlockAttachmentHashes.end()); + Blocks[BlockIndex].ChunkRawHashes.insert(Blocks[BlockIndex].ChunkRawHashes.end(), + BlockAttachmentHashes.begin(), + BlockAttachmentHashes.end()); } uint64_t NowMS = Timer.GetElapsedTimeMs(); ZEN_INFO("Assembled block {} with {} chunks in {} ({})", @@ -2167,7 +2169,7 @@ BuildContainer(CidStore& ChunkStore, { for (const ChunkBlockDescription& B : Blocks) { - ZEN_ASSERT(!B.ChunkHashes.empty()); + ZEN_ASSERT(!B.ChunkRawHashes.empty()); if (BuildBlocks) { ZEN_ASSERT(B.BlockHash != IoHash::Zero); @@ -2177,7 +2179,7 @@ BuildContainer(CidStore& ChunkStore, OplogContinerWriter.AddBinaryAttachment("rawhash"sv, B.BlockHash); OplogContinerWriter.BeginArray("chunks"sv); { - for (const IoHash& RawHash : B.ChunkHashes) + for (const IoHash& RawHash : B.ChunkRawHashes) { OplogContinerWriter.AddHash(RawHash); } @@ -2193,7 +2195,7 @@ BuildContainer(CidStore& ChunkStore, { OplogContinerWriter.BeginArray("chunks"sv); { - for (const IoHash& RawHash : B.ChunkHashes) + for (const IoHash& RawHash : B.ChunkRawHashes) { OplogContinerWriter.AddBinaryAttachment(RawHash); } @@ -2389,7 +2391,7 @@ SaveOplog(CidStore& ChunkStore, OnBlock = UploadBlock; } - std::vector KnownBlocks; + std::vector KnownBlocks; uint64_t TransferWallTimeMS = 0; diff --git a/src/zenserver/projectstore/remoteprojectstore.h b/src/zenserver/projectstore/remoteprojectstore.h index 1ef0416b7..1210afc7c 100644 --- a/src/zenserver/projectstore/remoteprojectstore.h +++ b/src/zenserver/projectstore/remoteprojectstore.h @@ -66,7 +66,7 @@ public: struct GetKnownBlocksResult : public Result { - std::vector Blocks; + std::vector Blocks; }; struct RemoteStoreInfo @@ -166,7 +166,7 @@ RemoteProjectStore::Result LoadOplog(CidStore& ChunkStore, bool CleanOplog, JobContext* OptionalContext); -std::vector GetBlockHashesFromOplog(CbObjectView ContainerObject); -std::vector GetBlocksFromOplog(CbObjectView ContainerObject, std::span IncludeBlockHashes); +std::vector GetBlockHashesFromOplog(CbObjectView ContainerObject); +std::vector GetBlocksFromOplog(CbObjectView ContainerObject, std::span IncludeBlockHashes); } // namespace zen diff --git a/src/zenutil/chunkblock.cpp b/src/zenutil/chunkblock.cpp index 6dae5af11..a19cf5c1b 100644 --- a/src/zenutil/chunkblock.cpp +++ b/src/zenutil/chunkblock.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -18,20 +19,27 @@ ParseChunkBlockDescription(const CbObjectView& BlockObject) Result.BlockHash = BlockObject["rawHash"sv].AsHash(); if (Result.BlockHash != IoHash::Zero) { + Result.HeaderSize = BlockObject["headerSize"sv].AsUInt64(); CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView(); - Result.ChunkHashes.reserve(ChunksArray.Num()); + Result.ChunkRawHashes.reserve(ChunksArray.Num()); for (CbFieldView ChunkView : ChunksArray) { - Result.ChunkHashes.push_back(ChunkView.AsHash()); + Result.ChunkRawHashes.push_back(ChunkView.AsHash()); } - CbArrayView ChunkRawLengthsArray = BlockObject["chunkRawLengths"sv].AsArrayView(); - std::vector ChunkLengths; + CbArrayView ChunkRawLengthsArray = BlockObject["chunkRawLengths"sv].AsArrayView(); Result.ChunkRawLengths.reserve(ChunkRawLengthsArray.Num()); for (CbFieldView ChunkView : ChunkRawLengthsArray) { Result.ChunkRawLengths.push_back(ChunkView.AsUInt32()); } + + CbArrayView ChunkCompressedLengthsArray = BlockObject["chunkCompressedLengths"sv].AsArrayView(); + Result.ChunkCompressedLengths.reserve(ChunkCompressedLengthsArray.Num()); + for (CbFieldView ChunkView : ChunkCompressedLengthsArray) + { + Result.ChunkCompressedLengths.push_back(ChunkView.AsUInt32()); + } } return Result; } @@ -57,18 +65,23 @@ ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject) CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData) { - ZEN_ASSERT(Block.ChunkRawLengths.size() == Block.ChunkHashes.size()); + ZEN_ASSERT(Block.BlockHash != IoHash::Zero); + ZEN_ASSERT(Block.HeaderSize > 0); + ZEN_ASSERT(Block.ChunkRawLengths.size() == Block.ChunkRawHashes.size()); + ZEN_ASSERT(Block.ChunkCompressedLengths.size() == Block.ChunkRawHashes.size()); CbObjectWriter Writer; Writer.AddHash("rawHash"sv, Block.BlockHash); + Writer.AddInteger("headerSize"sv, Block.HeaderSize); Writer.BeginArray("rawHashes"sv); { - for (const IoHash& ChunkHash : Block.ChunkHashes) + for (const IoHash& ChunkHash : Block.ChunkRawHashes) { Writer.AddHash(ChunkHash); } } Writer.EndArray(); + Writer.BeginArray("chunkRawLengths"); { for (uint32_t ChunkSize : Block.ChunkRawLengths) @@ -78,11 +91,58 @@ BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView Meta } Writer.EndArray(); + Writer.BeginArray("chunkCompressedLengths"); + { + for (uint32_t ChunkSize : Block.ChunkCompressedLengths) + { + Writer.AddInteger(ChunkSize); + } + } + Writer.EndArray(); + Writer.AddObject("metadata", MetaData); return Writer.Save(); } +ChunkBlockDescription +GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash) +{ + ChunkBlockDescription BlockDescription = {{.BlockHash = IoHash::HashBuffer(BlockPayload)}}; + if (BlockDescription.BlockHash != RawHash) + { + throw std::runtime_error(fmt::format("Block {} content hash {} does not match block hash", RawHash, BlockDescription.BlockHash)); + } + if (IterateChunkBlock( + BlockPayload, + [&BlockDescription, RawHash](CompressedBuffer&& Chunk, const IoHash& AttachmentHash) { + if (CompositeBuffer Decompressed = Chunk.DecompressToComposite(); Decompressed) + { + IoHash ChunkHash = IoHash::HashBuffer(Decompressed.Flatten()); + if (ChunkHash != AttachmentHash) + { + throw std::runtime_error( + fmt::format("Chunk {} in block {} content hash {} does not match chunk", AttachmentHash, RawHash, ChunkHash)); + } + BlockDescription.ChunkRawHashes.push_back(AttachmentHash); + BlockDescription.ChunkRawLengths.push_back(gsl::narrow(Decompressed.GetSize())); + BlockDescription.ChunkCompressedLengths.push_back(gsl::narrow(Chunk.GetCompressedSize())); + } + else + { + throw std::runtime_error(fmt::format("Chunk {} in block {} is not a compressed buffer", AttachmentHash, RawHash)); + } + }, + BlockDescription.HeaderSize)) + { + return BlockDescription; + } + else + { + throw std::runtime_error(fmt::format("Block {} is malformed", RawHash)); + } +} + CompressedBuffer GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock) { @@ -91,8 +151,9 @@ GenerateChunkBlock(std::vector>&& FetchChunks, std::vector ChunkSegments; ChunkSegments.resize(1); ChunkSegments.reserve(1 + ChunkCount); - OutBlock.ChunkHashes.reserve(ChunkCount); + OutBlock.ChunkRawHashes.reserve(ChunkCount); OutBlock.ChunkRawLengths.reserve(ChunkCount); + OutBlock.ChunkCompressedLengths.reserve(ChunkCount); { IoBuffer TempBuffer(ChunkCount * 9); MutableMemoryView View = TempBuffer.GetMutableView(); @@ -106,16 +167,19 @@ GenerateChunkBlock(std::vector>&& FetchChunks, std::span Segments = Chunk.second.GetCompressed().GetSegments(); for (const SharedBuffer& Segment : Segments) { + ZEN_ASSERT(Segment.IsOwned()); ChunkSize += Segment.GetSize(); ChunkSegments.push_back(Segment); } BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr); - OutBlock.ChunkHashes.push_back(It.first); + OutBlock.ChunkRawHashes.push_back(It.first); OutBlock.ChunkRawLengths.push_back(gsl::narrow(Chunk.first)); + OutBlock.ChunkCompressedLengths.push_back(gsl::narrow(ChunkSize)); } ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd()); ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr); ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow(TempBufferLength))); + OutBlock.HeaderSize = TempBufferLength; } CompressedBuffer CompressedBlock = CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None); @@ -124,7 +188,9 @@ GenerateChunkBlock(std::vector>&& FetchChunks, } bool -IterateChunkBlock(const SharedBuffer& BlockPayload, std::function Visitor) +IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function Visitor, + uint64_t& OutHeaderSize) { ZEN_ASSERT(BlockPayload); if (BlockPayload.GetSize() < 1) @@ -144,21 +210,23 @@ IterateChunkBlock(const SharedBuffer& BlockPayload, std::function + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +namespace { + void AddCunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map& ChunkHashToChunkIndex, + const IoHash& RawHash, + std::span ChunkSequence, + std::span ChunkHashes, + std::span ChunkRawSizes) + { + ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size()); + InOutChunkedContent.ChunkCounts.push_back(gsl::narrow(ChunkSequence.size())); + InOutChunkedContent.ChunkOrders.reserve(InOutChunkedContent.ChunkOrders.size() + ChunkSequence.size()); + + for (uint32_t ChunkedSequenceIndex : ChunkSequence) + { + const IoHash& ChunkHash = ChunkHashes[ChunkedSequenceIndex]; + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + uint32_t ChunkIndex = gsl::narrow(It->second); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + } + else + { + uint32_t ChunkIndex = gsl::narrow(InOutChunkedContent.ChunkHashes.size()); + ChunkHashToChunkIndex.insert_or_assign(ChunkHash, ChunkIndex); + InOutChunkedContent.ChunkHashes.push_back(ChunkHash); + InOutChunkedContent.ChunkRawSizes.push_back(ChunkRawSizes[ChunkedSequenceIndex]); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + Stats.UniqueChunksFound++; + Stats.UniqueBytesFound += ChunkRawSizes[ChunkedSequenceIndex]; + } + } + InOutChunkedContent.SequenceRawHashes.push_back(RawHash); + Stats.UniqueSequencesFound++; + } + + void AddCunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map& ChunkHashToChunkIndex, + const IoHash& RawHash, + const uint64_t RawSize) + { + InOutChunkedContent.ChunkCounts.push_back(1); + + if (auto It = ChunkHashToChunkIndex.find(RawHash); It != ChunkHashToChunkIndex.end()) + { + uint32_t ChunkIndex = gsl::narrow(It->second); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + } + else + { + uint32_t ChunkIndex = gsl::narrow(InOutChunkedContent.ChunkHashes.size()); + ChunkHashToChunkIndex.insert_or_assign(RawHash, ChunkIndex); + InOutChunkedContent.ChunkHashes.push_back(RawHash); + InOutChunkedContent.ChunkRawSizes.push_back(RawSize); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + Stats.UniqueChunksFound++; + Stats.UniqueBytesFound += RawSize; + } + InOutChunkedContent.SequenceRawHashes.push_back(RawHash); + Stats.UniqueSequencesFound++; + } + + IoHash HashOneFile(ChunkingStatistics& Stats, + const ChunkingController& InChunkingController, + ChunkedFolderContent& OutChunkedContent, + tsl::robin_map& ChunkHashToChunkIndex, + tsl::robin_map& RawHashToSequenceRawHashIndex, + RwLock& Lock, + const std::filesystem::path& FolderPath, + uint32_t PathIndex) + { + const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; + const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + + if (RawSize == 0) + { + return IoHash::Zero; + } + else + { + ChunkedInfoWithSource Chunked; + const bool DidChunking = + InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed); + if (DidChunking) + { + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {Chunked.Info.RawHash, gsl::narrow(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + std::vector ChunkSizes; + ChunkSizes.reserve(Chunked.ChunkSources.size()); + for (const ChunkSource& Source : Chunked.ChunkSources) + { + ChunkSizes.push_back(Source.Size); + } + AddCunkSequence(Stats, + OutChunkedContent.ChunkedContent, + ChunkHashToChunkIndex, + Chunked.Info.RawHash, + Chunked.Info.ChunkSequence, + Chunked.Info.ChunkHashes, + ChunkSizes); + Stats.UniqueSequencesFound++; + } + }); + Stats.FilesChunked++; + return Chunked.Info.RawHash; + } + else + { + IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); + const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); + + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Hash)) + { + RawHashToSequenceRawHashIndex.insert( + {Hash, gsl::narrow(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + AddCunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); + Stats.UniqueSequencesFound++; + } + }); + return Hash; + } + } + } + + std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); } + +} // namespace + +std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv}; + +std::string_view +ToString(SourcePlatform Platform) +{ + return FolderContentSourcePlatformNames[(size_t)Platform]; +} + +SourcePlatform +FromString(std::string_view Platform, SourcePlatform Default) +{ + for (size_t Index = 0; Index < (size_t)SourcePlatform::_Count; Index++) + { + if (Platform == FolderContentSourcePlatformNames[Index]) + { + return (SourcePlatform)Index; + } + } + return Default; +} + +SourcePlatform +GetSourceCurrentPlatform() +{ +#if ZEN_PLATFORM_WINDOWS + return SourcePlatform::Windows; +#endif +#if ZEN_PLATFORM_MAC + return SourcePlatform::MacOS; +#endif +#if ZEN_PLATFORM_LINUX + return SourcePlatform::Linux; +#endif +} + +bool +FolderContent::AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs) +{ +#if ZEN_PLATFORM_WINDOWS + return (Lhs & 0xff) == (Rhs & 0xff); +#endif +#if ZEN_PLATFORM_MAC + return Lhs == Rhs; +#endif +#if ZEN_PLATFORM_LINUX + return Lhs == Rhs; +#endif +} + +bool +FolderContent::operator==(const FolderContent& Rhs) const +{ + if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) && + (ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size())) + { + size_t PathCount = 0; + for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string()) + { + return false; + } + } + return true; + } + return false; +} + +bool +FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const +{ + tsl::robin_map RhsPathToIndex; + const size_t RhsPathCount = Rhs.Paths.size(); + RhsPathToIndex.reserve(RhsPathCount); + for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + const size_t PathCount = Paths.size(); + for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const size_t RhsPathIndex = It->second; + if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || + (!AreFileAttributesEqual(Attributes[PathIndex], Rhs.Attributes[RhsPathIndex])) || + (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) + { + return false; + } + } + else + { + return false; + } + } + return true; +} + +void +FolderContent::UpdateState(const FolderContent& Rhs, std::vector& OutPathIndexesOufOfDate) +{ + tsl::robin_map RhsPathToIndex; + const uint32_t RhsPathCount = gsl::narrow(Rhs.Paths.size()); + RhsPathToIndex.reserve(RhsPathCount); + for (uint32_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + uint32_t PathCount = gsl::narrow(Paths.size()); + for (uint32_t PathIndex = 0; PathIndex < PathCount;) + { + if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const uint32_t RhsPathIndex = It->second; + + if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || + (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) + { + RawSizes[PathIndex] = Rhs.RawSizes[RhsPathIndex]; + ModificationTicks[PathIndex] = Rhs.ModificationTicks[RhsPathIndex]; + OutPathIndexesOufOfDate.push_back(PathIndex); + } + Attributes[PathIndex] = Rhs.Attributes[RhsPathIndex]; + PathIndex++; + } + else + { + Paths.erase(Paths.begin() + PathIndex); + RawSizes.erase(RawSizes.begin() + PathIndex); + Attributes.erase(Attributes.begin() + PathIndex); + ModificationTicks.erase(ModificationTicks.begin() + PathIndex); + PathCount--; + } + } +} + +FolderContent +GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector& OutDeletedPathIndexes) +{ + FolderContent Result = {.Platform = Old.Platform}; + tsl::robin_map NewPathToIndex; + const uint32_t NewPathCount = gsl::narrow(New.Paths.size()); + NewPathToIndex.reserve(NewPathCount); + for (uint32_t NewPathIndex = 0; NewPathIndex < NewPathCount; NewPathIndex++) + { + NewPathToIndex.insert({New.Paths[NewPathIndex].generic_string(), NewPathIndex}); + } + uint32_t OldPathCount = gsl::narrow(Old.Paths.size()); + for (uint32_t OldPathIndex = 0; OldPathIndex < OldPathCount; OldPathIndex++) + { + if (auto It = NewPathToIndex.find(Old.Paths[OldPathIndex].generic_string()); It != NewPathToIndex.end()) + { + const uint32_t NewPathIndex = It->second; + + if ((Old.RawSizes[OldPathIndex] != New.RawSizes[NewPathIndex]) || + (Old.ModificationTicks[OldPathIndex] != New.ModificationTicks[NewPathIndex])) + { + Result.Paths.push_back(New.Paths[NewPathIndex]); + Result.RawSizes.push_back(New.RawSizes[NewPathIndex]); + Result.Attributes.push_back(New.Attributes[NewPathIndex]); + Result.ModificationTicks.push_back(New.ModificationTicks[NewPathIndex]); + } + } + else + { + OutDeletedPathIndexes.push_back(Old.Paths[OldPathIndex]); + } + } + return Result; +} + +void +SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output) +{ + Output.AddString("platform"sv, ToString(Content.Platform)); + compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); + compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); + compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output); + compactbinary_helpers::WriteArray(Content.ModificationTicks, "modificationTimes"sv, Output); +} + +FolderContent +LoadFolderContentToCompactBinary(CbObjectView Input) +{ + FolderContent Content; + Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); + compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); + compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes); + compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes); + compactbinary_helpers::ReadArray("modificationTimes"sv, Input, Content.ModificationTicks); + return Content; +} + +FolderContent +GetFolderContent(GetFolderContentStatistics& Stats, + const std::filesystem::path& RootPath, + std::function&& AcceptDirectory, + std::function&& AcceptFile, + WorkerThreadPool& WorkerPool, + int32_t UpdateInteralMS, + std::function&& UpdateCallback, + std::atomic& AbortFlag) +{ + Stopwatch Timer; + auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + FolderContent Content; + struct AsyncVisitor : public GetDirectoryContentVisitor + { + AsyncVisitor(GetFolderContentStatistics& Stats, + std::atomic& AbortFlag, + FolderContent& Content, + std::function&& AcceptDirectory, + std::function&& AcceptFile) + : m_Stats(Stats) + , m_AbortFlag(AbortFlag) + , m_FoundContent(Content) + , m_AcceptDirectory(std::move(AcceptDirectory)) + , m_AcceptFile(std::move(AcceptFile)) + { + } + virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override + { + if (!m_AbortFlag) + { + m_Stats.FoundFileCount += Content.FileNames.size(); + for (uint64_t FileSize : Content.FileSizes) + { + m_Stats.FoundFileByteCount += FileSize; + } + std::string RelativeDirectoryPath = RelativeRoot.generic_string(); + if (m_AcceptDirectory(RelativeDirectoryPath)) + { + std::vector Paths; + std::vector RawSizes; + std::vector Attributes; + std::vector ModificatonTicks; + Paths.reserve(Content.FileNames.size()); + RawSizes.reserve(Content.FileNames.size()); + Attributes.reserve(Content.FileNames.size()); + ModificatonTicks.reserve(Content.FileModificationTicks.size()); + + for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++) + { + const std::filesystem::path& FileName = Content.FileNames[FileIndex]; + std::string RelativePath = (RelativeRoot / FileName).generic_string(); + std::replace(RelativePath.begin(), RelativePath.end(), '\\', '/'); + if (m_AcceptFile(RelativePath, Content.FileSizes[FileIndex], Content.FileAttributes[FileIndex])) + { + Paths.emplace_back(std::move(RelativePath)); + RawSizes.emplace_back(Content.FileSizes[FileIndex]); + Attributes.emplace_back(Content.FileAttributes[FileIndex]); + ModificatonTicks.emplace_back(Content.FileModificationTicks[FileIndex]); + + m_Stats.AcceptedFileCount++; + m_Stats.AcceptedFileByteCount += Content.FileSizes[FileIndex]; + } + } + m_Lock.WithExclusiveLock([&]() { + m_FoundContent.Paths.insert(m_FoundContent.Paths.end(), Paths.begin(), Paths.end()); + m_FoundContent.RawSizes.insert(m_FoundContent.RawSizes.end(), RawSizes.begin(), RawSizes.end()); + m_FoundContent.Attributes.insert(m_FoundContent.Attributes.end(), Attributes.begin(), Attributes.end()); + m_FoundContent.ModificationTicks.insert(m_FoundContent.ModificationTicks.end(), + ModificatonTicks.begin(), + ModificatonTicks.end()); + }); + } + } + } + + GetFolderContentStatistics& m_Stats; + std::atomic& m_AbortFlag; + RwLock m_Lock; + FolderContent& m_FoundContent; + std::function m_AcceptDirectory; + std::function m_AcceptFile; + } Visitor(Stats, AbortFlag, Content, std::move(AcceptDirectory), std::move(AcceptFile)); + + Latch PendingWork(1); + GetDirectoryContent(RootPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes | + DirectoryContentFlags::IncludeAttributes | DirectoryContentFlags::IncludeModificationTick, + Visitor, + WorkerPool, + PendingWork); + PendingWork.CountDown(); + while (!PendingWork.Wait(UpdateInteralMS)) + { + UpdateCallback(AbortFlag.load(), PendingWork.Remaining()); + } + std::vector Order; + size_t PathCount = Content.Paths.size(); + Order.resize(Content.Paths.size()); + std::vector Parents; + Parents.reserve(PathCount); + std::vector Filenames; + Filenames.reserve(PathCount); + for (size_t OrderIndex = 0; OrderIndex < PathCount; OrderIndex++) + { + Order[OrderIndex] = OrderIndex; + Parents.emplace_back(Content.Paths[OrderIndex].parent_path().generic_string()); + Filenames.emplace_back(Content.Paths[OrderIndex].filename().generic_string()); + } + std::sort(Order.begin(), Order.end(), [&Parents, &Filenames](size_t Lhs, size_t Rhs) { + const std::string& LhsParent = Parents[Lhs]; + const std::string& RhsParent = Parents[Rhs]; + if (LhsParent < RhsParent) + { + return true; + } + else if (LhsParent > RhsParent) + { + return false; + } + return Filenames[Lhs] < Filenames[Rhs]; + }); + FolderContent OrderedContent; + OrderedContent.Paths.reserve(PathCount); + OrderedContent.RawSizes.reserve(PathCount); + OrderedContent.Attributes.reserve(PathCount); + OrderedContent.ModificationTicks.reserve(PathCount); + for (size_t OrderIndex : Order) + { + OrderedContent.Paths.emplace_back(std::move(Content.Paths[OrderIndex])); + OrderedContent.RawSizes.emplace_back(Content.RawSizes[OrderIndex]); + OrderedContent.Attributes.emplace_back(Content.Attributes[OrderIndex]); + OrderedContent.ModificationTicks.emplace_back(Content.ModificationTicks[OrderIndex]); + } + return OrderedContent; +} + +void +SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output) +{ + Output.AddString("platform"sv, ToString(Content.Platform)); + compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); + compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); + compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output); + compactbinary_helpers::WriteArray(Content.RawHashes, "rawHashes"sv, Output); + + Output.BeginObject("chunkedContent"); + compactbinary_helpers::WriteArray(Content.ChunkedContent.SequenceRawHashes, "sequenceRawHashes"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkCounts, "chunkCounts"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkOrders, "chunkOrders"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkHashes, "chunkHashes"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkRawSizes, "chunkRawSizes"sv, Output); + Output.EndObject(); // chunkedContent +} + +ChunkedFolderContent +LoadChunkedFolderContentToCompactBinary(CbObjectView Input) +{ + ChunkedFolderContent Content; + Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); + compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); + compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes); + compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes); + compactbinary_helpers::ReadArray("rawHashes"sv, Input, Content.RawHashes); + + CbObjectView ChunkedContentView = Input["chunkedContent"sv].AsObjectView(); + compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkedContentView, Content.ChunkedContent.SequenceRawHashes); + compactbinary_helpers::ReadArray("chunkCounts"sv, ChunkedContentView, Content.ChunkedContent.ChunkCounts); + compactbinary_helpers::ReadArray("chunkOrders"sv, ChunkedContentView, Content.ChunkedContent.ChunkOrders); + compactbinary_helpers::ReadArray("chunkHashes"sv, ChunkedContentView, Content.ChunkedContent.ChunkHashes); + compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkedContentView, Content.ChunkedContent.ChunkRawSizes); + return Content; +} + +ChunkedFolderContent +MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span Overlays) +{ + ZEN_ASSERT(!Overlays.empty()); + + ChunkedFolderContent Result; + const size_t BasePathCount = Base.Paths.size(); + Result.Paths.reserve(BasePathCount); + Result.RawSizes.reserve(BasePathCount); + Result.Attributes.reserve(BasePathCount); + Result.RawHashes.reserve(BasePathCount); + + const size_t BaseChunkCount = Base.ChunkedContent.ChunkHashes.size(); + Result.ChunkedContent.SequenceRawHashes.reserve(Base.ChunkedContent.SequenceRawHashes.size()); + Result.ChunkedContent.ChunkCounts.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkOrders.reserve(Base.ChunkedContent.ChunkOrders.size()); + + tsl::robin_map GenericPathToActualPath; + for (const std::filesystem::path& Path : Base.Paths) + { + GenericPathToActualPath.insert({PathCompareString(Path), Path}); + } + for (const ChunkedFolderContent& Overlay : Overlays) + { + for (const std::filesystem::path& Path : Overlay.Paths) + { + GenericPathToActualPath.insert({PathCompareString(Path), Path}); + } + } + + tsl::robin_map RawHashToSequenceRawHashIndex; + + auto BuildOverlayPaths = [](std::span Overlays) -> tsl::robin_set { + tsl::robin_set Result; + for (const ChunkedFolderContent& OverlayContent : Overlays) + { + for (const std::filesystem::path& Path : OverlayContent.Paths) + { + Result.insert(PathCompareString(Path)); + } + } + return Result; + }; + + auto AddContent = [&BuildOverlayPaths](ChunkedFolderContent& Result, + const ChunkedFolderContent& OverlayContent, + tsl::robin_map& ChunkHashToChunkIndex, + tsl::robin_map& RawHashToSequenceRawHashIndex, + const tsl::robin_map& GenericPathToActualPath, + std::span Overlays) { + const ChunkedContentLookup OverlayLookup = BuildChunkedContentLookup(OverlayContent); + tsl::robin_set BaseOverlayPaths = BuildOverlayPaths(Overlays); + for (uint32_t PathIndex = 0; PathIndex < OverlayContent.Paths.size(); PathIndex++) + { + std::string GenericPath = PathCompareString(OverlayContent.Paths[PathIndex]); + if (!BaseOverlayPaths.contains(GenericPath)) + { + // This asset will not be overridden by a later layer - add it + + const std::filesystem::path OriginalPath = GenericPathToActualPath.at(GenericPath); + Result.Paths.push_back(OriginalPath); + const IoHash& RawHash = OverlayContent.RawHashes[PathIndex]; + Result.RawSizes.push_back(OverlayContent.RawSizes[PathIndex]); + Result.Attributes.push_back(OverlayContent.Attributes[PathIndex]); + Result.RawHashes.push_back(RawHash); + + if (OverlayContent.RawSizes[PathIndex] > 0) + { + if (!RawHashToSequenceRawHashIndex.contains(RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {RawHash, gsl::narrow(Result.ChunkedContent.SequenceRawHashes.size())}); + const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceRawHashIndex.at(RawHash); + const uint32_t OrderIndexOffset = OverlayLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + ChunkingStatistics Stats; + std::span OriginalChunkOrder = + std::span(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); + AddCunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + OverlayContent.ChunkedContent.ChunkHashes, + OverlayContent.ChunkedContent.ChunkRawSizes); + Stats.UniqueSequencesFound++; + } + } + } + } + }; + + tsl::robin_map MergedChunkHashToChunkIndex; + AddContent(Result, Base, MergedChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, GenericPathToActualPath, Overlays); + for (uint32_t OverlayIndex = 0; OverlayIndex < Overlays.size(); OverlayIndex++) + { + AddContent(Result, + Overlays[OverlayIndex], + MergedChunkHashToChunkIndex, + RawHashToSequenceRawHashIndex, + GenericPathToActualPath, + Overlays.subspan(OverlayIndex + 1)); + } + return Result; +} + +ChunkedFolderContent +DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span DeletedPaths) +{ + ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); + ChunkedFolderContent Result = {.Platform = BaseContent.Platform}; + if (DeletedPaths.size() < BaseContent.Paths.size()) + { + tsl::robin_set DeletedPathSet; + DeletedPathSet.reserve(DeletedPaths.size()); + for (const std::filesystem::path& DeletedPath : DeletedPaths) + { + DeletedPathSet.insert(PathCompareString(DeletedPath)); + } + const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent); + tsl::robin_map ChunkHashToChunkIndex; + + tsl::robin_map RawHashToSequenceRawHashIndex; + for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++) + { + const std::filesystem::path& Path = BaseContent.Paths[PathIndex]; + if (!DeletedPathSet.contains(PathCompareString(Path))) + { + const IoHash& RawHash = BaseContent.RawHashes[PathIndex]; + const uint64_t RawSize = BaseContent.RawSizes[PathIndex]; + Result.Paths.push_back(Path); + Result.RawSizes.push_back(RawSize); + Result.Attributes.push_back(BaseContent.Attributes[PathIndex]); + Result.RawHashes.push_back(RawHash); + if (RawSize > 0) + { + if (!RawHashToSequenceRawHashIndex.contains(RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {RawHash, gsl::narrow(Result.ChunkedContent.SequenceRawHashes.size())}); + const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceRawHashIndex.at(RawHash); + const uint32_t OrderIndexOffset = BaseLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + ChunkingStatistics Stats; + std::span OriginalChunkOrder = + std::span(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); + AddCunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + BaseContent.ChunkedContent.ChunkHashes, + BaseContent.ChunkedContent.ChunkRawSizes); + Stats.UniqueSequencesFound++; + } + } + } + } + } + return Result; +} + +ChunkedFolderContent +ChunkFolderContent(ChunkingStatistics& Stats, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& RootPath, + const FolderContent& Content, + const ChunkingController& InChunkingController, + int32_t UpdateInteralMS, + std::function&& UpdateCallback, + std::atomic& AbortFlag) +{ + Stopwatch Timer; + auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + ChunkedFolderContent Result = {.Platform = Content.Platform, + .Paths = Content.Paths, + .RawSizes = Content.RawSizes, + .Attributes = Content.Attributes}; + const size_t ItemCount = Result.Paths.size(); + Result.RawHashes.resize(ItemCount, IoHash::Zero); + Result.ChunkedContent.SequenceRawHashes.reserve(ItemCount); // Up to 1 per file, maybe less + Result.ChunkedContent.ChunkCounts.reserve(ItemCount); // Up to one per file + Result.ChunkedContent.ChunkOrders.reserve(ItemCount); // At least 1 per file, maybe more + Result.ChunkedContent.ChunkHashes.reserve(ItemCount); // At least 1 per file, maybe more + Result.ChunkedContent.ChunkRawSizes.reserve(ItemCount); // At least 1 per file, maybe more + tsl::robin_map ChunkHashToChunkIndex; + tsl::robin_map RawHashToChunkSequenceIndex; + RawHashToChunkSequenceIndex.reserve(ItemCount); + ChunkHashToChunkIndex.reserve(ItemCount); + { + std::vector Order; + Order.resize(ItemCount); + for (uint32_t I = 0; I < ItemCount; I++) + { + Order[I] = I; + } + + // Handle the biggest files first so we don't end up with one straggling large file at the end + // std::sort(Order.begin(), Order.end(), [&](uint32_t Lhs, uint32_t Rhs) { return Result.RawSizes[Lhs] > Result.RawSizes[Rhs]; + //}); + + tsl::robin_map RawHashToSequenceRawHashIndex; + RawHashToSequenceRawHashIndex.reserve(ItemCount); + + RwLock Lock; + + ParallellWork Work(AbortFlag); + + for (uint32_t PathIndex : Order) + { + if (Work.IsAborted()) + { + break; + } + Work.ScheduleWork( + WorkerPool, // GetSyncWorkerPool() + [&, PathIndex](std::atomic& AbortFlag) { + if (!AbortFlag) + { + IoHash RawHash = HashOneFile(Stats, + InChunkingController, + Result, + ChunkHashToChunkIndex, + RawHashToSequenceRawHashIndex, + Lock, + RootPath, + PathIndex); + Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; }); + Stats.FilesProcessed++; + } + }, + [&, PathIndex](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed scanning file {}. Reason: {}", Result.Paths[PathIndex], Ex.what()); + AbortFlag = true; + }); + } + + Work.Wait(UpdateInteralMS, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted); + ZEN_UNUSED(PendingWork); + UpdateCallback(Work.IsAborted(), Work.PendingWork().Remaining()); + }); + } + return Result; +} + +ChunkedContentLookup +BuildChunkedContentLookup(const ChunkedFolderContent& Content) +{ + struct ChunkLocationReference + { + uint32_t ChunkIndex; + ChunkedContentLookup::ChunkLocation Location; + }; + + ChunkedContentLookup Result; + { + const uint32_t SequenceRawHashesCount = gsl::narrow(Content.ChunkedContent.SequenceRawHashes.size()); + Result.RawHashToSequenceRawHashIndex.reserve(SequenceRawHashesCount); + Result.SequenceRawHashIndexChunkOrderOffset.reserve(SequenceRawHashesCount); + uint32_t OrderOffset = 0; + for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size(); + SequenceRawHashIndex++) + { + Result.RawHashToSequenceRawHashIndex.insert( + {Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); + Result.SequenceRawHashIndexChunkOrderOffset.push_back(OrderOffset); + OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + } + } + + std::vector Locations; + Locations.reserve(Content.ChunkedContent.ChunkOrders.size()); + for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + { + if (Content.RawSizes[PathIndex] > 0) + { + const IoHash& RawHash = Content.RawHashes[PathIndex]; + uint32_t SequenceRawHashIndex = Result.RawHashToSequenceRawHashIndex.at(RawHash); + const uint32_t OrderOffset = Result.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + uint64_t LocationOffset = 0; + for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) + { + uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; + + Locations.push_back(ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkLocation{PathIndex, LocationOffset}}); + + LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + ZEN_ASSERT(LocationOffset == Content.RawSizes[PathIndex]); + } + } + + std::sort(Locations.begin(), Locations.end(), [](const ChunkLocationReference& Lhs, const ChunkLocationReference& Rhs) { + if (Lhs.ChunkIndex < Rhs.ChunkIndex) + { + return true; + } + if (Lhs.ChunkIndex > Rhs.ChunkIndex) + { + return false; + } + if (Lhs.Location.PathIndex < Rhs.Location.PathIndex) + { + return true; + } + if (Lhs.Location.PathIndex > Rhs.Location.PathIndex) + { + return false; + } + return Lhs.Location.Offset < Rhs.Location.Offset; + }); + + Result.ChunkLocations.reserve(Locations.size()); + const uint32_t ChunkCount = gsl::narrow(Content.ChunkedContent.ChunkHashes.size()); + Result.ChunkHashToChunkIndex.reserve(ChunkCount); + size_t RangeOffset = 0; + for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) + { + Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex}); + uint32_t Count = 0; + while (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex) + { + Result.ChunkLocations.push_back(Locations[RangeOffset + Count].Location); + Count++; + } + Result.ChunkLocationOffset.push_back(RangeOffset); + Result.ChunkLocationCounts.push_back(Count); + RangeOffset += Count; + } + + return Result; +} + +} // namespace zen diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp new file mode 100644 index 000000000..bc0e57b14 --- /dev/null +++ b/src/zenutil/chunkingcontroller.cpp @@ -0,0 +1,265 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +#include +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { +using namespace std::literals; + +namespace { + std::vector ReadStringArray(CbArrayView StringArray) + { + std::vector Result; + Result.reserve(StringArray.Num()); + for (CbFieldView FieldView : StringArray) + { + Result.emplace_back(FieldView.AsString()); + } + return Result; + } + + ChunkedParams ReadChunkParams(CbObjectView Params) + { + bool UseThreshold = Params["UseThreshold"sv].AsBool(true); + size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize); + size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize); + size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize); + + return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize}; + } + +} // namespace + +class BasicChunkingController : public ChunkingController +{ +public: + BasicChunkingController(std::span ExcludeExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams) + : m_ChunkExcludeExtensions(ExcludeExtensions.begin(), ExcludeExtensions.end()) + , m_ChunkFileSizeLimit(ChunkFileSizeLimit) + , m_ChunkingParams(ChunkingParams) + { + } + + BasicChunkingController(CbObjectView Parameters) + : m_ChunkExcludeExtensions(ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView())) + , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit)) + , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())) + { + } + + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic& BytesProcessed) const override + { + const bool ExcludeFromChunking = + std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != + m_ChunkExcludeExtensions.end(); + + if (ExcludeFromChunking || (RawSize < m_ChunkFileSizeLimit)) + { + return false; + } + + BasicFile Buffer(InputPath, BasicFile::Mode::kRead); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + return true; + } + + virtual std::string_view GetName() const override { return Name; } + + virtual CbObject GetParameters() const override + { + CbObjectWriter Writer; + Writer.BeginArray("ChunkExcludeExtensions"sv); + { + for (const std::string& Extension : m_ChunkExcludeExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit); + Writer.BeginObject("ChunkingParams"sv); + { + Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold); + + Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize); + Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize); + Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize); + } + Writer.EndObject(); // ChunkingParams + return Writer.Save(); + } + static constexpr std::string_view Name = "BasicChunkingController"sv; + +protected: + const std::vector m_ChunkExcludeExtensions; + const uint64_t m_ChunkFileSizeLimit; + const ChunkedParams m_ChunkingParams; +}; + +class ChunkingControllerWithFixedChunking : public ChunkingController +{ +public: + ChunkingControllerWithFixedChunking(std::span FixedChunkingExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams, + uint32_t FixedChunkingChunkSize) + : m_FixedChunkingExtensions(FixedChunkingExtensions.begin(), FixedChunkingExtensions.end()) + , m_ChunkFileSizeLimit(ChunkFileSizeLimit) + , m_ChunkingParams(ChunkingParams) + , m_FixedChunkingChunkSize(FixedChunkingChunkSize) + { + } + + ChunkingControllerWithFixedChunking(CbObjectView Parameters) + : m_FixedChunkingExtensions(ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView())) + , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit)) + , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())) + , m_FixedChunkingChunkSize(Parameters["FixedChunkingChunkSize"sv].AsUInt32(16u * 1024u * 1024u)) + { + } + + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic& BytesProcessed) const override + { + if (RawSize < m_ChunkFileSizeLimit) + { + return false; + } + const bool FixedChunking = std::find(m_FixedChunkingExtensions.begin(), m_FixedChunkingExtensions.end(), InputPath.extension()) != + m_FixedChunkingExtensions.end(); + + if (FixedChunking) + { + IoHashStream FullHash; + IoBuffer Source = IoBufferBuilder::MakeFromFile(InputPath); + uint64_t Offset = 0; + tsl::robin_map ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(1 + (RawSize / m_FixedChunkingChunkSize)); + while (Offset < RawSize) + { + uint64_t ChunkSize = std::min(RawSize - Offset, m_FixedChunkingChunkSize); + IoBuffer Chunk(Source, Offset, ChunkSize); + MemoryView ChunkData = Chunk.GetView(); + FullHash.Append(ChunkData); + + IoHash ChunkHash = IoHash::HashBuffer(ChunkData); + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + OutChunked.Info.ChunkSequence.push_back(It->second); + } + else + { + uint32_t ChunkIndex = gsl::narrow(OutChunked.Info.ChunkHashes.size()); + OutChunked.Info.ChunkHashes.push_back(ChunkHash); + OutChunked.Info.ChunkSequence.push_back(ChunkIndex); + OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow(ChunkSize)}); + } + Offset += ChunkSize; + BytesProcessed.fetch_add(ChunkSize); + } + OutChunked.Info.RawSize = RawSize; + OutChunked.Info.RawHash = FullHash.GetHash(); + return true; + } + else + { + BasicFile Buffer(InputPath, BasicFile::Mode::kRead); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + return true; + } + } + + virtual std::string_view GetName() const override { return Name; } + + virtual CbObject GetParameters() const override + { + CbObjectWriter Writer; + Writer.BeginArray("FixedChunkingExtensions"); + { + for (const std::string& Extension : m_FixedChunkingExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit); + Writer.BeginObject("ChunkingParams"sv); + { + Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold); + + Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize); + Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize); + Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize); + } + Writer.EndObject(); // ChunkingParams + Writer.AddInteger("FixedChunkingChunkSize"sv, m_FixedChunkingChunkSize); + return Writer.Save(); + } + + static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv; + +protected: + const std::vector m_FixedChunkingExtensions; + const uint64_t m_ChunkFileSizeLimit; + const ChunkedParams m_ChunkingParams; + const uint32_t m_FixedChunkingChunkSize; +}; + +std::unique_ptr +CreateBasicChunkingController(std::span ExcludeExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams) +{ + return std::make_unique(ExcludeExtensions, ChunkFileSizeLimit, ChunkingParams); +} +std::unique_ptr +CreateBasicChunkingController(CbObjectView Parameters) +{ + return std::make_unique(Parameters); +} + +std::unique_ptr +CreateChunkingControllerWithFixedChunking(std::span FixedChunkingExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams, + uint32_t FixedChunkingChunkSize) +{ + return std::make_unique(FixedChunkingExtensions, + ChunkFileSizeLimit, + ChunkingParams, + FixedChunkingChunkSize); +} +std::unique_ptr +CreateChunkingControllerWithFixedChunking(CbObjectView Parameters) +{ + return std::make_unique(Parameters); +} + +std::unique_ptr +CreateChunkingController(std::string_view Name, CbObjectView Parameters) +{ + if (Name == BasicChunkingController::Name) + { + return CreateBasicChunkingController(Parameters); + } + else if (Name == ChunkingControllerWithFixedChunking::Name) + { + return CreateChunkingControllerWithFixedChunking(Parameters); + } + return {}; +} + +} // namespace zen diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp new file mode 100644 index 000000000..78ebcdd55 --- /dev/null +++ b/src/zenutil/filebuildstorage.cpp @@ -0,0 +1,616 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +#include +#include +#include +#include +#include +#include + +namespace zen { + +using namespace std::literals; + +class FileBuildStorage : public BuildStorage +{ +public: + explicit FileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec, + double DelayPerKBSec) + : m_StoragePath(StoragePath) + , m_Stats(Stats) + , m_EnableJsonOutput(EnableJsonOutput) + , m_LatencySec(LatencySec) + , m_DelayPerKBSec(DelayPerKBSec) + { + CreateDirectories(GetBuildsFolder()); + CreateDirectories(GetBlobsFolder()); + CreateDirectories(GetBlobsMetadataFolder()); + } + + virtual ~FileBuildStorage() {} + + virtual CbObject ListBuilds(CbObject Query) override + { + ZEN_UNUSED(Query); + + SimulateLatency(Query.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildFolder = GetBuildsFolder(); + DirectoryContent Content; + GetDirectoryContent(BuildFolder, DirectoryContentFlags::IncludeDirs, Content); + CbObjectWriter Writer; + Writer.BeginArray("results"); + { + for (const std::filesystem::path& BuildPath : Content.Directories) + { + Oid BuildId = Oid::TryFromHexString(BuildPath.stem().string()); + if (BuildId != Oid::Zero) + { + Writer.BeginObject(); + { + Writer.AddObjectId("buildId", BuildId); + Writer.AddObject("metadata", ReadBuild(BuildId)["metadata"sv].AsObjectView()); + } + Writer.EndObject(); + } + } + } + Writer.EndArray(); // builds + Writer.Save(); + SimulateLatency(Writer.GetSaveSize(), 0); + return Writer.Save(); + } + + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override + { + SimulateLatency(MetaData.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + CbObjectWriter BuildObject; + BuildObject.AddObject("metadata", MetaData); + BuildObject.AddInteger("chunkSize"sv, 32u * 1024u * 1024u); + WriteBuild(BuildId, BuildObject.Save()); + + CbObjectWriter BuildResponse; + BuildResponse.AddInteger("chunkSize"sv, 32u * 1024u * 1024u); + BuildResponse.Save(); + + SimulateLatency(0, BuildResponse.GetSaveSize()); + return BuildResponse.Save(); + } + + virtual CbObject GetBuild(const Oid& BuildId) override + { + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + CbObject Build = ReadBuild(BuildId); + SimulateLatency(0, Build.GetSize()); + return Build; + } + + virtual void FinalizeBuild(const Oid& BuildId) override + { + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + } + + virtual std::pair> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) override + { + SimulateLatency(MetaData.GetSize(), 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + CreateDirectories(BuildPartDataPath.parent_path()); + + TemporaryFile::SafeWriteFile(BuildPartDataPath, MetaData.GetView()); + m_WrittenBytes += MetaData.GetSize(); + WriteAsJson(BuildPartDataPath, MetaData); + + IoHash RawHash = IoHash::HashBuffer(MetaData.GetView()); + + CbObjectWriter Writer; + { + CbObject BuildObject = ReadBuild(BuildId); + CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); + CbObjectView MetaDataView = BuildObject["metadata"sv].AsObjectView(); + + Writer.AddObject("metadata"sv, MetaDataView); + Writer.BeginObject("parts"sv); + { + for (CbFieldView PartView : PartsObject) + { + if (PartView.GetName() != PartName) + { + Writer.AddObjectId(PartView.GetName(), PartView.AsObjectId()); + } + } + Writer.AddObjectId(PartName, BuildPartId); + } + Writer.EndObject(); // parts + } + WriteBuild(BuildId, Writer.Save()); + + std::vector NeededAttachments = GetNeededAttachments(MetaData); + + SimulateLatency(0, sizeof(IoHash) * NeededAttachments.size()); + + return std::make_pair(RawHash, std::move(NeededAttachments)); + } + + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override + { + SimulateLatency(0, 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + + IoBuffer Payload = ReadFile(BuildPartDataPath).Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + + ZEN_ASSERT(ValidateCompactBinary(Payload.GetView(), CbValidateMode::Default) == CbValidateError::None); + + CbObject BuildPartObject = CbObject(SharedBuffer(Payload)); + + SimulateLatency(0, BuildPartObject.GetSize()); + + return BuildPartObject; + } + + virtual std::vector FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override + { + SimulateLatency(0, 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + IoBuffer Payload = ReadFile(BuildPartDataPath).Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + IoHash RawHash = IoHash::HashBuffer(Payload.GetView()); + if (RawHash != PartHash) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part {}: Expected hash {}, got {}", BuildPartId, PartHash, RawHash)); + } + + CbObject BuildPartObject = CbObject(SharedBuffer(Payload)); + std::vector NeededAttachments(GetNeededAttachments(BuildPartObject)); + + SimulateLatency(0, NeededAttachments.size() * sizeof(IoHash)); + + return NeededAttachments; + } + + virtual void PutBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + const CompositeBuffer& Payload) override + { + ZEN_UNUSED(BuildId); + ZEN_ASSERT(ContentType == ZenContentType::kCompressedBinary); + SimulateLatency(Payload.GetSize(), 0); + + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, Payload)); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (!std::filesystem::is_regular_file(BlockPath)) + { + CreateDirectories(BlockPath.parent_path()); + TemporaryFile::SafeWriteFile(BlockPath, Payload.Flatten().GetView()); + } + m_Stats.TotalBytesWritten += Payload.GetSize(); + SimulateLatency(0, 0); + } + + virtual std::vector> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function&& Transmitter, + std::function&& OnSentBytes) override + { + ZEN_UNUSED(BuildId); + ZEN_UNUSED(ContentType); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (!std::filesystem::is_regular_file(BlockPath)) + { + CreateDirectories(BlockPath.parent_path()); + + struct WorkloadData + { + std::function Transmitter; + std::function OnSentBytes; + TemporaryFile TempFile; + std::atomic PartsLeft; + }; + + std::shared_ptr Workload(std::make_shared()); + Workload->Transmitter = std::move(Transmitter); + Workload->OnSentBytes = std::move(OnSentBytes); + std::error_code Ec; + Workload->TempFile.CreateTemporary(BlockPath.parent_path(), Ec); + + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed opening temporary file '{}': {} ({})", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); + } + + std::vector> WorkItems; + uint64_t Offset = 0; + while (Offset < PayloadSize) + { + uint64_t Size = Min(32u * 1024u * 1024u, PayloadSize - Offset); + + WorkItems.push_back([this, RawHash, BlockPath, Workload, Offset, Size]() { + IoBuffer PartPayload = Workload->Transmitter(Offset, Size); + SimulateLatency(PartPayload.GetSize(), 0); + + std::error_code Ec; + Workload->TempFile.Write(PartPayload, Offset, Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Failed writing to temporary file '{}': {} ({})", + Workload->TempFile.GetPath(), + Ec.message(), + Ec.value())); + } + uint64_t BytesWritten = PartPayload.GetSize(); + m_Stats.TotalBytesWritten += BytesWritten; + const bool IsLastPart = Workload->PartsLeft.fetch_sub(1) == 1; + if (IsLastPart) + { + Workload->TempFile.Flush(); + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(Workload->TempFile.ReadAll()))); + Workload->TempFile.MoveTemporaryIntoPlace(BlockPath, Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Failed moving temporary file '{}' to '{}': {} ({})", + Workload->TempFile.GetPath(), + BlockPath, + Ec.message(), + Ec.value())); + } + } + Workload->OnSentBytes(BytesWritten, IsLastPart); + SimulateLatency(0, 0); + }); + + Offset += Size; + } + Workload->PartsLeft.store(WorkItems.size()); + + SimulateLatency(0, 0); + return WorkItems; + } + SimulateLatency(0, 0); + return {}; + } + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) override + { + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (std::filesystem::is_regular_file(BlockPath)) + { + IoBuffer Payload = ReadFile(BlockPath).Flatten(); + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(SharedBuffer(Payload)))); + m_Stats.TotalBytesRead += Payload.GetSize(); + Payload.SetContentType(ZenContentType::kCompressedBinary); + SimulateLatency(0, Payload.GetSize()); + return Payload; + } + SimulateLatency(0, 0); + return IoBuffer{}; + } + + virtual std::vector> GetLargeBuildBlob( + const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function&& Receiver) override + { + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (std::filesystem::is_regular_file(BlockPath)) + { + struct WorkloadData + { + std::atomic BytesRemaining; + IoBuffer BlobFile; + std::function Receiver; + }; + + std::shared_ptr Workload(std::make_shared()); + Workload->BlobFile = IoBufferBuilder::MakeFromFile(BlockPath); + const uint64_t BlobSize = Workload->BlobFile.GetSize(); + + Workload->Receiver = std::move(Receiver); + Workload->BytesRemaining = BlobSize; + + std::vector> WorkItems; + uint64_t Offset = 0; + while (Offset < BlobSize) + { + uint64_t Size = Min(ChunkSize, BlobSize - Offset); + WorkItems.push_back([this, BlockPath, Workload, Offset, Size]() { + SimulateLatency(0, 0); + IoBuffer PartPayload(Workload->BlobFile, Offset, Size); + m_Stats.TotalBytesRead += PartPayload.GetSize(); + uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Size); + Workload->Receiver(Offset, PartPayload, ByteRemaning); + SimulateLatency(Size, PartPayload.GetSize()); + }); + + Offset += Size; + } + SimulateLatency(0, 0); + return WorkItems; + } + return {}; + } + + virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override + { + ZEN_UNUSED(BuildId); + + SimulateLatency(MetaData.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockMetaDataPath = GetBlobMetadataPath(BlockRawHash); + CreateDirectories(BlockMetaDataPath.parent_path()); + TemporaryFile::SafeWriteFile(BlockMetaDataPath, MetaData.GetView()); + m_Stats.TotalBytesWritten += MetaData.GetSize(); + WriteAsJson(BlockMetaDataPath, MetaData); + SimulateLatency(0, 0); + } + + virtual std::vector FindBlocks(const Oid& BuildId) override + { + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + DirectoryContent Content; + GetDirectoryContent(GetBlobsMetadataFolder(), DirectoryContentFlags::IncludeFiles, Content); + std::vector Result; + for (const std::filesystem::path& MetaDataFile : Content.Files) + { + IoHash ChunkHash; + if (IoHash::TryParse(MetaDataFile.stem().string(), ChunkHash)) + { + std::filesystem::path BlockPath = GetBlobPayloadPath(ChunkHash); + if (std::filesystem::is_regular_file(BlockPath)) + { + IoBuffer BlockMetaDataPayload = ReadFile(MetaDataFile).Flatten(); + + m_Stats.TotalBytesRead += BlockMetaDataPayload.GetSize(); + + CbObject BlockObject = CbObject(SharedBuffer(BlockMetaDataPayload)); + Result.emplace_back(ParseChunkBlockDescription(BlockObject)); + } + } + } + SimulateLatency(0, sizeof(IoHash) * Result.size()); + return Result; + } + + virtual std::vector GetBlockMetadata(const Oid& BuildId, std::span BlockHashes) override + { + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + std::vector Result; + for (const IoHash& BlockHash : BlockHashes) + { + std::filesystem::path MetaDataFile = GetBlobMetadataPath(BlockHash); + if (std::filesystem::is_regular_file(MetaDataFile)) + { + IoBuffer BlockMetaDataPayload = ReadFile(MetaDataFile).Flatten(); + + m_Stats.TotalBytesRead += BlockMetaDataPayload.GetSize(); + + CbObject BlockObject = CbObject(SharedBuffer(BlockMetaDataPayload)); + Result.emplace_back(ParseChunkBlockDescription(BlockObject)); + } + } + SimulateLatency(sizeof(BlockHashes) * BlockHashes.size(), sizeof(ChunkBlockDescription) * Result.size()); + return Result; + } + +protected: + std::filesystem::path GetBuildsFolder() const { return m_StoragePath / "builds"; } + std::filesystem::path GetBlobsFolder() const { return m_StoragePath / "blobs"; } + std::filesystem::path GetBlobsMetadataFolder() const { return m_StoragePath / "blocks"; } + std::filesystem::path GetBuildFolder(const Oid& BuildId) const { return GetBuildsFolder() / BuildId.ToString(); } + + std::filesystem::path GetBuildPath(const Oid& BuildId) const { return GetBuildFolder(BuildId) / "metadata.cb"; } + + std::filesystem::path GetBuildPartFolder(const Oid& BuildId, const Oid& BuildPartId) const + { + return GetBuildFolder(BuildId) / "parts" / BuildPartId.ToString(); + } + + std::filesystem::path GetBuildPartPath(const Oid& BuildId, const Oid& BuildPartId) const + { + return GetBuildPartFolder(BuildId, BuildPartId) / "metadata.cb"; + } + + std::filesystem::path GetBlobPayloadPath(const IoHash& RawHash) const { return GetBlobsFolder() / fmt::format("{}.cbz", RawHash); } + + std::filesystem::path GetBlobMetadataPath(const IoHash& RawHash) const + { + return GetBlobsMetadataFolder() / fmt::format("{}.cb", RawHash); + } + + void SimulateLatency(uint64_t ReceiveSize, uint64_t SendSize) + { + double SleepSec = m_LatencySec; + if (m_DelayPerKBSec > 0.0) + { + SleepSec += m_DelayPerKBSec * (double(SendSize + ReceiveSize) / 1024u); + } + if (SleepSec > 0) + { + Sleep(int(SleepSec * 1000)); + } + } + + void WriteAsJson(const std::filesystem::path& OriginalPath, CbObjectView Data) const + { + if (m_EnableJsonOutput) + { + ExtendableStringBuilder<128> SB; + CompactBinaryToJson(Data, SB); + std::filesystem::path JsonPath = OriginalPath; + JsonPath.replace_extension(".json"); + std::string_view JsonMetaData = SB.ToView(); + TemporaryFile::SafeWriteFile(JsonPath, MemoryView(JsonMetaData.data(), JsonMetaData.length())); + } + } + + void WriteBuild(const Oid& BuildId, CbObjectView Data) + { + const std::filesystem::path BuildDataPath = GetBuildPath(BuildId); + CreateDirectories(BuildDataPath.parent_path()); + TemporaryFile::SafeWriteFile(BuildDataPath, Data.GetView()); + m_Stats.TotalBytesWritten += Data.GetSize(); + WriteAsJson(BuildDataPath, Data); + } + + CbObject ReadBuild(const Oid& BuildId) + { + const std::filesystem::path BuildDataPath = GetBuildPath(BuildId); + FileContents Content = ReadFile(BuildDataPath); + if (Content.ErrorCode) + { + throw std::runtime_error(fmt::format("Failed reading build '{}' from '{}': {} ({})", + BuildId, + BuildDataPath, + Content.ErrorCode.message(), + Content.ErrorCode.value())); + } + IoBuffer Payload = Content.Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + ZEN_ASSERT(ValidateCompactBinary(Payload.GetView(), CbValidateMode::Default) == CbValidateError::None); + CbObject BuildObject = CbObject(SharedBuffer(Payload)); + return BuildObject; + } + + std::vector GetNeededAttachments(CbObjectView BuildPartObject) + { + std::vector NeededAttachments; + BuildPartObject.IterateAttachments([&](CbFieldView FieldView) { + const IoHash AttachmentHash = FieldView.AsBinaryAttachment(); + const std::filesystem::path BlockPath = GetBlobPayloadPath(AttachmentHash); + if (!std::filesystem::is_regular_file(BlockPath)) + { + NeededAttachments.push_back(AttachmentHash); + } + }); + return NeededAttachments; + } + + bool ValidateCompressedBuffer(const IoHash& RawHash, const CompositeBuffer& Payload) + { + IoHash VerifyHash; + uint64_t VerifySize; + CompressedBuffer ValidateBuffer = CompressedBuffer::FromCompressed(Payload, VerifyHash, VerifySize); + if (!ValidateBuffer) + { + return false; + } + if (VerifyHash != RawHash) + { + return false; + } + CompositeBuffer Decompressed = ValidateBuffer.DecompressToComposite(); + if (!Decompressed) + { + return false; + } + IoHash Hash = IoHash::HashBuffer(Decompressed); + if (Hash != RawHash) + { + return false; + } + return true; + } + +private: + const std::filesystem::path m_StoragePath; + BuildStorage::Statistics& m_Stats; + const bool m_EnableJsonOutput = false; + std::atomic m_WrittenBytes; + + const double m_LatencySec = 0.0; + const double m_DelayPerKBSec = 0.0; +}; + +std::unique_ptr +CreateFileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec, + double DelayPerKBSec) +{ + return std::make_unique(StoragePath, Stats, EnableJsonOutput, LatencySec, DelayPerKBSec); +} + +} // namespace zen diff --git a/src/zenutil/include/zenutil/buildstorage.h b/src/zenutil/include/zenutil/buildstorage.h new file mode 100644 index 000000000..9c236310f --- /dev/null +++ b/src/zenutil/include/zenutil/buildstorage.h @@ -0,0 +1,55 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +namespace zen { + +class BuildStorage +{ +public: + struct Statistics + { + std::atomic TotalBytesRead = 0; + std::atomic TotalBytesWritten = 0; + std::atomic TotalRequestCount = 0; + std::atomic TotalRequestTimeUs = 0; + std::atomic TotalExecutionTimeUs = 0; + }; + + virtual ~BuildStorage() {} + + virtual CbObject ListBuilds(CbObject Query) = 0; + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) = 0; + virtual CbObject GetBuild(const Oid& BuildId) = 0; + virtual void FinalizeBuild(const Oid& BuildId) = 0; + + virtual std::pair> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) = 0; + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) = 0; + virtual std::vector FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) = 0; + virtual void PutBuildBlob(const Oid& BuildId, const IoHash& RawHash, ZenContentType ContentType, const CompositeBuffer& Payload) = 0; + virtual std::vector> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function&& Transmitter, + std::function&& OnSentBytes) = 0; + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) = 0; + virtual std::vector> GetLargeBuildBlob( + const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function&& Receiver) = 0; + + virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) = 0; + virtual std::vector FindBlocks(const Oid& BuildId) = 0; + virtual std::vector GetBlockMetadata(const Oid& BuildId, std::span BlockHashes) = 0; +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkblock.h b/src/zenutil/include/zenutil/chunkblock.h index 9b7414629..21107fb7c 100644 --- a/src/zenutil/include/zenutil/chunkblock.h +++ b/src/zenutil/include/zenutil/chunkblock.h @@ -12,21 +12,28 @@ namespace zen { -struct ChunkBlockDescription +struct ThinChunkBlockDescription { - IoHash BlockHash; - std::vector ChunkHashes; + IoHash BlockHash; + std::vector ChunkRawHashes; +}; + +struct ChunkBlockDescription : public ThinChunkBlockDescription +{ + uint64_t HeaderSize; std::vector ChunkRawLengths; + std::vector ChunkCompressedLengths; }; std::vector ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject); ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject); CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData); - +ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash); typedef std::function(const IoHash& RawHash)> FetchChunkFunc; CompressedBuffer GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock); bool IterateChunkBlock(const SharedBuffer& BlockPayload, - std::function Visitor); + std::function Visitor, + uint64_t& OutHeaderSize); } // namespace zen diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h new file mode 100644 index 000000000..15c687462 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -0,0 +1,256 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include +#include + +#include +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class CbWriter; +class ChunkingController; +class WorkerThreadPool; + +enum class SourcePlatform +{ + Windows = 0, + Linux = 1, + MacOS = 2, + _Count +}; + +std::string_view ToString(SourcePlatform Platform); +SourcePlatform FromString(std::string_view Platform, SourcePlatform Default); +SourcePlatform GetSourceCurrentPlatform(); + +struct FolderContent +{ + SourcePlatform Platform = GetSourceCurrentPlatform(); + std::vector Paths; + std::vector RawSizes; + std::vector Attributes; + std::vector ModificationTicks; + + bool operator==(const FolderContent& Rhs) const; + + bool AreKnownFilesEqual(const FolderContent& Rhs) const; + void UpdateState(const FolderContent& Rhs, std::vector& PathIndexesOufOfDate); + static bool AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs); +}; + +FolderContent GetUpdatedContent(const FolderContent& Old, + const FolderContent& New, + std::vector& OutDeletedPathIndexes); + +void SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output); +FolderContent LoadFolderContentToCompactBinary(CbObjectView Input); + +struct GetFolderContentStatistics +{ + std::atomic FoundFileCount = 0; + std::atomic FoundFileByteCount = 0; + std::atomic AcceptedFileCount = 0; + std::atomic AcceptedFileByteCount = 0; + uint64_t ElapsedWallTimeUS = 0; +}; + +FolderContent GetFolderContent(GetFolderContentStatistics& Stats, + const std::filesystem::path& RootPath, + std::function&& AcceptDirectory, + std::function&& AcceptFile, + WorkerThreadPool& WorkerPool, + int32_t UpdateInteralMS, + std::function&& UpdateCallback, + std::atomic& AbortFlag); + +struct ChunkedContentData +{ + // To describe one asset with a particular RawHash, find the index of the hash in SequenceRawHashes + // ChunkCounts for that index will be the number of indexes in ChunkOrders that describe + // the sequence of chunks required to reconstruct the asset. + // Offset into ChunkOrders is based on how many entries in ChunkOrders the previous [n - 1] SequenceRawHashes uses + std::vector SequenceRawHashes; // Raw hash for Chunk sequence + std::vector ChunkCounts; // Chunk count of ChunkOrder for SequenceRawHashes[n] + std::vector ChunkOrders; // Chunk sequence indexed into ChunkHashes, ChunkCounts[n] indexes per SequenceRawHashes[n] + std::vector ChunkHashes; // Unique chunk hashes + std::vector ChunkRawSizes; // Unique chunk raw size for ChunkHash[n] +}; + +struct ChunkedFolderContent +{ + SourcePlatform Platform = GetSourceCurrentPlatform(); + std::vector Paths; + std::vector RawSizes; + std::vector Attributes; + std::vector RawHashes; + ChunkedContentData ChunkedContent; +}; + +void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output); +ChunkedFolderContent LoadChunkedFolderContentToCompactBinary(CbObjectView Input); + +ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span Overlays); +ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, std::span DeletedPaths); + +struct ChunkingStatistics +{ + std::atomic FilesProcessed = 0; + std::atomic FilesChunked = 0; + std::atomic BytesHashed = 0; + std::atomic UniqueChunksFound = 0; + std::atomic UniqueSequencesFound = 0; + std::atomic UniqueBytesFound = 0; + uint64_t ElapsedWallTimeUS = 0; +}; + +ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& RootPath, + const FolderContent& Content, + const ChunkingController& InChunkingController, + int32_t UpdateInteralMS, + std::function&& UpdateCallback, + std::atomic& AbortFlag); + +struct ChunkedContentLookup +{ + struct ChunkLocation + { + uint32_t PathIndex; + uint64_t Offset; + }; + tsl::robin_map ChunkHashToChunkIndex; + tsl::robin_map RawHashToSequenceRawHashIndex; + std::vector SequenceRawHashIndexChunkOrderOffset; + std::vector ChunkLocations; + std::vector ChunkLocationOffset; // ChunkLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex + std::vector ChunkLocationCounts; // ChunkLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex +}; + +ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); + +inline std::pair +GetChunkLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +{ + return std::make_pair(Lookup.ChunkLocationOffset[ChunkIndex], Lookup.ChunkLocationCounts[ChunkIndex]); +} + +inline std::span +GetChunkLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +{ + std::pair Range = GetChunkLocationRange(Lookup, ChunkIndex); + return std::span(Lookup.ChunkLocations).subspan(Range.first, Range.second); +} + +namespace compactbinary_helpers { + template + void WriteArray(std::span Values, std::string_view ArrayName, CbWriter& Output) + { + Output.BeginArray(ArrayName); + for (const Type Value : Values) + { + Output << Value; + } + Output.EndArray(); + } + + template + void WriteArray(const std::vector& Values, std::string_view ArrayName, CbWriter& Output) + { + WriteArray(std::span(Values), ArrayName, Output); + } + + template<> + inline void WriteArray(std::span Values, std::string_view ArrayName, CbWriter& Output) + { + Output.BeginArray(ArrayName); + for (const std::filesystem::path& Path : Values) + { + Output.AddString((const char*)Path.generic_u8string().c_str()); + } + Output.EndArray(); + } + + template<> + inline void WriteArray(const std::vector& Values, std::string_view ArrayName, CbWriter& Output) + { + WriteArray(std::span(Values), ArrayName, Output); + } + + inline void WriteBinaryAttachmentArray(std::span Values, std::string_view ArrayName, CbWriter& Output) + { + Output.BeginArray(ArrayName); + for (const IoHash& Hash : Values) + { + Output.AddBinaryAttachment(Hash); + } + Output.EndArray(); + } + + inline void WriteBinaryAttachmentArray(const std::vector& Values, std::string_view ArrayName, CbWriter& Output) + { + WriteArray(std::span(Values), ArrayName, Output); + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsUInt32()); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsUInt64()); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + std::u8string_view U8Path = ItemView.AsU8String(); + Result.push_back(std::filesystem::path(U8Path)); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsHash()); + } + } + + inline void ReadBinaryAttachmentArray(std::string_view ArrayName, CbObjectView Input, std::vector& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsBinaryAttachment()); + } + } + +} // namespace compactbinary_helpers + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h new file mode 100644 index 000000000..fe4fc1bb5 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkingcontroller.h @@ -0,0 +1,55 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include + +#include + +#include +#include + +namespace zen { + +const std::vector DefaultChunkingExcludeExtensions = {".exe", ".dll", ".pdb", ".self"}; + +const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u, + .MaxSize = 128u * 1024u, + .AvgSize = ((8u * 4u) * 1024u) + 128u}; + +const size_t DefaultChunkingFileSizeLimit = DefaultChunkedParams.MaxSize; + +const uint32_t DefaultFixedChunkingChunkSize = 16u * 1024u * 1024u; + +struct ChunkedInfoWithSource; + +class ChunkingController +{ +public: + virtual ~ChunkingController() {} + + // Return true if the input file was processed. If true is returned OutChunked will contain the chunked info + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic& BytesProcessed) const = 0; + virtual std::string_view GetName() const = 0; + virtual CbObject GetParameters() const = 0; +}; + +std::unique_ptr CreateBasicChunkingController( + std::span ExcludeExtensions = DefaultChunkingExcludeExtensions, + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit, + const ChunkedParams& ChunkingParams = DefaultChunkedParams); +std::unique_ptr CreateBasicChunkingController(CbObjectView Parameters); + +std::unique_ptr CreateChunkingControllerWithFixedChunking( + std::span ExcludeExtensions = DefaultChunkingExcludeExtensions, + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit, + const ChunkedParams& ChunkingParams = DefaultChunkedParams, + uint32_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize); +std::unique_ptr CreateChunkingControllerWithFixedChunking(CbObjectView Parameters); + +std::unique_ptr CreateChunkingController(std::string_view Name, CbObjectView Parameters); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/filebuildstorage.h b/src/zenutil/include/zenutil/filebuildstorage.h new file mode 100644 index 000000000..c95fb32e6 --- /dev/null +++ b/src/zenutil/include/zenutil/filebuildstorage.h @@ -0,0 +1,16 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +namespace zen { +class HttpClient; + +std::unique_ptr CreateFileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec = 0.0, + double DelayPerKBSec = 0.0); +} // namespace zen diff --git a/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h b/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h new file mode 100644 index 000000000..89fc70140 --- /dev/null +++ b/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h @@ -0,0 +1,17 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +namespace zen { +class HttpClient; + +std::unique_ptr CreateJupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + BuildStorage::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath); +} // namespace zen diff --git a/src/zenutil/include/zenutil/parallellwork.h b/src/zenutil/include/zenutil/parallellwork.h new file mode 100644 index 000000000..7a8218c51 --- /dev/null +++ b/src/zenutil/include/zenutil/parallellwork.h @@ -0,0 +1,69 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +#include + +namespace zen { + +class ParallellWork +{ +public: + ParallellWork(std::atomic& AbortFlag) : m_AbortFlag(AbortFlag), m_PendingWork(1) {} + + ~ParallellWork() + { + // Make sure to call Wait before destroying + ZEN_ASSERT(m_PendingWork.Remaining() == 0); + } + + void ScheduleWork(WorkerThreadPool& WorkerPool, + std::function& AbortFlag)>&& Work, + std::function& AbortFlag)>&& OnError) + { + m_PendingWork.AddCount(1); + try + { + WorkerPool.ScheduleWork([this, Work = std::move(Work), OnError = std::move(OnError)] { + try + { + Work(m_AbortFlag); + } + catch (const std::exception& Ex) + { + OnError(Ex, m_AbortFlag); + } + m_PendingWork.CountDown(); + }); + } + catch (const std::exception&) + { + m_PendingWork.CountDown(); + throw; + } + } + + void Abort() { m_AbortFlag = true; } + + bool IsAborted() const { return m_AbortFlag.load(); } + + void Wait(int32_t UpdateInteralMS, std::function&& UpdateCallback) + { + ZEN_ASSERT(m_PendingWork.Remaining() > 0); + m_PendingWork.CountDown(); + while (!m_PendingWork.Wait(UpdateInteralMS)) + { + UpdateCallback(m_AbortFlag.load(), m_PendingWork.Remaining()); + } + } + Latch& PendingWork() { return m_PendingWork; } + +private: + std::atomic& m_AbortFlag; + Latch m_PendingWork; +}; + +} // namespace zen diff --git a/src/zenutil/jupiter/jupiterbuildstorage.cpp b/src/zenutil/jupiter/jupiterbuildstorage.cpp new file mode 100644 index 000000000..481e9146f --- /dev/null +++ b/src/zenutil/jupiter/jupiterbuildstorage.cpp @@ -0,0 +1,371 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +#include +#include +#include +#include +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +class JupiterBuildStorage : public BuildStorage +{ +public: + JupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath) + : m_Session(InLog, InHttpClient) + , m_Stats(Stats) + , m_Namespace(Namespace) + , m_Bucket(Bucket) + , m_TempFolderPath(TempFolderPath) + { + } + virtual ~JupiterBuildStorage() {} + + virtual CbObject ListBuilds(CbObject Query) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = Query.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult ListResult = m_Session.ListBuilds(m_Namespace, m_Bucket, Payload); + AddStatistic(ListResult); + if (!ListResult.Success) + { + throw std::runtime_error(fmt::format("Failed listing builds: {} ({})", ListResult.Reason, ListResult.ErrorCode)); + } + return PayloadToJson("Failed listing builds"sv, ListResult.Response); + } + + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult PutResult = m_Session.PutBuild(m_Namespace, m_Bucket, BuildId, Payload); + AddStatistic(PutResult); + if (!PutResult.Success) + { + throw std::runtime_error(fmt::format("Failed creating build: {} ({})", PutResult.Reason, PutResult.ErrorCode)); + } + return PayloadToJson(fmt::format("Failed creating build: {}", BuildId), PutResult.Response); + } + + virtual CbObject GetBuild(const Oid& BuildId) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult GetBuildResult = m_Session.GetBuild(m_Namespace, m_Bucket, BuildId); + AddStatistic(GetBuildResult); + if (!GetBuildResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching build: {} ({})", GetBuildResult.Reason, GetBuildResult.ErrorCode)); + } + return PayloadToJson(fmt::format("Failed fetching build {}:", BuildId), GetBuildResult.Response); + } + + virtual void FinalizeBuild(const Oid& BuildId) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult FinalizeBuildResult = m_Session.FinalizeBuild(m_Namespace, m_Bucket, BuildId); + AddStatistic(FinalizeBuildResult); + if (!FinalizeBuildResult.Success) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part: {} ({})", FinalizeBuildResult.Reason, FinalizeBuildResult.ErrorCode)); + } + } + + virtual std::pair> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + PutBuildPartResult PutPartResult = m_Session.PutBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId, PartName, Payload); + AddStatistic(PutPartResult); + if (!PutPartResult.Success) + { + throw std::runtime_error(fmt::format("Failed creating build part: {} ({})", PutPartResult.Reason, PutPartResult.ErrorCode)); + } + return std::make_pair(PutPartResult.RawHash, std::move(PutPartResult.Needs)); + } + + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult GetBuildPartResult = m_Session.GetBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId); + AddStatistic(GetBuildPartResult); + if (!GetBuildPartResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching build part {}: {} ({})", + BuildPartId, + GetBuildPartResult.Reason, + GetBuildPartResult.ErrorCode)); + } + return PayloadToJson(fmt::format("Failed fetching build part {}:", BuildPartId), GetBuildPartResult.Response); + } + + virtual std::vector FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + FinalizeBuildPartResult FinalizePartResult = m_Session.FinalizeBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId, PartHash); + AddStatistic(FinalizePartResult); + if (!FinalizePartResult.Success) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part: {} ({})", FinalizePartResult.Reason, FinalizePartResult.ErrorCode)); + } + return std::move(FinalizePartResult.Needs); + } + + virtual void PutBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + const CompositeBuffer& Payload) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult PutBlobResult = m_Session.PutBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, ContentType, Payload); + AddStatistic(PutBlobResult); + if (!PutBlobResult.Success) + { + throw std::runtime_error(fmt::format("Failed putting build part: {} ({})", PutBlobResult.Reason, PutBlobResult.ErrorCode)); + } + } + + virtual std::vector> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function&& Transmitter, + std::function&& OnSentBytes) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + std::vector> WorkItems; + JupiterResult PutMultipartBlobResult = m_Session.PutMultipartBuildBlob(m_Namespace, + m_Bucket, + BuildId, + RawHash, + ContentType, + PayloadSize, + std::move(Transmitter), + WorkItems); + AddStatistic(PutMultipartBlobResult); + if (!PutMultipartBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed putting build part: {} ({})", PutMultipartBlobResult.Reason, PutMultipartBlobResult.ErrorCode)); + } + OnSentBytes(PutMultipartBlobResult.SentBytes, WorkItems.empty()); + + std::vector> WorkList; + for (auto& WorkItem : WorkItems) + { + WorkList.emplace_back([this, WorkItem = std::move(WorkItem), OnSentBytes]() { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + bool IsComplete = false; + JupiterResult PartResult = WorkItem(IsComplete); + AddStatistic(PartResult); + if (!PartResult.Success) + { + throw std::runtime_error(fmt::format("Failed putting build part: {} ({})", PartResult.Reason, PartResult.ErrorCode)); + } + OnSentBytes(PartResult.SentBytes, IsComplete); + }); + } + return WorkList; + } + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult GetBuildBlobResult = m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath); + AddStatistic(GetBuildBlobResult); + if (!GetBuildBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed fetching build blob {}: {} ({})", RawHash, GetBuildBlobResult.Reason, GetBuildBlobResult.ErrorCode)); + } + return std::move(GetBuildBlobResult.Response); + } + + virtual std::vector> GetLargeBuildBlob( + const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function&& Receiver) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + std::vector> WorkItems; + JupiterResult GetMultipartBlobResult = + m_Session.GetMultipartBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, ChunkSize, std::move(Receiver), WorkItems); + + AddStatistic(GetMultipartBlobResult); + if (!GetMultipartBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed getting build part: {} ({})", GetMultipartBlobResult.Reason, GetMultipartBlobResult.ErrorCode)); + } + std::vector> WorkList; + for (auto& WorkItem : WorkItems) + { + WorkList.emplace_back([this, WorkItem = std::move(WorkItem)]() { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult PartResult = WorkItem(); + AddStatistic(PartResult); + if (!PartResult.Success) + { + throw std::runtime_error(fmt::format("Failed getting build part: {} ({})", PartResult.Reason, PartResult.ErrorCode)); + } + }); + } + return WorkList; + } + + virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult PutMetaResult = m_Session.PutBlockMetadata(m_Namespace, m_Bucket, BuildId, BlockRawHash, Payload); + AddStatistic(PutMetaResult); + if (!PutMetaResult.Success) + { + throw std::runtime_error( + fmt::format("Failed putting build block metadata: {} ({})", PutMetaResult.Reason, PutMetaResult.ErrorCode)); + } + } + + virtual std::vector FindBlocks(const Oid& BuildId) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult FindResult = m_Session.FindBlocks(m_Namespace, m_Bucket, BuildId); + AddStatistic(FindResult); + if (!FindResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching known blocks: {} ({})", FindResult.Reason, FindResult.ErrorCode)); + } + return ParseChunkBlockDescriptionList(PayloadToJson("Failed fetching known blocks"sv, FindResult.Response)); + } + + virtual std::vector GetBlockMetadata(const Oid& BuildId, std::span BlockHashes) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + CbObjectWriter Request; + + Request.BeginArray("blocks"sv); + for (const IoHash& BlockHash : BlockHashes) + { + Request.AddHash(BlockHash); + } + Request.EndArray(); + + IoBuffer Payload = Request.Save().GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult GetBlockMetadataResult = m_Session.GetBlockMetadata(m_Namespace, m_Bucket, BuildId, Payload); + AddStatistic(GetBlockMetadataResult); + if (!GetBlockMetadataResult.Success) + { + throw std::runtime_error( + fmt::format("Failed fetching block metadatas: {} ({})", GetBlockMetadataResult.Reason, GetBlockMetadataResult.ErrorCode)); + } + std::vector UnorderedList = + ParseChunkBlockDescriptionList(PayloadToJson("Failed fetching block metadatas", GetBlockMetadataResult.Response)); + tsl::robin_map BlockDescriptionLookup; + for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++) + { + const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex]; + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex); + } + std::vector SortedBlockDescriptions; + SortedBlockDescriptions.reserve(BlockDescriptionLookup.size()); + for (const IoHash& BlockHash : BlockHashes) + { + if (auto It = BlockDescriptionLookup.find(BlockHash); It != BlockDescriptionLookup.end()) + { + SortedBlockDescriptions.push_back(std::move(UnorderedList[It->second])); + } + } + return SortedBlockDescriptions; + } + +private: + static CbObject PayloadToJson(std::string_view Context, const IoBuffer& Payload) + { + if (Payload.GetContentType() == ZenContentType::kJSON) + { + std::string_view Json(reinterpret_cast(Payload.GetData()), Payload.GetSize()); + return LoadCompactBinaryFromJson(Json).AsObject(); + } + else if (Payload.GetContentType() == ZenContentType::kCbObject) + { + return LoadCompactBinaryObject(Payload); + } + else if (Payload.GetContentType() == ZenContentType::kCompressedBinary) + { + IoHash RawHash; + uint64_t RawSize; + return LoadCompactBinaryObject(CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize)); + } + else + { + throw std::runtime_error( + fmt::format("{}: {} ({})", "Unsupported response format", Context, ToString(Payload.GetContentType()))); + } + } + + void AddStatistic(const JupiterResult& Result) + { + m_Stats.TotalBytesWritten += Result.SentBytes; + m_Stats.TotalBytesRead += Result.ReceivedBytes; + m_Stats.TotalRequestTimeUs += uint64_t(Result.ElapsedSeconds * 1000000.0); + m_Stats.TotalRequestCount++; + } + + JupiterSession m_Session; + Statistics& m_Stats; + const std::string m_Namespace; + const std::string m_Bucket; + const std::filesystem::path m_TempFolderPath; +}; + +std::unique_ptr +CreateJupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + BuildStorage::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath) +{ + return std::make_unique(InLog, InHttpClient, Stats, Namespace, Bucket, TempFolderPath); +} + +} // namespace zen -- cgit v1.2.3 From 40bf8e292618e21a07223994ce438e580d6fdd20 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 26 Feb 2025 18:08:37 +0100 Subject: added include to zentest-appstub.cpp --- src/zentest-appstub/zentest-appstub.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/zentest-appstub/zentest-appstub.cpp b/src/zentest-appstub/zentest-appstub.cpp index 66e6e03fd..e6b1c4bd1 100644 --- a/src/zentest-appstub/zentest-appstub.cpp +++ b/src/zentest-appstub/zentest-appstub.cpp @@ -4,6 +4,7 @@ #include #include #include +#include using namespace std::chrono_literals; -- cgit v1.2.3 From 1ee30cf395b3a57865ed21c7be22c894a8fdae38 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 26 Feb 2025 18:18:11 +0100 Subject: clang-format fix --- src/zentest-appstub/zentest-appstub.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/zentest-appstub/zentest-appstub.cpp b/src/zentest-appstub/zentest-appstub.cpp index e6b1c4bd1..24cf21e97 100644 --- a/src/zentest-appstub/zentest-appstub.cpp +++ b/src/zentest-appstub/zentest-appstub.cpp @@ -1,10 +1,10 @@ // Copyright Epic Games, Inc. All Rights Reserved. #include +#include #include #include #include -#include using namespace std::chrono_literals; -- cgit v1.2.3 From aef396302e8f9177aa9185bd37d5a8657056f73c Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 26 Feb 2025 21:27:13 +0100 Subject: Fix bug causing crash if large file was exact multiple of 256KB when using zen builds command (#286) --- src/zen/cmds/builds_cmd.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index ececab29e..ed1eb2d19 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -855,7 +855,7 @@ namespace { ZEN_ASSERT((Offset + Size) <= SourceSize); const uint64_t BlockIndexStart = Offset / BlockSize; - const uint64_t BlockIndexEnd = (Offset + Size) / BlockSize; + const uint64_t BlockIndexEnd = (Offset + Size - 1) / BlockSize; std::vector BufferRanges; BufferRanges.reserve(BlockIndexEnd - BlockIndexStart + 1); -- cgit v1.2.3 From 38a58059214bacc18c8a6406acf7f46c57f51e86 Mon Sep 17 00:00:00 2001 From: Zousar Shaker Date: Thu, 27 Feb 2025 01:27:34 -0700 Subject: Zs/auth bad function fix (#287) * Describe fix in changelog * remove JupiterClient::m_TokenProvider --------- Co-authored-by: zousar <2936246+zousar@users.noreply.github.com> Co-authored-by: Dan Engelbrecht --- src/zenutil/include/zenutil/jupiter/jupiterclient.h | 11 +++++------ src/zenutil/jupiter/jupiterclient.cpp | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/zenutil/include/zenutil/jupiter/jupiterclient.h b/src/zenutil/include/zenutil/jupiter/jupiterclient.h index defe50edc..8a51bd60a 100644 --- a/src/zenutil/include/zenutil/jupiter/jupiterclient.h +++ b/src/zenutil/include/zenutil/jupiter/jupiterclient.h @@ -44,12 +44,11 @@ public: HttpClient& Client() { return m_HttpClient; } private: - LoggerRef m_Log; - const std::string m_DefaultDdcNamespace; - const std::string m_DefaultBlobStoreNamespace; - const std::string m_ComputeCluster; - std::function m_TokenProvider; - HttpClient m_HttpClient; + LoggerRef m_Log; + const std::string m_DefaultDdcNamespace; + const std::string m_DefaultBlobStoreNamespace; + const std::string m_ComputeCluster; + HttpClient m_HttpClient; friend class JupiterSession; }; diff --git a/src/zenutil/jupiter/jupiterclient.cpp b/src/zenutil/jupiter/jupiterclient.cpp index 5e5da3750..dbac218a4 100644 --- a/src/zenutil/jupiter/jupiterclient.cpp +++ b/src/zenutil/jupiter/jupiterclient.cpp @@ -11,7 +11,6 @@ JupiterClient::JupiterClient(const JupiterClientOptions& Options, std::function< , m_DefaultDdcNamespace(Options.DdcNamespace) , m_DefaultBlobStoreNamespace(Options.BlobStoreNamespace) , m_ComputeCluster(Options.ComputeCluster) -, m_TokenProvider(std::move(TokenProvider)) , m_HttpClient(Options.ServiceUrl, HttpClientSettings{.ConnectTimeout = Options.ConnectTimeout, .Timeout = Options.Timeout, -- cgit v1.2.3 From 5791f51cccea1d4e5365456c8da89dbac0dd3ec0 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 28 Feb 2025 12:39:48 +0100 Subject: improve error handling (#289) * clearer errors * quicker abort * handle deleted local files * simplify parallellwork error handling * don't finish progress on destructor - gives wrong impression * graceful ctrl-c handling --- src/zen/cmds/builds_cmd.cpp | 1922 +++++++++++----------- src/zen/zen.cpp | 2 +- src/zenutil/chunkedcontent.cpp | 13 +- src/zenutil/chunkedfile.cpp | 11 +- src/zenutil/chunkingcontroller.cpp | 12 +- src/zenutil/include/zenutil/chunkedfile.h | 3 +- src/zenutil/include/zenutil/chunkingcontroller.h | 7 +- src/zenutil/include/zenutil/parallellwork.h | 48 + 8 files changed, 1040 insertions(+), 978 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index ed1eb2d19..18cc7cf9e 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -28,6 +28,7 @@ #include #include +#include #include ZEN_THIRD_PARTY_INCLUDES_START @@ -49,6 +50,21 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace zen { namespace { + static std::atomic AbortFlag = false; + static void SignalCallbackHandler(int SigNum) + { + if (SigNum == SIGINT) + { + AbortFlag = true; + } +#if ZEN_PLATFORM_WINDOWS + if (SigNum == SIGBREAK) + { + AbortFlag = true; + } +#endif // ZEN_PLATFORM_WINDOWS + } + using namespace std::literals; static const size_t DefaultMaxBlockSize = 64u * 1024u * 1024u; @@ -273,8 +289,7 @@ namespace { const std::filesystem::path& Path, std::function&& IsAcceptedFolder, std::function&& IsAcceptedFile, - ChunkingController& ChunkController, - std::atomic& AbortFlag) + ChunkingController& ChunkController) { FolderContent Content = GetFolderContent( GetFolderContentStats, @@ -316,6 +331,10 @@ namespace { false); }, AbortFlag); + if (AbortFlag) + { + return {}; + } FilteredBytesHashed.Stop(); ProgressBar.Finish(); @@ -1214,7 +1233,6 @@ namespace { const ChunkedContentLookup& Lookup, BuildStorage& Storage, const Oid& BuildId, - std::atomic& AbortFlag, const std::vector>& NewBlockChunks, GeneratedBlocks& OutBlocks, DiskStatistics& DiskStats, @@ -1254,7 +1272,7 @@ namespace { const std::vector& ChunksInBlock = NewBlockChunks[BlockIndex]; Work.ScheduleWork( GenerateBlobsPool, - [&, BlockIndex](std::atomic& AbortFlag) { + [&, BlockIndex](std::atomic&) { if (!AbortFlag) { FilteredGeneratedBytesPerSecond.Start(); @@ -1295,7 +1313,7 @@ namespace { PendingUploadCount++; Work.ScheduleWork( UploadBlocksPool, - [&, BlockIndex, Payload = std::move(Payload)](std::atomic& AbortFlag) { + [&, BlockIndex, Payload = std::move(Payload)](std::atomic&) { if (!AbortFlag) { if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) @@ -1340,17 +1358,11 @@ namespace { } } }, - [&](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed uploading block. Reason: {}", Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } } }, - [&](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed generating block. Reason: {}", Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { @@ -1394,7 +1406,6 @@ namespace { GeneratedBlocks& NewBlocks, std::span LooseChunkIndexes, const std::uint64_t LargeAttachmentSize, - std::atomic& AbortFlag, DiskStatistics& DiskStats, UploadStatistics& UploadStats, GenerateBlocksStatistics& GenerateBlocksStats, @@ -1424,9 +1435,6 @@ namespace { ChunkIndexToLooseChunkOrderIndex.insert_or_assign(LooseChunkIndexes[OrderIndex], OrderIndex); } - std::vector FoundChunkHashes; - FoundChunkHashes.reserve(RawHashes.size()); - std::vector BlockIndexes; std::vector LooseChunkOrderIndexes; @@ -1458,7 +1466,7 @@ namespace { auto AsyncUploadBlock = [&](const size_t BlockIndex, const IoHash BlockHash, CompositeBuffer&& Payload) { Work.ScheduleWork( UploadChunkPool, - [&, BlockIndex, BlockHash, Payload = CompositeBuffer(std::move(Payload))](std::atomic& AbortFlag) { + [&, BlockIndex, BlockHash, Payload = std::move(Payload)](std::atomic&) { if (!AbortFlag) { FilteredUploadedBytesPerSecond.Start(); @@ -1490,16 +1498,13 @@ namespace { } } }, - [&](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed uploading block. Reason: {}", Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); }; auto AsyncUploadLooseChunk = [&](const IoHash& RawHash, CompositeBuffer&& Payload) { Work.ScheduleWork( UploadChunkPool, - [&, RawHash, Payload = CompositeBuffer(std::move(Payload))](std::atomic& AbortFlag) { + [&, RawHash, Payload = CompositeBuffer(std::move(Payload))](std::atomic&) { if (!AbortFlag) { const uint64_t PayloadSize = Payload.GetSize(); @@ -1536,16 +1541,13 @@ namespace { { Work.ScheduleWork( UploadChunkPool, - [Work = std::move(WorkPart)](std::atomic& AbortFlag) { + [Work = std::move(WorkPart)](std::atomic&) { if (!AbortFlag) { Work(); } }, - [&, RawHash](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed uploading multipart blob {}. Reason: {}", RawHash, Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } ZEN_CONSOLE_VERBOSE("Uploaded multipart chunk {} ({})", RawHash, NiceBytes(PayloadSize)); } @@ -1564,10 +1566,7 @@ namespace { } } }, - [&](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed uploading chunk. Reason: {}", Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); }; std::vector GenerateBlockIndexes; @@ -1581,7 +1580,6 @@ namespace { if (CompositeBuffer BlockPayload = std::move(NewBlocks.BlockBuffers[BlockIndex]); BlockPayload) { const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; - FoundChunkHashes.push_back(BlockHash); if (!AbortFlag) { AsyncUploadBlock(BlockIndex, BlockHash, std::move(BlockPayload)); @@ -1606,12 +1604,11 @@ namespace { for (const size_t BlockIndex : GenerateBlockIndexes) { const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; - FoundChunkHashes.push_back(BlockHash); if (!AbortFlag) { Work.ScheduleWork( ReadChunkPool, - [&, BlockIndex](std::atomic& AbortFlag) { + [&, BlockIndex](std::atomic&) { if (!AbortFlag) { FilteredGenerateBlockBytesPerSecond.Start(); @@ -1646,10 +1643,7 @@ namespace { NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); } }, - [&](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed generating block. Reason: {}", Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } } @@ -1662,7 +1656,7 @@ namespace { const uint32_t ChunkIndex = LooseChunkIndexes[CompressLooseChunkOrderIndex]; Work.ScheduleWork( ReadChunkPool, - [&, ChunkIndex](std::atomic& AbortFlag) { + [&, ChunkIndex](std::atomic&) { if (!AbortFlag) { FilteredCompressedBytesPerSecond.Start(); @@ -1686,12 +1680,7 @@ namespace { } } }, - [&, ChunkIndex](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed compressing part blob {}. Reason: {}", - Content.ChunkedContent.ChunkHashes[ChunkIndex], - Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { @@ -1884,7 +1873,7 @@ namespace { return FilteredReuseBlockIndexes; }; - bool UploadFolder(BuildStorage& Storage, + void UploadFolder(BuildStorage& Storage, const Oid& BuildId, const Oid& BuildPartId, const std::string_view BuildPartName, @@ -1898,8 +1887,6 @@ namespace { { Stopwatch ProcessTimer; - std::atomic AbortFlag = false; - const std::filesystem::path ZenTempFolder = Path / ZenTempFolderName; CreateDirectories(ZenTempFolder); CleanDirectory(ZenTempFolder, {}); @@ -2101,15 +2088,14 @@ namespace { false); }, AbortFlag); + if (AbortFlag) + { + return; + } FilteredBytesHashed.Stop(); ProgressBar.Finish(); } - if (AbortFlag) - { - return true; - } - ZEN_CONSOLE("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec", LocalContent.Paths.size(), NiceBytes(TotalRawSize), @@ -2289,7 +2275,6 @@ namespace { LocalLookup, Storage, BuildId, - AbortFlag, NewBlockChunks, NewBlocks, DiskStats, @@ -2297,11 +2282,6 @@ namespace { GenerateBlocksStats); } - if (AbortFlag) - { - return true; - } - CbObject PartManifest; { CbObjectWriter PartManifestWriter; @@ -2508,7 +2488,6 @@ namespace { NewBlocks, LooseChunkIndexes, LargeAttachmentSize, - AbortFlag, DiskStats, TempUploadStats, TempGenerateBlocksStats, @@ -2558,21 +2537,8 @@ namespace { { break; } - if (AbortFlag) - { - return true; - } ZEN_CONSOLE_VERBOSE("FinalizeBuildPart needs attachments: {}", FormatArray(Needs, "\n "sv)); UploadAttachments(Needs); - if (AbortFlag) - { - return true; - } - } - - if (AbortFlag) - { - return true; } if (CreateBuild) @@ -2612,9 +2578,8 @@ namespace { std::vector VerifyBlockDescriptions = Storage.GetBlockMetadata(BuildId, BlockHashes); if (VerifyBlockDescriptions.size() != BlockHashes.size()) { - ZEN_CONSOLE("Uploaded blocks could not all be found, {} blocks are missing", - BlockHashes.size() - VerifyBlockDescriptions.size()); - return true; + throw std::runtime_error(fmt::format("Uploaded blocks could not all be found, {} blocks are missing", + BlockHashes.size() - VerifyBlockDescriptions.size())); } } @@ -2774,10 +2739,9 @@ namespace { BuildPartName, BuildPartId, NiceTimeSpanMs(ProcessTimer.GetElapsedTimeMs())); - return false; } - void VerifyFolder(const ChunkedFolderContent& Content, const std::filesystem::path& Path, std::atomic& AbortFlag) + void VerifyFolder(const ChunkedFolderContent& Content, const std::filesystem::path& Path) { ProgressBar ProgressBar(UsePlainProgress); std::atomic FilesVerified(0); @@ -2822,7 +2786,7 @@ namespace { Work.ScheduleWork( VerifyPool, - [&, PathIndex](std::atomic& AbortFlag) { + [&, PathIndex](std::atomic&) { if (!AbortFlag) { // TODO: Convert ScheduleWork body to function @@ -2912,9 +2876,7 @@ namespace { FilesVerified++; } }, - [&, PathIndex](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_UNUSED(AbortFlag); - + [&, PathIndex](const std::exception& Ex, std::atomic&) { ErrorLock.WithExclusiveLock([&]() { Errors.push_back(fmt::format("Failed verifying file '{}'. Reason: {}", (Path / Content.Paths[PathIndex]).make_preferred(), @@ -3085,36 +3047,39 @@ namespace { { if (!WriteOps.empty()) { - std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOpData& Lhs, const WriteOpData& Rhs) { - if (Lhs.Target->PathIndex < Rhs.Target->PathIndex) - { - return true; - } - if (Lhs.Target->PathIndex > Rhs.Target->PathIndex) + if (!AbortFlag) + { + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOpData& Lhs, const WriteOpData& Rhs) { + if (Lhs.Target->PathIndex < Rhs.Target->PathIndex) + { + return true; + } + if (Lhs.Target->PathIndex > Rhs.Target->PathIndex) + { + return false; + } + return Lhs.Target->Offset < Rhs.Target->Offset; + }); + + WriteFileCache OpenFileCache; + for (const WriteOpData& WriteOp : WriteOps) { - return false; + const CompositeBuffer& Chunk = ChunkBuffers[WriteOp.ChunkBufferIndex]; + const uint32_t PathIndex = WriteOp.Target->PathIndex; + const uint64_t ChunkSize = Chunk.GetSize(); + const uint64_t FileOffset = WriteOp.Target->Offset; + ZEN_ASSERT(FileOffset + ChunkSize <= Content.RawSizes[PathIndex]); + + OpenFileCache.WriteToFile( + PathIndex, + [&Path, &Content](uint32_t TargetIndex) { return (Path / Content.Paths[TargetIndex]).make_preferred(); }, + Chunk, + FileOffset, + Content.RawSizes[PathIndex]); + OutBytesWritten += ChunkSize; } - return Lhs.Target->Offset < Rhs.Target->Offset; - }); - - WriteFileCache OpenFileCache; - for (const WriteOpData& WriteOp : WriteOps) - { - const CompositeBuffer& Chunk = ChunkBuffers[WriteOp.ChunkBufferIndex]; - const uint32_t PathIndex = WriteOp.Target->PathIndex; - const uint64_t ChunkSize = Chunk.GetSize(); - const uint64_t FileOffset = WriteOp.Target->Offset; - ZEN_ASSERT(FileOffset + ChunkSize <= Content.RawSizes[PathIndex]); - - OpenFileCache.WriteToFile( - PathIndex, - [&Path, &Content](uint32_t TargetIndex) { return (Path / Content.Paths[TargetIndex]).make_preferred(); }, - Chunk, - FileOffset, - Content.RawSizes[PathIndex]); - OutBytesWritten += ChunkSize; + OutChunksComplete += gsl::narrow(ChunkBuffers.size()); } - OutChunksComplete += gsl::narrow(ChunkBuffers.size()); } return true; } @@ -3186,7 +3151,6 @@ namespace { ParallellWork& Work, WorkerThreadPool& WritePool, WorkerThreadPool& NetworkPool, - std::atomic& AbortFlag, std::atomic& BytesWritten, std::atomic& WriteToDiskBytes, std::atomic& BytesDownloaded, @@ -3225,8 +3189,8 @@ namespace { &WriteToDiskBytes, &DownloadedChunks, &ChunksComplete, - ChunkTargetPtrs = std::vector(ChunkTargetPtrs), - &AbortFlag](uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining) { + ChunkTargetPtrs = std::vector( + ChunkTargetPtrs)](uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining) { BytesDownloaded += Chunk.GetSize(); LooseChunksBytes += Chunk.GetSize(); @@ -3249,7 +3213,7 @@ namespace { &ChunksComplete, &BytesWritten, &WriteToDiskBytes, - ChunkTargetPtrs](std::atomic& AbortFlag) { + ChunkTargetPtrs](std::atomic&) { if (!AbortFlag) { uint64_t CompressedSize = Workload->TempFile.FileSize(); @@ -3276,6 +3240,7 @@ namespace { // ZEN_ASSERT_SLOW(ChunkHash == // IoHash::HashBuffer(Chunk.AsIoBuffer())); + if (!AbortFlag) { WriteFileCache OpenFileCache; @@ -3291,10 +3256,7 @@ namespace { } } }, - [&, ChunkHash](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed writing chunk {}. Reason: {}", ChunkHash, Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } } }); @@ -3306,20 +3268,17 @@ namespace { { Work.ScheduleWork( NetworkPool, - [WorkItem = std::move(WorkItem)](std::atomic& AbortFlag) { + [WorkItem = std::move(WorkItem)](std::atomic&) { if (!AbortFlag) { WorkItem(); } }, - [&, ChunkHash](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed uploading multipart blob {}. Reason: {}", ChunkHash, Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } } - bool UpdateFolder(BuildStorage& Storage, + void UpdateFolder(BuildStorage& Storage, const Oid& BuildId, const std::filesystem::path& Path, const std::uint64_t LargeAttachmentSize, @@ -3329,7 +3288,6 @@ namespace { const std::vector& BlockDescriptions, const std::vector& LooseChunkHashes, bool WipeTargetFolder, - std::atomic& AbortFlag, FolderContent& OutLocalFolderState) { std::atomic DownloadedBlocks = 0; @@ -3433,7 +3391,7 @@ namespace { if (AbortFlag) { - return true; + return; } CleanDirectory(Path, DefaultExcludeFolders); @@ -3646,7 +3604,7 @@ namespace { Work.ScheduleWork( WritePool, // GetSyncWorkerPool(),// - [&, CopyDataIndex](std::atomic& AbortFlag) { + [&, CopyDataIndex](std::atomic&) { if (!AbortFlag) { const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; @@ -3703,6 +3661,10 @@ namespace { WriteFileCache OpenFileCache; for (const WriteOp& Op : WriteOps) { + if (AbortFlag) + { + break; + } const uint32_t RemotePathIndex = Op.Target->PathIndex; const uint64_t ChunkSize = Op.ChunkSize; CompositeBuffer ChunkSource = SourceFile.GetRange(Op.LocalFileOffset, ChunkSize); @@ -3721,15 +3683,18 @@ namespace { WriteToDiskBytes += ChunkSize; CacheLocalFileBytesRead += ChunkSize; // TODO: This should be the sum of unique chunk sizes? } - ChunkCountWritten += gsl::narrow(CopyData.ChunkTargets.size()); - ZEN_DEBUG("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), CopyData.OriginalSourceFileName); + if (!AbortFlag) + { + ChunkCountWritten += gsl::narrow(CopyData.ChunkTargets.size()); + ZEN_DEBUG("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), CopyData.OriginalSourceFileName); + } } if (CopyData.RemotePathIndexes.empty()) { std::filesystem::remove(CacheFilePath); } - else + else if (!AbortFlag) { uint64_t LocalBytesWritten = 0; CloneFullFileFromCache(Path, @@ -3753,12 +3718,7 @@ namespace { } } }, - [&, CopyDataIndex](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed reading cached file {}. Reason: {}", - CacheCopyDatas[CopyDataIndex].OriginalSourceFileName, - Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } for (const IoHash ChunkHash : LooseChunkHashes) @@ -3788,7 +3748,7 @@ namespace { { Work.ScheduleWork( NetworkPool, - [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs](std::atomic& AbortFlag) { + [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) { if (!AbortFlag) { if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) @@ -3804,7 +3764,6 @@ namespace { Work, WritePool, NetworkPool, - AbortFlag, BytesWritten, WriteToDiskBytes, BytesDownloaded, @@ -3829,7 +3788,7 @@ namespace { Work.ScheduleWork( WritePool, [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs, CompressedPart = std::move(CompressedPart)]( - std::atomic& AbortFlag) { + std::atomic&) { if (!AbortFlag) { uint64_t TotalBytesWritten = 0; @@ -3850,18 +3809,12 @@ namespace { WriteToDiskBytes += TotalBytesWritten; } }, - [&, ChunkHash](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed writing chunk {}. Reason: {}", ChunkHash, Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } } } }, - [&, ChunkHash](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed downloading chunk {}. Reason: {}", ChunkHash, Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } } } @@ -3893,98 +3846,97 @@ namespace { } Work.ScheduleWork( WritePool, - [&, BlockIndex](std::atomic& AbortFlag) { + [&, BlockIndex](std::atomic&) { if (!AbortFlag) { if (IsBlockNeeded(BlockDescriptions[BlockIndex])) { Work.ScheduleWork( NetworkPool, - [&, BlockIndex](std::atomic& AbortFlag) { - IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescriptions[BlockIndex].BlockHash); - if (!BlockBuffer) - { - throw std::runtime_error( - fmt::format("Block {} is missing", BlockDescriptions[BlockIndex].BlockHash)); - } - BytesDownloaded += BlockBuffer.GetSize(); - BlockBytes += BlockBuffer.GetSize(); - DownloadedBlocks++; - + [&, BlockIndex](std::atomic&) { if (!AbortFlag) { - Work.ScheduleWork( - WritePool, - [&, BlockIndex, BlockBuffer = std::move(BlockBuffer)](std::atomic& AbortFlag) { - if (!AbortFlag) - { - IoHash BlockRawHash; - uint64_t BlockRawSize; - CompressedBuffer CompressedBlockBuffer = - CompressedBuffer::FromCompressed(SharedBuffer(std::move(BlockBuffer)), - BlockRawHash, - BlockRawSize); - if (!CompressedBlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", - BlockDescriptions[BlockIndex].BlockHash)); - } - - if (BlockRawHash != BlockDescriptions[BlockIndex].BlockHash) - { - throw std::runtime_error( - fmt::format("Block {} header has a mismatching raw hash {}", - BlockDescriptions[BlockIndex].BlockHash, - BlockRawHash)); - } - - CompositeBuffer DecompressedBlockBuffer = CompressedBlockBuffer.DecompressToComposite(); - if (!DecompressedBlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} failed to decompress", - BlockDescriptions[BlockIndex].BlockHash)); - } + IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescriptions[BlockIndex].BlockHash); + if (!BlockBuffer) + { + throw std::runtime_error( + fmt::format("Block {} is missing", BlockDescriptions[BlockIndex].BlockHash)); + } + BytesDownloaded += BlockBuffer.GetSize(); + BlockBytes += BlockBuffer.GetSize(); + DownloadedBlocks++; - ZEN_ASSERT_SLOW(BlockDescriptions[BlockIndex].BlockHash == - IoHash::HashBuffer(DecompressedBlockBuffer)); - - uint64_t BytesWrittenToDisk = 0; - uint32_t ChunksReadFromBlock = 0; - if (WriteBlockToDisk(Path, - RemoteContent, - RemotePathIndexWantsCopyFromCacheFlags, - DecompressedBlockBuffer, - RemoteLookup, - RemoteChunkIndexNeedsCopyFromSourceFlags.data(), - ChunksReadFromBlock, - BytesWrittenToDisk)) - { - BytesWritten += BytesWrittenToDisk; - WriteToDiskBytes += BytesWrittenToDisk; - ChunkCountWritten += ChunksReadFromBlock; - } - else + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, + [&, BlockIndex, BlockBuffer = std::move(BlockBuffer)](std::atomic&) { + if (!AbortFlag) { - throw std::runtime_error( - fmt::format("Block {} is malformed", BlockDescriptions[BlockIndex].BlockHash)); + IoHash BlockRawHash; + uint64_t BlockRawSize; + CompressedBuffer CompressedBlockBuffer = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(BlockBuffer)), + BlockRawHash, + BlockRawSize); + if (!CompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", + BlockDescriptions[BlockIndex].BlockHash)); + } + + if (BlockRawHash != BlockDescriptions[BlockIndex].BlockHash) + { + throw std::runtime_error( + fmt::format("Block {} header has a mismatching raw hash {}", + BlockDescriptions[BlockIndex].BlockHash, + BlockRawHash)); + } + + CompositeBuffer DecompressedBlockBuffer = + CompressedBlockBuffer.DecompressToComposite(); + if (!DecompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} failed to decompress", + BlockDescriptions[BlockIndex].BlockHash)); + } + + ZEN_ASSERT_SLOW(BlockDescriptions[BlockIndex].BlockHash == + IoHash::HashBuffer(DecompressedBlockBuffer)); + + uint64_t BytesWrittenToDisk = 0; + uint32_t ChunksReadFromBlock = 0; + if (WriteBlockToDisk(Path, + RemoteContent, + RemotePathIndexWantsCopyFromCacheFlags, + DecompressedBlockBuffer, + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags.data(), + ChunksReadFromBlock, + BytesWrittenToDisk)) + { + BytesWritten += BytesWrittenToDisk; + WriteToDiskBytes += BytesWrittenToDisk; + ChunkCountWritten += ChunksReadFromBlock; + } + else + { + throw std::runtime_error(fmt::format("Block {} is malformed", + BlockDescriptions[BlockIndex].BlockHash)); + } + BlocksComplete++; } - BlocksComplete++; - } - }, - [&, BlockIndex](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed writing block {}. Reason: {}", - BlockDescriptions[BlockIndex].BlockHash, - Ex.what()); - AbortFlag = true; - }); + }, + [&, BlockIndex](const std::exception& Ex, std::atomic&) { + ZEN_ERROR("Failed writing block {}. Reason: {}", + BlockDescriptions[BlockIndex].BlockHash, + Ex.what()); + AbortFlag = true; + }); + } } }, - [&, BlockIndex](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed downloading block {}. Reason: {}", - BlockDescriptions[BlockIndex].BlockHash, - Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } else { @@ -3993,10 +3945,7 @@ namespace { } } }, - [&, BlockIndex](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed determning if block {} is needed. Reason: {}", BlockDescriptions[BlockIndex].BlockHash, Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } for (uint32_t PathIndex = 0; PathIndex < RemoteContent.Paths.size(); PathIndex++) { @@ -4008,7 +3957,7 @@ namespace { { Work.ScheduleWork( WritePool, - [&, PathIndex](std::atomic& AbortFlag) { + [&, PathIndex](std::atomic&) { if (!AbortFlag) { const std::filesystem::path TargetPath = (Path / RemoteContent.Paths[PathIndex]).make_preferred(); @@ -4017,10 +3966,7 @@ namespace { OutputFile.Open(TargetPath, BasicFile::Mode::kTruncate); } }, - [&, PathIndex](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed creating file at {}. Reason: {}", RemoteContent.Paths[PathIndex], Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } } @@ -4040,6 +3986,12 @@ namespace { .RemainingCount = gsl::narrow(ChunkCountToWrite - ChunkCountWritten.load())}, false); }); + + if (AbortFlag) + { + return; + } + WriteProgressBar.Finish(); { @@ -4109,8 +4061,6 @@ namespace { } PremissionsProgressBar.Finish(); } - - return false; } std::vector> ResolveBuildPartNames(BuildStorage& Storage, @@ -4421,8 +4371,7 @@ namespace { ChunkedFolderContent GetLocalContent(GetFolderContentStatistics& LocalFolderScanStats, ChunkingStatistics& ChunkingStats, const std::filesystem::path& Path, - ChunkingController& ChunkController, - std::atomic& AbortFlag) + ChunkingController& ChunkController) { ChunkedFolderContent LocalContent; @@ -4564,6 +4513,26 @@ namespace { } else { + // Remove files from LocalContent no longer in LocalFolderState + tsl::robin_set LocalFolderPaths; + LocalFolderPaths.reserve(LocalFolderState.Paths.size()); + for (const std::filesystem::path& LocalFolderPath : LocalFolderState.Paths) + { + LocalFolderPaths.insert(LocalFolderPath.generic_string()); + } + std::vector DeletedPaths; + for (const std::filesystem::path& LocalContentPath : LocalContent.Paths) + { + if (!LocalFolderPaths.contains(LocalContentPath.generic_string())) + { + DeletedPaths.push_back(LocalContentPath); + } + } + if (!DeletedPaths.empty()) + { + LocalContent = DeletePathsFromChunkedContent(LocalContent, DeletedPaths); + } + ZEN_CONSOLE("Using cached local state"); } ZEN_CONSOLE("Read local state in {}", NiceLatencyNs(ReadStateTimer.GetElapsedTimeUs() * 1000)); @@ -4611,18 +4580,19 @@ namespace { false); }, AbortFlag); - FilteredBytesHashed.Stop(); - ProgressBar.Finish(); if (AbortFlag) { return {}; } + + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); } return LocalContent; } - bool DownloadFolder(BuildStorage& Storage, + void DownloadFolder(BuildStorage& Storage, const Oid& BuildId, const std::vector& BuildPartIds, std::span BuildPartNames, @@ -4630,8 +4600,7 @@ namespace { bool AllowMultiparts, bool WipeTargetFolder) { - Stopwatch DownloadTimer; - std::atomic AbortFlag(false); + Stopwatch DownloadTimer; const std::filesystem::path ZenTempFolder = Path / ZenTempFolderName; CreateDirectories(ZenTempFolder); @@ -4672,19 +4641,19 @@ namespace { { if (!WipeTargetFolder) { - LocalContent = GetLocalContent(LocalFolderScanStats, ChunkingStats, Path, *ChunkController, AbortFlag); + LocalContent = GetLocalContent(LocalFolderScanStats, ChunkingStats, Path, *ChunkController); } } else { CreateDirectories(Path); } - if (AbortFlag.load()) + if (AbortFlag) { - return true; + return; } - auto CompareContent = [](const ChunkedFolderContent& Lsh, const ChunkedFolderContent& Rhs) { + auto CompareContent = [](const ChunkedFolderContent& Lhs, const ChunkedFolderContent& Rhs) { tsl::robin_map RhsPathToIndex; const size_t RhsPathCount = Rhs.Paths.size(); RhsPathToIndex.reserve(RhsPathCount); @@ -4692,14 +4661,14 @@ namespace { { RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); } - const size_t LhsPathCount = Lsh.Paths.size(); + const size_t LhsPathCount = Lhs.Paths.size(); for (size_t LhsPathIndex = 0; LhsPathIndex < LhsPathCount; LhsPathIndex++) { - if (auto It = RhsPathToIndex.find(Lsh.Paths[LhsPathIndex].generic_string()); It != RhsPathToIndex.end()) + if (auto It = RhsPathToIndex.find(Lhs.Paths[LhsPathIndex].generic_string()); It != RhsPathToIndex.end()) { const size_t RhsPathIndex = It->second; - if ((Lsh.RawHashes[LhsPathIndex] != Rhs.RawHashes[RhsPathIndex]) || - (!FolderContent::AreFileAttributesEqual(Lsh.Attributes[LhsPathIndex], Rhs.Attributes[RhsPathIndex]))) + if ((Lhs.RawHashes[LhsPathIndex] != Rhs.RawHashes[RhsPathIndex]) || + (!FolderContent::AreFileAttributesEqual(Lhs.Attributes[LhsPathIndex], Rhs.Attributes[RhsPathIndex]))) { return false; } @@ -4709,6 +4678,20 @@ namespace { return false; } } + tsl::robin_set LhsPathExists; + LhsPathExists.reserve(LhsPathCount); + for (size_t LhsPathIndex = 0; LhsPathIndex < LhsPathCount; LhsPathIndex++) + { + LhsPathExists.insert({Lhs.Paths[LhsPathIndex].generic_string()}); + } + for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + if (!LhsPathExists.contains(Rhs.Paths[RhsPathIndex].generic_string())) + { + return false; + } + } + return true; }; @@ -4726,49 +4709,42 @@ namespace { } ZEN_CONSOLE("Downloading build {}, parts:{}", BuildId, SB.ToView()); FolderContent LocalFolderState; - if (UpdateFolder(Storage, - BuildId, - Path, - LargeAttachmentSize, - PreferredMultipartChunkSize, - LocalContent, - RemoteContent, - BlockDescriptions, - LooseChunkHashes, - WipeTargetFolder, - AbortFlag, - LocalFolderState)) - { - AbortFlag = true; - } + UpdateFolder(Storage, + BuildId, + Path, + LargeAttachmentSize, + PreferredMultipartChunkSize, + LocalContent, + RemoteContent, + BlockDescriptions, + LooseChunkHashes, + WipeTargetFolder, + LocalFolderState); + if (!AbortFlag) { - VerifyFolder(RemoteContent, Path, AbortFlag); - } + VerifyFolder(RemoteContent, Path); - Stopwatch WriteStateTimer; - CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderState); + Stopwatch WriteStateTimer; + CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderState); - CreateDirectories((Path / ZenStateFilePath).parent_path()); - TemporaryFile::SafeWriteFile(Path / ZenStateFilePath, StateObject.GetView()); - ZEN_CONSOLE("Wrote local state in {}", NiceLatencyNs(WriteStateTimer.GetElapsedTimeUs() * 1000)); + CreateDirectories((Path / ZenStateFilePath).parent_path()); + TemporaryFile::SafeWriteFile(Path / ZenStateFilePath, StateObject.GetView()); + ZEN_CONSOLE("Wrote local state in {}", NiceLatencyNs(WriteStateTimer.GetElapsedTimeUs() * 1000)); #if 0 - ExtendableStringBuilder<1024> SB; - CompactBinaryToJson(StateObject, SB); - WriteFile(Path / ZenStateFileJsonPath, IoBuffer(IoBuffer::Wrap, SB.Data(), SB.Size())); + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(StateObject, SB); + WriteFile(Path / ZenStateFileJsonPath, IoBuffer(IoBuffer::Wrap, SB.Data(), SB.Size())); #endif // 0 - ZEN_CONSOLE("Downloaded build in {}.", NiceLatencyNs(DownloadTimer.GetElapsedTimeUs() * 1000)); + ZEN_CONSOLE("Downloaded build in {}.", NiceLatencyNs(DownloadTimer.GetElapsedTimeUs() * 1000)); + } } - - return AbortFlag.load(); } - bool DiffFolders(const std::filesystem::path& BasePath, const std::filesystem::path& ComparePath, bool OnlyChunked) + void DiffFolders(const std::filesystem::path& BasePath, const std::filesystem::path& ComparePath, bool OnlyChunked) { - std::atomic AbortFlag(false); - ChunkedFolderContent BaseFolderContent; ChunkedFolderContent CompareFolderContent; @@ -4818,8 +4794,11 @@ namespace { BasePath, IsAcceptedFolder, IsAcceptedFile, - *ChunkController, - AbortFlag); + *ChunkController); + if (AbortFlag) + { + return; + } GetFolderContentStatistics CompareGetFolderContentStats; ChunkingStatistics CompareChunkingStats; @@ -4828,8 +4807,12 @@ namespace { ComparePath, IsAcceptedFolder, IsAcceptedFile, - *ChunkController, - AbortFlag); + *ChunkController); + + if (AbortFlag) + { + return; + } } std::vector AddedHashes; @@ -4918,8 +4901,6 @@ namespace { NewChunkCount, NiceBytes(NewChunkSize), NewPercent); - - return false; } } // namespace @@ -5190,6 +5171,11 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { ZEN_UNUSED(GlobalOptions); + signal(SIGINT, SignalCallbackHandler); +#if ZEN_PLATFORM_WINDOWS + signal(SIGBREAK, SignalCallbackHandler); +#endif // ZEN_PLATFORM_WINDOWS + using namespace std::literals; std::vector SubCommandArguments; @@ -5320,786 +5306,798 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) }; ParseOutputOptions(); - if (SubOption == &m_ListOptions) + try { - ParseStorageOptions(); - ParseAuthOptions(); - - HttpClient Http(m_BuildsUrl, ClientSettings); - - CbObjectWriter QueryWriter; - QueryWriter.BeginObject("query"); - { - // QueryWriter.BeginObject("platform"); - // { - // QueryWriter.AddString("$eq", "Windows"); - // } - // QueryWriter.EndObject(); // changelist - } - QueryWriter.EndObject(); // query - - BuildStorage::Statistics StorageStats; - std::unique_ptr Storage; - if (!m_BuildsUrl.empty()) - { - ZEN_CONSOLE("Querying builds in cloud endpoint '{}'. SessionId: '{}'. Namespace '{}', Bucket '{}'", - m_BuildsUrl, - Http.GetSessionId(), - m_Namespace, - m_Bucket); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, std::filesystem::path{}); - } - else if (!m_StoragePath.empty()) - { - ZEN_CONSOLE("Querying builds in folder '{}'.", m_StoragePath); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 - } - else + if (SubOption == &m_ListOptions) { - throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); - } - - CbObject Response = Storage->ListBuilds(QueryWriter.Save()); - ExtendableStringBuilder<1024> SB; - CompactBinaryToJson(Response.GetView(), SB); - ZEN_CONSOLE("{}", SB.ToView()); - return 0; - } + ParseStorageOptions(); + ParseAuthOptions(); - if (SubOption == &m_UploadOptions) - { - ParseStorageOptions(); - ParseAuthOptions(); - - HttpClient Http(m_BuildsUrl, ClientSettings); - - if (m_Path.empty()) - { - throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_UploadOptions.help())); - } + HttpClient Http(m_BuildsUrl, ClientSettings); - if (m_CreateBuild) - { - if (m_BuildMetadataPath.empty() && m_BuildMetadata.empty()) + CbObjectWriter QueryWriter; + QueryWriter.BeginObject("query"); { - throw zen::OptionParseException(fmt::format("Options for builds target are missing\n{}", m_UploadOptions.help())); + // QueryWriter.BeginObject("platform"); + // { + // QueryWriter.AddString("$eq", "Windows"); + // } + // QueryWriter.EndObject(); // changelist } - if (!m_BuildMetadataPath.empty() && !m_BuildMetadata.empty()) + QueryWriter.EndObject(); // query + + BuildStorage::Statistics StorageStats; + std::unique_ptr Storage; + if (!m_BuildsUrl.empty()) { - throw zen::OptionParseException(fmt::format("Conflicting options for builds target\n{}", m_UploadOptions.help())); + ZEN_CONSOLE("Querying builds in cloud endpoint '{}'. SessionId: '{}'. Namespace '{}', Bucket '{}'", + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, std::filesystem::path{}); } - } - else - { - if (!m_BuildMetadataPath.empty()) + else if (!m_StoragePath.empty()) { - throw zen::OptionParseException( - fmt::format("metadata-path option is only valid if creating a build\n{}", m_UploadOptions.help())); + ZEN_CONSOLE("Querying builds in folder '{}'.", m_StoragePath); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 } - if (!m_BuildMetadata.empty()) + else { - throw zen::OptionParseException( - fmt::format("metadata option is only valid if creating a build\n{}", m_UploadOptions.help())); + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); } - } - if (m_BuildPartName.empty()) - { - m_BuildPartName = m_Path.filename().string(); + CbObject Response = Storage->ListBuilds(QueryWriter.Save()); + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(Response.GetView(), SB); + ZEN_CONSOLE("{}", SB.ToView()); + return 0; } - const bool GeneratedBuildId = m_BuildId.empty(); - if (GeneratedBuildId) - { - m_BuildId = Oid::NewOid().ToString(); - } - else if (m_BuildId.length() != Oid::StringLength) + if (SubOption == &m_UploadOptions) { - throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); - } - else if (Oid::FromHexString(m_BuildId) == Oid::Zero) - { - throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); - } + ParseStorageOptions(); + ParseAuthOptions(); - const bool GeneratedBuildPartId = m_BuildPartId.empty(); - if (GeneratedBuildPartId) - { - m_BuildPartId = Oid::NewOid().ToString(); - } - else if (m_BuildPartId.length() != Oid::StringLength) - { - throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); - } - else if (Oid::FromHexString(m_BuildPartId) == Oid::Zero) - { - throw zen::OptionParseException(fmt::format("Invalid build part id\n{}", m_UploadOptions.help())); - } + HttpClient Http(m_BuildsUrl, ClientSettings); - BuildStorage::Statistics StorageStats; - const Oid BuildId = Oid::FromHexString(m_BuildId); - const Oid BuildPartId = Oid::FromHexString(m_BuildPartId); - std::unique_ptr Storage; - std::string StorageName; - if (!m_BuildsUrl.empty()) - { - ZEN_CONSOLE("Uploading '{}' from '{}' to cloud endpoint '{}'. SessionId: '{}'. Namespace '{}', Bucket '{}', {}BuildId '{}'", - m_BuildPartName, - m_Path, - m_BuildsUrl, - Http.GetSessionId(), - m_Namespace, - m_Bucket, - GeneratedBuildId ? "Generated " : "", - BuildId); - CreateDirectories(m_Path / ZenTempStorageFolderName); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); - StorageName = "Cloud DDC"; - } - else if (!m_StoragePath.empty()) - { - ZEN_CONSOLE("Uploading '{}' from '{}' to folder '{}'. {}BuildId '{}'", - m_BuildPartName, - m_Path, - m_StoragePath, - GeneratedBuildId ? "Generated " : "", - BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, m_WriteMetadataAsJson); // , .0015, 0.00004 - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); - } - else - { - throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); - } + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_UploadOptions.help())); + } - CbObject MetaData; - if (m_CreateBuild) - { - if (!m_BuildMetadataPath.empty()) + if (m_CreateBuild) { - std::filesystem::path MetadataPath(m_BuildMetadataPath); - IoBuffer MetaDataJson = ReadFile(MetadataPath).Flatten(); - std::string_view Json(reinterpret_cast(MetaDataJson.GetData()), MetaDataJson.GetSize()); - std::string JsonError; - MetaData = LoadCompactBinaryFromJson(Json, JsonError).AsObject(); - if (!JsonError.empty()) + if (m_BuildMetadataPath.empty() && m_BuildMetadata.empty()) { - throw std::runtime_error( - fmt::format("build metadata file '{}' is malformed. Reason: '{}'", m_BuildMetadataPath, JsonError)); + throw zen::OptionParseException(fmt::format("Options for builds target are missing\n{}", m_UploadOptions.help())); + } + if (!m_BuildMetadataPath.empty() && !m_BuildMetadata.empty()) + { + throw zen::OptionParseException(fmt::format("Conflicting options for builds target\n{}", m_UploadOptions.help())); } } - if (!m_BuildMetadata.empty()) + else { - CbObjectWriter MetaDataWriter(1024); - ForEachStrTok(m_BuildMetadata, ';', [&](std::string_view Pair) { - size_t SplitPos = Pair.find('='); - if (SplitPos == std::string::npos || SplitPos == 0) - { - throw std::runtime_error(fmt::format("build metadata key-value pair '{}' is malformed", Pair)); - } - MetaDataWriter.AddString(Pair.substr(0, SplitPos), Pair.substr(SplitPos + 1)); - return true; - }); - MetaData = MetaDataWriter.Save(); + if (!m_BuildMetadataPath.empty()) + { + throw zen::OptionParseException( + fmt::format("metadata-path option is only valid if creating a build\n{}", m_UploadOptions.help())); + } + if (!m_BuildMetadata.empty()) + { + throw zen::OptionParseException( + fmt::format("metadata option is only valid if creating a build\n{}", m_UploadOptions.help())); + } } - } - - bool Aborted = UploadFolder(*Storage, - BuildId, - BuildPartId, - m_BuildPartName, - m_Path, - m_ManifestPath, - m_BlockReuseMinPercentLimit, - m_AllowMultiparts, - MetaData, - m_CreateBuild, - m_Clean); - if (Aborted) - { - ZEN_CONSOLE("Upload failed."); - } - - if (false) - { - ZEN_CONSOLE( - "{}:\n" - "Read: {}\n" - "Write: {}\n" - "Requests: {}\n" - "Avg Request Time: {}\n" - "Avg I/O Time: {}", - StorageName, - NiceBytes(StorageStats.TotalBytesRead.load()), - NiceBytes(StorageStats.TotalBytesWritten.load()), - StorageStats.TotalRequestCount.load(), - StorageStats.TotalExecutionTimeUs.load() > 0 - ? NiceTimeSpanMs(StorageStats.TotalExecutionTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) - : 0, - StorageStats.TotalRequestCount.load() > 0 - ? NiceTimeSpanMs(StorageStats.TotalRequestTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) - : 0); - } - return Aborted ? 11 : 0; - } - - if (SubOption == &m_DownloadOptions) - { - ParseStorageOptions(); - ParseAuthOptions(); - - HttpClient Http(m_BuildsUrl, ClientSettings); - - if (m_Path.empty()) - { - throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); - } - if (m_BuildId.empty()) - { - throw zen::OptionParseException(fmt::format("build-id is required\n{}", m_DownloadOptions.help())); - } - Oid BuildId = Oid::TryFromHexString(m_BuildId); - if (BuildId == Oid::Zero) - { - throw zen::OptionParseException(fmt::format("build-id is invalid\n{}", m_DownloadOptions.help())); - } - if (!m_BuildPartName.empty() && !m_BuildPartId.empty()) - { - throw zen::OptionParseException(fmt::format("build-part-id conflicts with build-part-name\n{}", m_DownloadOptions.help())); - } + if (m_BuildPartName.empty()) + { + m_BuildPartName = m_Path.filename().string(); + } - std::vector BuildPartIds; - for (const std::string& BuildPartId : m_BuildPartIds) - { - BuildPartIds.push_back(Oid::TryFromHexString(BuildPartId)); - if (BuildPartIds.back() == Oid::Zero) + const bool GeneratedBuildId = m_BuildId.empty(); + if (GeneratedBuildId) { - throw zen::OptionParseException(fmt::format("build-part-id '{}' is invalid\n{}", BuildPartId, m_DownloadOptions.help())); + m_BuildId = Oid::NewOid().ToString(); + } + else if (m_BuildId.length() != Oid::StringLength) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); + } + else if (Oid::FromHexString(m_BuildId) == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); } - } - BuildStorage::Statistics StorageStats; - std::unique_ptr Storage; - std::string StorageName; - if (!m_BuildsUrl.empty()) - { - ZEN_CONSOLE("Downloading '{}' to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", - BuildId, - m_Path, - m_BuildsUrl, - Http.GetSessionId(), - m_Namespace, - m_Bucket, - BuildId); - CreateDirectories(m_Path / ZenTempStorageFolderName); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); - StorageName = "Cloud DDC"; - } - else if (!m_StoragePath.empty()) - { - ZEN_CONSOLE("Downloading '{}' to '{}' from folder {}. BuildId '{}'", BuildId, m_Path, m_StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); - } - else - { - throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); - } + const bool GeneratedBuildPartId = m_BuildPartId.empty(); + if (GeneratedBuildPartId) + { + m_BuildPartId = Oid::NewOid().ToString(); + } + else if (m_BuildPartId.length() != Oid::StringLength) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", m_UploadOptions.help())); + } + else if (Oid::FromHexString(m_BuildPartId) == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("Invalid build part id\n{}", m_UploadOptions.help())); + } - bool Aborted = DownloadFolder(*Storage, BuildId, BuildPartIds, m_BuildPartNames, m_Path, m_AllowMultiparts, m_Clean); - if (Aborted) - { - ZEN_CONSOLE("Download failed."); - } - if (false) - { - ZEN_CONSOLE( - "{}:\n" - "Read: {}\n" - "Write: {}\n" - "Requests: {}\n" - "Avg Request Time: {}\n" - "Avg I/O Time: {}", - StorageName, - NiceBytes(StorageStats.TotalBytesRead.load()), - NiceBytes(StorageStats.TotalBytesWritten.load()), - StorageStats.TotalRequestCount.load(), - StorageStats.TotalExecutionTimeUs.load() > 0 - ? NiceTimeSpanMs(StorageStats.TotalExecutionTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) - : 0, - StorageStats.TotalRequestCount.load() > 0 - ? NiceTimeSpanMs(StorageStats.TotalRequestTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) - : 0); - } + BuildStorage::Statistics StorageStats; + const Oid BuildId = Oid::FromHexString(m_BuildId); + const Oid BuildPartId = Oid::FromHexString(m_BuildPartId); + std::unique_ptr Storage; + std::string StorageName; + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Uploading '{}' from '{}' to cloud endpoint '{}'. SessionId: '{}'. Namespace '{}', Bucket '{}', {}BuildId '{}'", + m_BuildPartName, + m_Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + GeneratedBuildId ? "Generated " : "", + BuildId); + CreateDirectories(m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Uploading '{}' from '{}' to folder '{}'. {}BuildId '{}'", + m_BuildPartName, + m_Path, + m_StoragePath, + GeneratedBuildId ? "Generated " : "", + BuildId); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, m_WriteMetadataAsJson); // , .0015, 0.00004 + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } - return Aborted ? 11 : 0; - } - if (SubOption == &m_DiffOptions) - { - if (m_Path.empty()) - { - throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + CbObject MetaData; + if (m_CreateBuild) + { + if (!m_BuildMetadataPath.empty()) + { + std::filesystem::path MetadataPath(m_BuildMetadataPath); + IoBuffer MetaDataJson = ReadFile(MetadataPath).Flatten(); + std::string_view Json(reinterpret_cast(MetaDataJson.GetData()), MetaDataJson.GetSize()); + std::string JsonError; + MetaData = LoadCompactBinaryFromJson(Json, JsonError).AsObject(); + if (!JsonError.empty()) + { + throw std::runtime_error( + fmt::format("build metadata file '{}' is malformed. Reason: '{}'", m_BuildMetadataPath, JsonError)); + } + } + if (!m_BuildMetadata.empty()) + { + CbObjectWriter MetaDataWriter(1024); + ForEachStrTok(m_BuildMetadata, ';', [&](std::string_view Pair) { + size_t SplitPos = Pair.find('='); + if (SplitPos == std::string::npos || SplitPos == 0) + { + throw std::runtime_error(fmt::format("build metadata key-value pair '{}' is malformed", Pair)); + } + MetaDataWriter.AddString(Pair.substr(0, SplitPos), Pair.substr(SplitPos + 1)); + return true; + }); + MetaData = MetaDataWriter.Save(); + } + } + + UploadFolder(*Storage, + BuildId, + BuildPartId, + m_BuildPartName, + m_Path, + m_ManifestPath, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData, + m_CreateBuild, + m_Clean); + + if (false) + { + ZEN_CONSOLE( + "{}:\n" + "Read: {}\n" + "Write: {}\n" + "Requests: {}\n" + "Avg Request Time: {}\n" + "Avg I/O Time: {}", + StorageName, + NiceBytes(StorageStats.TotalBytesRead.load()), + NiceBytes(StorageStats.TotalBytesWritten.load()), + StorageStats.TotalRequestCount.load(), + StorageStats.TotalExecutionTimeUs.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalExecutionTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0, + StorageStats.TotalRequestCount.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalRequestTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0); + } + return AbortFlag ? 11 : 0; } - if (m_DiffPath.empty()) + + if (SubOption == &m_DownloadOptions) { - throw zen::OptionParseException(fmt::format("compare-path is required\n{}", m_DownloadOptions.help())); - } - bool Aborted = DiffFolders(m_Path, m_DiffPath, m_OnlyChunked); - return Aborted ? 11 : 0; - } + ParseStorageOptions(); + ParseAuthOptions(); - if (SubOption == &m_TestOptions) - { - ParseStorageOptions(); - ParseAuthOptions(); + HttpClient Http(m_BuildsUrl, ClientSettings); - HttpClient Http(m_BuildsUrl, ClientSettings); + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + } + if (m_BuildId.empty()) + { + throw zen::OptionParseException(fmt::format("build-id is required\n{}", m_DownloadOptions.help())); + } + Oid BuildId = Oid::TryFromHexString(m_BuildId); + if (BuildId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("build-id is invalid\n{}", m_DownloadOptions.help())); + } - if (m_Path.empty()) - { - throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); - } + if (!m_BuildPartName.empty() && !m_BuildPartId.empty()) + { + throw zen::OptionParseException(fmt::format("build-part-id conflicts with build-part-name\n{}", m_DownloadOptions.help())); + } + + std::vector BuildPartIds; + for (const std::string& BuildPartId : m_BuildPartIds) + { + BuildPartIds.push_back(Oid::TryFromHexString(BuildPartId)); + if (BuildPartIds.back() == Oid::Zero) + { + throw zen::OptionParseException( + fmt::format("build-part-id '{}' is invalid\n{}", BuildPartId, m_DownloadOptions.help())); + } + } - m_BuildId = Oid::NewOid().ToString(); - m_BuildPartName = m_Path.filename().string(); - m_BuildPartId = Oid::NewOid().ToString(); - m_CreateBuild = true; + BuildStorage::Statistics StorageStats; + std::unique_ptr Storage; + std::string StorageName; + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Downloading '{}' to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + BuildId, + m_Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + CreateDirectories(m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Downloading '{}' to '{}' from folder {}. BuildId '{}'", BuildId, m_Path, m_StoragePath, BuildId); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } - BuildStorage::Statistics StorageStats; - const Oid BuildId = Oid::FromHexString(m_BuildId); - const Oid BuildPartId = Oid::FromHexString(m_BuildPartId); - std::unique_ptr Storage; - std::string StorageName; + DownloadFolder(*Storage, BuildId, BuildPartIds, m_BuildPartNames, m_Path, m_AllowMultiparts, m_Clean); - if (m_BuildsUrl.empty() && m_StoragePath.empty()) - { - m_StoragePath = GetRunningExecutablePath().parent_path() / ".tmpstore"; - CreateDirectories(m_StoragePath); - CleanDirectory(m_StoragePath); - } - auto _ = MakeGuard([&]() { - if (m_BuildsUrl.empty() && m_StoragePath.empty()) + if (false) { - DeleteDirectories(m_StoragePath); + ZEN_CONSOLE( + "{}:\n" + "Read: {}\n" + "Write: {}\n" + "Requests: {}\n" + "Avg Request Time: {}\n" + "Avg I/O Time: {}", + StorageName, + NiceBytes(StorageStats.TotalBytesRead.load()), + NiceBytes(StorageStats.TotalBytesWritten.load()), + StorageStats.TotalRequestCount.load(), + StorageStats.TotalExecutionTimeUs.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalExecutionTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0, + StorageStats.TotalRequestCount.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalRequestTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0); } - }); - if (!m_BuildsUrl.empty()) - { - ZEN_CONSOLE("Using '{}' to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", - m_BuildPartName.empty() ? m_BuildPartId : m_BuildPartName, - m_Path, - m_BuildsUrl, - Http.GetSessionId(), - m_Namespace, - m_Bucket, - BuildId); - CreateDirectories(m_Path / ZenTempStorageFolderName); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); - StorageName = "Cloud DDC"; - } - else if (!m_StoragePath.empty()) - { - ZEN_CONSOLE("Using '{}' to '{}' from folder {}. BuildId '{}'", - m_BuildPartName.empty() ? m_BuildPartId : m_BuildPartName, - m_Path, - m_StoragePath, - BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + return AbortFlag ? 11 : 0; } - else + if (SubOption == &m_DiffOptions) { - throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); - } - - auto MakeMetaData = [](const Oid& BuildId) -> CbObject { - CbObjectWriter BuildMetaDataWriter; + if (m_Path.empty()) { - const uint32_t CL = BuildId.OidBits[2]; - BuildMetaDataWriter.AddString("name", fmt::format("++Test+Main-CL-{}", CL)); - BuildMetaDataWriter.AddString("branch", "ZenTestBuild"); - BuildMetaDataWriter.AddString("baselineBranch", "ZenTestBuild"); - BuildMetaDataWriter.AddString("platform", "Windows"); - BuildMetaDataWriter.AddString("project", "Test"); - BuildMetaDataWriter.AddInteger("changelist", CL); - BuildMetaDataWriter.AddString("buildType", "test-folder"); + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); } - return BuildMetaDataWriter.Save(); - }; - CbObject MetaData = MakeMetaData(Oid::TryFromHexString(m_BuildId)); - { - ExtendableStringBuilder<256> SB; - CompactBinaryToJson(MetaData, SB); - ZEN_CONSOLE("Upload Build {}, Part {} ({})\n{}", m_BuildId, BuildPartId, m_BuildPartName, SB.ToView()); + if (m_DiffPath.empty()) + { + throw zen::OptionParseException(fmt::format("compare-path is required\n{}", m_DownloadOptions.help())); + } + DiffFolders(m_Path, m_DiffPath, m_OnlyChunked); + return AbortFlag ? 11 : 0; } - bool Aborted = UploadFolder(*Storage, - BuildId, - BuildPartId, - m_BuildPartName, - m_Path, - {}, - m_BlockReuseMinPercentLimit, - m_AllowMultiparts, - MetaData, - m_CreateBuild, - m_Clean); - if (Aborted) + if (SubOption == &m_TestOptions) { - ZEN_CONSOLE("Upload failed."); - return 11; - } + ParseStorageOptions(); + ParseAuthOptions(); - const std::filesystem::path DownloadPath = m_Path.parent_path() / (m_BuildPartName + "_download"); - ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}'", BuildId, BuildPartId, m_BuildPartName, DownloadPath); - Aborted = DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, true); - if (Aborted) - { - ZEN_CONSOLE("Download failed."); - return 11; - } + HttpClient Http(m_BuildsUrl, ClientSettings); - ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (identical target)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); - Aborted = DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); - if (Aborted) - { - ZEN_CONSOLE("Re-download failed. (identical target)"); - return 11; - } + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + } - auto ScrambleDir = [](const std::filesystem::path& Path) { - ZEN_CONSOLE("\nScrambling '{}'", Path); - Stopwatch Timer; - DirectoryContent DownloadContent; - GetDirectoryContent( - Path, - DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, - DownloadContent); - auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders, Path](const std::filesystem::path& AbsolutePath) -> bool { - std::string RelativePath = std::filesystem::relative(AbsolutePath, Path).generic_string(); - for (const std::string_view& ExcludeFolder : ExcludeFolders) + m_BuildId = Oid::NewOid().ToString(); + m_BuildPartName = m_Path.filename().string(); + m_BuildPartId = Oid::NewOid().ToString(); + m_CreateBuild = true; + + BuildStorage::Statistics StorageStats; + const Oid BuildId = Oid::FromHexString(m_BuildId); + const Oid BuildPartId = Oid::FromHexString(m_BuildPartId); + std::unique_ptr Storage; + std::string StorageName; + + if (m_BuildsUrl.empty() && m_StoragePath.empty()) + { + m_StoragePath = GetRunningExecutablePath().parent_path() / ".tmpstore"; + CreateDirectories(m_StoragePath); + CleanDirectory(m_StoragePath); + } + auto _ = MakeGuard([&]() { + if (m_BuildsUrl.empty() && m_StoragePath.empty()) { - if (RelativePath.starts_with(ExcludeFolder)) + DeleteDirectories(m_StoragePath); + } + }); + + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Using '{}' to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + m_BuildPartName.empty() ? m_BuildPartId : m_BuildPartName, + m_Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + CreateDirectories(m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Using '{}' to '{}' from folder {}. BuildId '{}'", + m_BuildPartName.empty() ? m_BuildPartId : m_BuildPartName, + m_Path, + m_StoragePath, + BuildId); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + auto MakeMetaData = [](const Oid& BuildId) -> CbObject { + CbObjectWriter BuildMetaDataWriter; + { + const uint32_t CL = BuildId.OidBits[2]; + BuildMetaDataWriter.AddString("name", fmt::format("++Test+Main-CL-{}", CL)); + BuildMetaDataWriter.AddString("branch", "ZenTestBuild"); + BuildMetaDataWriter.AddString("baselineBranch", "ZenTestBuild"); + BuildMetaDataWriter.AddString("platform", "Windows"); + BuildMetaDataWriter.AddString("project", "Test"); + BuildMetaDataWriter.AddInteger("changelist", CL); + BuildMetaDataWriter.AddString("buildType", "test-folder"); + } + return BuildMetaDataWriter.Save(); + }; + CbObject MetaData = MakeMetaData(Oid::TryFromHexString(m_BuildId)); + { + ExtendableStringBuilder<256> SB; + CompactBinaryToJson(MetaData, SB); + ZEN_CONSOLE("Upload Build {}, Part {} ({})\n{}", m_BuildId, BuildPartId, m_BuildPartName, SB.ToView()); + } + + UploadFolder(*Storage, + BuildId, + BuildPartId, + m_BuildPartName, + m_Path, + {}, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData, + m_CreateBuild, + m_Clean); + if (AbortFlag) + { + ZEN_CONSOLE("Upload failed."); + return 11; + } + + const std::filesystem::path DownloadPath = m_Path.parent_path() / (m_BuildPartName + "_download"); + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}'", BuildId, BuildPartId, m_BuildPartName, DownloadPath); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, true); + if (AbortFlag) + { + ZEN_CONSOLE("Download failed."); + return 11; + } + + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (identical target)", + BuildId, + BuildPartId, + m_BuildPartName, + DownloadPath); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed. (identical target)"); + return 11; + } + + auto ScrambleDir = [](const std::filesystem::path& Path) { + ZEN_CONSOLE("\nScrambling '{}'", Path); + Stopwatch Timer; + DirectoryContent DownloadContent; + GetDirectoryContent( + Path, + DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, + DownloadContent); + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders, Path](const std::filesystem::path& AbsolutePath) -> bool { + std::string RelativePath = std::filesystem::relative(AbsolutePath, Path).generic_string(); + for (const std::string_view& ExcludeFolder : ExcludeFolders) { - if (RelativePath.length() == ExcludeFolder.length()) + if (RelativePath.starts_with(ExcludeFolder)) { - return false; - } - else if (RelativePath[ExcludeFolder.length()] == '/') - { - return false; + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } } } - } - return true; - }; + return true; + }; - std::atomic AbortFlag = false; - ParallellWork Work(AbortFlag); + ParallellWork Work(AbortFlag); - uint32_t Randomizer = 0; - auto FileSizeIt = DownloadContent.FileSizes.begin(); - for (const std::filesystem::path& FilePath : DownloadContent.Files) - { - if (IsAcceptedFolder(FilePath)) + uint32_t Randomizer = 0; + auto FileSizeIt = DownloadContent.FileSizes.begin(); + for (const std::filesystem::path& FilePath : DownloadContent.Files) { - uint32_t Case = (Randomizer++) % 7; - switch (Case) + if (IsAcceptedFolder(FilePath)) { - case 0: - { - uint64_t SourceSize = *FileSizeIt; - if (SourceSize > 0) + uint32_t Case = (Randomizer++) % 7; + switch (Case) + { + case 0: { - Work.ScheduleWork( - GetMediumWorkerPool(EWorkloadType::Burst), - [SourceSize, FilePath](std::atomic& AbortFlag) { - if (!AbortFlag) - { - IoBuffer Scrambled(SourceSize); - { - IoBuffer Source = IoBufferBuilder::MakeFromFile(FilePath); - Scrambled.GetMutableView().CopyFrom( - Source.GetView().Mid(SourceSize / 3, SourceSize / 3)); - Scrambled.GetMutableView() - .Mid(SourceSize / 3) - .CopyFrom(Source.GetView().Mid(0, SourceSize / 3)); - Scrambled.GetMutableView() - .Mid((SourceSize / 3) * 2) - .CopyFrom(Source.GetView().Mid(SourceSize / 2, SourceSize / 3)); - } - bool IsReadOnly = SetFileReadOnly(FilePath, false); - WriteFile(FilePath, Scrambled); - if (IsReadOnly) + uint64_t SourceSize = *FileSizeIt; + if (SourceSize > 0) + { + Work.ScheduleWork( + GetMediumWorkerPool(EWorkloadType::Burst), + [SourceSize, FilePath](std::atomic&) { + if (!AbortFlag) { - SetFileReadOnly(FilePath, true); + IoBuffer Scrambled(SourceSize); + { + IoBuffer Source = IoBufferBuilder::MakeFromFile(FilePath); + Scrambled.GetMutableView().CopyFrom( + Source.GetView().Mid(SourceSize / 3, SourceSize / 3)); + Scrambled.GetMutableView() + .Mid(SourceSize / 3) + .CopyFrom(Source.GetView().Mid(0, SourceSize / 3)); + Scrambled.GetMutableView() + .Mid((SourceSize / 3) * 2) + .CopyFrom(Source.GetView().Mid(SourceSize / 2, SourceSize / 3)); + } + bool IsReadOnly = SetFileReadOnly(FilePath, false); + WriteFile(FilePath, Scrambled); + if (IsReadOnly) + { + SetFileReadOnly(FilePath, true); + } } - } - }, - [FilePath](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed scrambling file {}. Reason: {}", FilePath, Ex.what()); - AbortFlag = true; - }); + }, + Work.DefaultErrorFunction()); + } } - } - break; - case 1: - std::filesystem::remove(FilePath); - break; - default: - break; + break; + case 1: + std::filesystem::remove(FilePath); + break; + default: + break; + } } + FileSizeIt++; } - FileSizeIt++; - } - Work.Wait(5000, [&](bool IsAborted, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(IsAborted); - ZEN_CONSOLE("Scrambling files, {} remaining", PendingWork); - }); - ZEN_ASSERT(!AbortFlag.load()); - ZEN_CONSOLE("Scrambled files in {}", NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); - }; - - ScrambleDir(DownloadPath); - ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled target)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); - Aborted = DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); - if (Aborted) - { - ZEN_CONSOLE("Re-download failed. (scrambled target)"); - return 11; - } - - ScrambleDir(DownloadPath); - - Oid BuildId2 = Oid::NewOid(); - Oid BuildPartId2 = Oid::NewOid(); + Work.Wait(5000, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted); + ZEN_CONSOLE("Scrambling files, {} remaining", PendingWork); + }); + ZEN_ASSERT(!AbortFlag.load()); + ZEN_CONSOLE("Scrambled files in {}", NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); + }; - CbObject MetaData2 = MakeMetaData(BuildId2); - { - ExtendableStringBuilder<256> SB; - CompactBinaryToJson(MetaData, SB); - ZEN_CONSOLE("\nUpload scrambled Build {}, Part {} ({})\n{}\n", BuildId2, BuildPartId2, m_BuildPartName, SB.ToView()); - } + ScrambleDir(DownloadPath); + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled target)", + BuildId, + BuildPartId, + m_BuildPartName, + DownloadPath); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed. (scrambled target)"); + return 11; + } - Aborted = UploadFolder(*Storage, - BuildId2, - BuildPartId2, - m_BuildPartName, - DownloadPath, - {}, - m_BlockReuseMinPercentLimit, - m_AllowMultiparts, - MetaData2, - true, - false); - if (Aborted) - { - ZEN_CONSOLE("Upload of scrambled failed."); - return 11; - } + ScrambleDir(DownloadPath); - ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (original)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); - Aborted = DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); - if (Aborted) - { - ZEN_CONSOLE("Re-download failed."); - return 11; - } + Oid BuildId2 = Oid::NewOid(); + Oid BuildPartId2 = Oid::NewOid(); - ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); - Aborted = DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false); - if (Aborted) - { - ZEN_CONSOLE("Re-download failed."); - return 11; - } + CbObject MetaData2 = MakeMetaData(BuildId2); + { + ExtendableStringBuilder<256> SB; + CompactBinaryToJson(MetaData, SB); + ZEN_CONSOLE("\nUpload scrambled Build {}, Part {} ({})\n{}\n", BuildId2, BuildPartId2, m_BuildPartName, SB.ToView()); + } - ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); - Aborted = DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false); - if (Aborted) - { - ZEN_CONSOLE("Re-download failed."); - return 11; - } + UploadFolder(*Storage, + BuildId2, + BuildPartId2, + m_BuildPartName, + DownloadPath, + {}, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData2, + true, + false); + if (AbortFlag) + { + ZEN_CONSOLE("Upload of scrambled failed."); + return 11; + } - return 0; - } + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (original)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } - if (SubOption == &m_FetchBlobOptions) - { - ParseStorageOptions(); - ParseAuthOptions(); + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); + DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } - HttpClient Http(m_BuildsUrl, ClientSettings); + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); + DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } - if (m_BlobHash.empty()) - { - throw zen::OptionParseException(fmt::format("Blob hash string is missing\n{}", m_UploadOptions.help())); + return 0; } - IoHash BlobHash; - if (!IoHash::TryParse(m_BlobHash, BlobHash)) + if (SubOption == &m_FetchBlobOptions) { - throw zen::OptionParseException(fmt::format("Blob hash string is invalid\n{}", m_UploadOptions.help())); - } + ParseStorageOptions(); + ParseAuthOptions(); - if (m_BuildsUrl.empty() && m_StoragePath.empty()) - { - throw zen::OptionParseException(fmt::format("At least one storage option is required\n{}", m_UploadOptions.help())); - } + HttpClient Http(m_BuildsUrl, ClientSettings); - BuildStorage::Statistics StorageStats; - const Oid BuildId = Oid::FromHexString(m_BuildId); - std::unique_ptr Storage; - std::string StorageName; + if (m_BlobHash.empty()) + { + throw zen::OptionParseException(fmt::format("Blob hash string is missing\n{}", m_UploadOptions.help())); + } - if (!m_BuildsUrl.empty()) - { - ZEN_CONSOLE("Using from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", - m_BuildsUrl, - Http.GetSessionId(), - m_Namespace, - m_Bucket, - BuildId); - CreateDirectories(m_Path / ZenTempStorageFolderName); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); - StorageName = "Cloud DDC"; - } - else if (!m_StoragePath.empty()) - { - ZEN_CONSOLE("Using folder {}. BuildId '{}'", m_StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); - } - else - { - throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); - } + IoHash BlobHash; + if (!IoHash::TryParse(m_BlobHash, BlobHash)) + { + throw zen::OptionParseException(fmt::format("Blob hash string is invalid\n{}", m_UploadOptions.help())); + } - uint64_t CompressedSize; - uint64_t DecompressedSize; - ValidateBlob(*Storage, BuildId, BlobHash, CompressedSize, DecompressedSize); - ZEN_CONSOLE("Blob '{}' has a compressed size {} and a decompressed size of {} bytes", BlobHash, CompressedSize, DecompressedSize); - return 0; - } + if (m_BuildsUrl.empty() && m_StoragePath.empty()) + { + throw zen::OptionParseException(fmt::format("At least one storage option is required\n{}", m_UploadOptions.help())); + } - if (SubOption == &m_ValidateBuildPartOptions) - { - ParseStorageOptions(); - ParseAuthOptions(); + BuildStorage::Statistics StorageStats; + const Oid BuildId = Oid::FromHexString(m_BuildId); + std::unique_ptr Storage; + std::string StorageName; - HttpClient Http(m_BuildsUrl, ClientSettings); + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Using from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + CreateDirectories(m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Using folder {}. BuildId '{}'", m_StoragePath, BuildId); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } - if (m_BuildsUrl.empty() && m_StoragePath.empty()) - { - throw zen::OptionParseException(fmt::format("At least one storage option is required\n{}", m_UploadOptions.help())); + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateBlob(*Storage, BuildId, BlobHash, CompressedSize, DecompressedSize); + if (AbortFlag) + { + return 11; + } + ZEN_CONSOLE("Blob '{}' has a compressed size {} and a decompressed size of {} bytes", + BlobHash, + CompressedSize, + DecompressedSize); + return 0; } - if (m_BuildId.empty()) - { - throw zen::OptionParseException(fmt::format("build-id is required\n{}", m_DownloadOptions.help())); - } - Oid BuildId = Oid::TryFromHexString(m_BuildId); - if (BuildId == Oid::Zero) + if (SubOption == &m_ValidateBuildPartOptions) { - throw zen::OptionParseException(fmt::format("build-id is invalid\n{}", m_DownloadOptions.help())); - } + ParseStorageOptions(); + ParseAuthOptions(); - if (!m_BuildPartName.empty() && !m_BuildPartId.empty()) - { - throw zen::OptionParseException(fmt::format("build-part-id conflicts with build-part-name\n{}", m_DownloadOptions.help())); - } + HttpClient Http(m_BuildsUrl, ClientSettings); - BuildStorage::Statistics StorageStats; - std::unique_ptr Storage; - std::string StorageName; + if (m_BuildsUrl.empty() && m_StoragePath.empty()) + { + throw zen::OptionParseException(fmt::format("At least one storage option is required\n{}", m_UploadOptions.help())); + } - if (!m_BuildsUrl.empty()) - { - ZEN_CONSOLE("Using from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", - m_BuildsUrl, - Http.GetSessionId(), - m_Namespace, - m_Bucket, - BuildId); - CreateDirectories(m_Path / ZenTempStorageFolderName); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); - StorageName = "Cloud DDC"; - } - else if (!m_StoragePath.empty()) - { - ZEN_CONSOLE("Using folder {}. BuildId '{}'", m_StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); - } - else - { - throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); - } - Oid BuildPartId = Oid::TryFromHexString(m_BuildPartId); - CbObject Build = Storage->GetBuild(BuildId); - if (!m_BuildPartName.empty()) - { - BuildPartId = Build["parts"sv].AsObjectView()[m_BuildPartName].AsObjectId(); - if (BuildPartId == Oid::Zero) + if (m_BuildId.empty()) { - throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", m_BuildId, m_BuildPartName)); + throw zen::OptionParseException(fmt::format("build-id is required\n{}", m_DownloadOptions.help())); + } + Oid BuildId = Oid::TryFromHexString(m_BuildId); + if (BuildId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("build-id is invalid\n{}", m_DownloadOptions.help())); } - } - CbObject BuildPart = Storage->GetBuildPart(BuildId, BuildPartId); - ZEN_CONSOLE("Validating build part {}/{} ({})", BuildId, BuildPartId, NiceBytes(BuildPart.GetSize())); - std::vector ChunkAttachments; - for (CbFieldView LooseFileView : BuildPart["chunkAttachments"sv].AsObjectView()["rawHashes"sv]) - { - ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment()); - } - std::vector BlockAttachments; - for (CbFieldView BlocksView : BuildPart["blockAttachments"sv].AsObjectView()["rawHashes"sv]) - { - BlockAttachments.push_back(BlocksView.AsBinaryAttachment()); - } - for (const IoHash& ChunkAttachment : ChunkAttachments) - { - uint64_t CompressedSize; - uint64_t DecompressedSize; - try + if (!m_BuildPartName.empty() && !m_BuildPartId.empty()) { - ValidateBlob(*Storage, BuildId, ChunkAttachment, CompressedSize, DecompressedSize); - ZEN_CONSOLE("Chunk attachment {} ({} -> {}) is valid", - ChunkAttachment, - NiceBytes(CompressedSize), - NiceBytes(DecompressedSize)); + throw zen::OptionParseException(fmt::format("build-part-id conflicts with build-part-name\n{}", m_DownloadOptions.help())); } - catch (const std::exception& Ex) + + BuildStorage::Statistics StorageStats; + std::unique_ptr Storage; + std::string StorageName; + + if (!m_BuildsUrl.empty()) { - ZEN_CONSOLE("Failed validating chunk attachment {}: {}", ChunkAttachment, Ex.what()); + ZEN_CONSOLE("Using from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket, + BuildId); + CreateDirectories(m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Using folder {}. BuildId '{}'", m_StoragePath, BuildId); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + Oid BuildPartId = Oid::TryFromHexString(m_BuildPartId); + CbObject Build = Storage->GetBuild(BuildId); + if (!m_BuildPartName.empty()) + { + BuildPartId = Build["parts"sv].AsObjectView()[m_BuildPartName].AsObjectId(); + if (BuildPartId == Oid::Zero) + { + throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", m_BuildId, m_BuildPartName)); + } + } + CbObject BuildPart = Storage->GetBuildPart(BuildId, BuildPartId); + ZEN_CONSOLE("Validating build part {}/{} ({})", BuildId, BuildPartId, NiceBytes(BuildPart.GetSize())); + std::vector ChunkAttachments; + for (CbFieldView LooseFileView : BuildPart["chunkAttachments"sv].AsObjectView()["rawHashes"sv]) + { + ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment()); + } + std::vector BlockAttachments; + for (CbFieldView BlocksView : BuildPart["blockAttachments"sv].AsObjectView()["rawHashes"sv]) + { + BlockAttachments.push_back(BlocksView.AsBinaryAttachment()); } - } - for (const IoHash& BlockAttachment : BlockAttachments) - { - uint64_t CompressedSize; - uint64_t DecompressedSize; - try + for (const IoHash& ChunkAttachment : ChunkAttachments) { - ValidateChunkBlock(*Storage, BuildId, BlockAttachment, CompressedSize, DecompressedSize); - ZEN_CONSOLE("Block attachment {} ({} -> {}) is valid", - BlockAttachment, - NiceBytes(CompressedSize), - NiceBytes(DecompressedSize)); + uint64_t CompressedSize; + uint64_t DecompressedSize; + try + { + ValidateBlob(*Storage, BuildId, ChunkAttachment, CompressedSize, DecompressedSize); + ZEN_CONSOLE("Chunk attachment {} ({} -> {}) is valid", + ChunkAttachment, + NiceBytes(CompressedSize), + NiceBytes(DecompressedSize)); + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE("Failed validating chunk attachment {}: {}", ChunkAttachment, Ex.what()); + } } - catch (const std::exception& Ex) + + for (const IoHash& BlockAttachment : BlockAttachments) { - ZEN_CONSOLE("Failed validating block attachment {}: {}", BlockAttachment, Ex.what()); + uint64_t CompressedSize; + uint64_t DecompressedSize; + try + { + ValidateChunkBlock(*Storage, BuildId, BlockAttachment, CompressedSize, DecompressedSize); + ZEN_CONSOLE("Block attachment {} ({} -> {}) is valid", + BlockAttachment, + NiceBytes(CompressedSize), + NiceBytes(DecompressedSize)); + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE("Failed validating block attachment {}: {}", BlockAttachment, Ex.what()); + } } - } - return 0; + return AbortFlag ? 13 : 0; + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("{}", Ex.what()); + return 3; } - ZEN_ASSERT(false); } diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index 2e230ed53..bee1fd676 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -264,7 +264,7 @@ ProgressBar::~ProgressBar() { try { - Finish(); + ForceLinebreak(); } catch (const std::exception& Ex) { diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index a41b71972..6dc2a20d8 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -92,7 +92,8 @@ namespace { tsl::robin_map& RawHashToSequenceRawHashIndex, RwLock& Lock, const std::filesystem::path& FolderPath, - uint32_t PathIndex) + uint32_t PathIndex, + std::atomic& AbortFlag) { const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; @@ -105,7 +106,7 @@ namespace { { ChunkedInfoWithSource Chunked; const bool DidChunking = - InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed); + InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag); if (DidChunking) { Lock.WithExclusiveLock([&]() { @@ -753,15 +754,13 @@ ChunkFolderContent(ChunkingStatistics& Stats, RawHashToSequenceRawHashIndex, Lock, RootPath, - PathIndex); + PathIndex, + AbortFlag); Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; }); Stats.FilesProcessed++; } }, - [&, PathIndex](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed scanning file {}. Reason: {}", Result.Paths[PathIndex], Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } Work.Wait(UpdateInteralMS, [&](bool IsAborted, std::ptrdiff_t PendingWork) { diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp index 3f3a6661c..4f9344039 100644 --- a/src/zenutil/chunkedfile.cpp +++ b/src/zenutil/chunkedfile.cpp @@ -112,7 +112,12 @@ Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, st } ChunkedInfoWithSource -ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params, std::atomic* BytesProcessed) +ChunkData(BasicFile& RawData, + uint64_t Offset, + uint64_t Size, + ChunkedParams Params, + std::atomic* BytesProcessed, + std::atomic* AbortFlag) { ChunkedInfoWithSource Result; tsl::robin_map FoundChunks; @@ -129,6 +134,10 @@ ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Para IoHashStream RawHashStream; while (Offset < End) { + if (AbortFlag != nullptr && AbortFlag->load()) + { + return {}; + } size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize); if (ScanLength == ZenChunkHelper::kNoBoundaryFound) { diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp index bc0e57b14..017d12433 100644 --- a/src/zenutil/chunkingcontroller.cpp +++ b/src/zenutil/chunkingcontroller.cpp @@ -58,7 +58,8 @@ public: virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, - std::atomic& BytesProcessed) const override + std::atomic& BytesProcessed, + std::atomic& AbortFlag) const override { const bool ExcludeFromChunking = std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != @@ -70,7 +71,7 @@ public: } BasicFile Buffer(InputPath, BasicFile::Mode::kRead); - OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed, &AbortFlag); return true; } @@ -132,7 +133,8 @@ public: virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, - std::atomic& BytesProcessed) const override + std::atomic& BytesProcessed, + std::atomic& AbortFlag) const override { if (RawSize < m_ChunkFileSizeLimit) { @@ -150,6 +152,10 @@ public: ChunkHashToChunkIndex.reserve(1 + (RawSize / m_FixedChunkingChunkSize)); while (Offset < RawSize) { + if (AbortFlag) + { + return false; + } uint64_t ChunkSize = std::min(RawSize - Offset, m_FixedChunkingChunkSize); IoBuffer Chunk(Source, Offset, ChunkSize); MemoryView ChunkData = Chunk.GetView(); diff --git a/src/zenutil/include/zenutil/chunkedfile.h b/src/zenutil/include/zenutil/chunkedfile.h index 7110ad317..4cec80fdb 100644 --- a/src/zenutil/include/zenutil/chunkedfile.h +++ b/src/zenutil/include/zenutil/chunkedfile.h @@ -47,7 +47,8 @@ ChunkedInfoWithSource ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params = {}, - std::atomic* BytesProcessed = nullptr); + std::atomic* BytesProcessed = nullptr, + std::atomic* AbortFlag = nullptr); void Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function GetChunk); diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h index fe4fc1bb5..ebc80e207 100644 --- a/src/zenutil/include/zenutil/chunkingcontroller.h +++ b/src/zenutil/include/zenutil/chunkingcontroller.h @@ -32,9 +32,10 @@ public: virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, - std::atomic& BytesProcessed) const = 0; - virtual std::string_view GetName() const = 0; - virtual CbObject GetParameters() const = 0; + std::atomic& BytesProcessed, + std::atomic& AbortFlag) const = 0; + virtual std::string_view GetName() const = 0; + virtual CbObject GetParameters() const = 0; }; std::unique_ptr CreateBasicChunkingController( diff --git a/src/zenutil/include/zenutil/parallellwork.h b/src/zenutil/include/zenutil/parallellwork.h index 7a8218c51..79798fc8d 100644 --- a/src/zenutil/include/zenutil/parallellwork.h +++ b/src/zenutil/include/zenutil/parallellwork.h @@ -2,6 +2,8 @@ #pragma once +#include +#include #include #include @@ -20,6 +22,14 @@ public: ZEN_ASSERT(m_PendingWork.Remaining() == 0); } + std::function& AbortFlag)> DefaultErrorFunction() + { + return [&](const std::exception& Ex, std::atomic& AbortFlag) { + m_ErrorLock.WithExclusiveLock([&]() { m_Errors.push_back(Ex.what()); }); + AbortFlag = true; + }; + } + void ScheduleWork(WorkerThreadPool& WorkerPool, std::function& AbortFlag)>&& Work, std::function& AbortFlag)>&& OnError) @@ -32,6 +42,27 @@ public: { Work(m_AbortFlag); } + catch (const AssertException& AssertEx) + { + OnError( + std::runtime_error(fmt::format("Caught assert exception while handling request: {}", AssertEx.FullDescription())), + m_AbortFlag); + } + catch (const std::system_error& SystemError) + { + if (IsOOM(SystemError.code())) + { + OnError(std::runtime_error(fmt::format("Out of memory. Reason: {}", SystemError.what())), m_AbortFlag); + } + else if (IsOOD(SystemError.code())) + { + OnError(std::runtime_error(fmt::format("Out of disk. Reason: {}", SystemError.what())), m_AbortFlag); + } + else + { + OnError(std::runtime_error(fmt::format("System error. Reason: {}", SystemError.what())), m_AbortFlag); + } + } catch (const std::exception& Ex) { OnError(Ex, m_AbortFlag); @@ -58,12 +89,29 @@ public: { UpdateCallback(m_AbortFlag.load(), m_PendingWork.Remaining()); } + if (m_Errors.size() == 1) + { + throw std::runtime_error(m_Errors.front()); + } + else if (m_Errors.size() > 1) + { + ExtendableStringBuilder<128> SB; + SB.Append("Multiple errors:"); + for (const std::string& Error : m_Errors) + { + SB.Append(fmt::format("\n {}", Error)); + } + throw std::runtime_error(SB.ToString()); + } } Latch& PendingWork() { return m_PendingWork; } private: std::atomic& m_AbortFlag; Latch m_PendingWork; + + RwLock m_ErrorLock; + std::vector m_Errors; }; } // namespace zen -- cgit v1.2.3 From 19b3c492dcc0fc3f8879ecb60124ca64dea9b7ef Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Sat, 1 Mar 2025 10:10:53 +0100 Subject: builds download incremental (#290) * incremental download * merge rebuild state and output state building * fix writing when > 1 zero size file --- src/zen/cmds/builds_cmd.cpp | 1235 +++++++++++++++--------------- src/zencore/filesystem.cpp | 7 +- src/zencore/include/zencore/filesystem.h | 10 +- src/zenutil/filebuildstorage.cpp | 12 +- 4 files changed, 635 insertions(+), 629 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 18cc7cf9e..28c794559 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -82,7 +82,7 @@ namespace { const std::string ZenStateFilePath = fmt::format("{}/current_state.cbo", ZenFolderName); const std::string ZenStateFileJsonPath = fmt::format("{}/current_state.json", ZenFolderName); const std::string ZenTempFolderName = fmt::format("{}/tmp", ZenFolderName); - const std::string ZenTempReuseFolderName = fmt::format("{}/reuse", ZenTempFolderName); + const std::string ZenTempCacheFolderName = fmt::format("{}/cache", ZenTempFolderName); const std::string ZenTempStorageFolderName = fmt::format("{}/storage", ZenTempFolderName); const std::string ZenTempBlockFolderName = fmt::format("{}/blocks", ZenTempFolderName); const std::string ZenTempChunkFolderName = fmt::format("{}/chunks", ZenTempFolderName); @@ -115,6 +115,54 @@ namespace { ); + uint32_t SetNativeFileAttributes(const std::filesystem::path FilePath, SourcePlatform SourcePlatform, uint32_t Attributes) + { +#if ZEN_PLATFORM_WINDOWS + if (SourcePlatform == SourcePlatform::Windows) + { + SetFileAttributes(FilePath, Attributes); + return Attributes; + } + else + { + uint32_t CurrentAttributes = GetFileAttributes(FilePath); + uint32_t NewAttributes = MakeFileAttributeReadOnly(CurrentAttributes, IsFileModeReadOnly(Attributes)); + if (CurrentAttributes != NewAttributes) + { + SetFileAttributes(FilePath, NewAttributes); + } + return NewAttributes; + } +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + if (SourcePlatform != SourcePlatform::Windows) + { + SetFileMode(FilePath, Attributes); + return Attributes; + } + else + { + uint32_t CurrentMode = GetFileMode(FilePath); + uint32_t NewMode = MakeFileModeReadOnly(CurrentMode, IsFileAttributeReadOnly(Attributes)); + if (CurrentMode != NewMode) + { + SetFileMode(FilePath, NewMode); + } + return NewMode; + } +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + }; + + uint32_t GetNativeFileAttributes(const std::filesystem::path FilePath) + { +#if ZEN_PLATFORM_WINDOWS + return GetFileAttributes(FilePath); +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + return GetFileMode(FilePath); +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + } + template std::string FormatArray(std::span Items, std::string_view Prefix) { @@ -181,9 +229,8 @@ namespace { // If this is a file based buffer or a compressed buffer with a memory-based header, we don't need to rewrite to disk to save memory std::span Segments = Buffer.GetSegments(); ZEN_ASSERT(Buffer.GetSegments().size() > 0); - size_t SegmentIndexToCheck = Segments.size() > 1 ? 1 : 0; IoBufferFileReference FileRef; - if (Segments[SegmentIndexToCheck].GetFileReference(FileRef)) + if (Segments.back().GetFileReference(FileRef)) { return Buffer; } @@ -2114,7 +2161,7 @@ namespace { Stopwatch PutBuildTimer; CbObject PutBuildResult = Storage.PutBuild(BuildId, MetaData); ZEN_CONSOLE("PutBuild took {}. Payload size: {}", - NiceLatencyNs(PutBuildTimer.GetElapsedTimeUs() * 1000), + NiceTimeSpanMs(PutBuildTimer.GetElapsedTimeMs()), NiceBytes(MetaData.GetSize())); PreferredMultipartChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(PreferredMultipartChunkSize); } @@ -2122,9 +2169,7 @@ namespace { { Stopwatch GetBuildTimer; CbObject Build = Storage.GetBuild(BuildId); - ZEN_CONSOLE("GetBuild took {}. Payload size: {}", - NiceLatencyNs(GetBuildTimer.GetElapsedTimeUs() * 1000), - NiceBytes(Build.GetSize())); + ZEN_CONSOLE("GetBuild took {}. Payload size: {}", NiceTimeSpanMs(GetBuildTimer.GetElapsedTimeMs()), NiceBytes(Build.GetSize())); if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) { PreferredMultipartChunkSize = ChunkSize; @@ -2439,7 +2484,7 @@ namespace { Stopwatch PutBuildPartResultTimer; std::pair> PutBuildPartResult = Storage.PutBuildPart(BuildId, BuildPartId, BuildPartName, PartManifest); ZEN_CONSOLE("PutBuildPart took {}, payload size {}. {} attachments are missing.", - NiceLatencyNs(PutBuildPartResultTimer.GetElapsedTimeUs() * 1000), + NiceTimeSpanMs(PutBuildPartResultTimer.GetElapsedTimeMs()), NiceBytes(PartManifest.GetSize()), PutBuildPartResult.second.size()); IoHash PartHash = PutBuildPartResult.first; @@ -2531,7 +2576,7 @@ namespace { Stopwatch FinalizeBuildPartTimer; std::vector Needs = Storage.FinalizeBuildPart(BuildId, BuildPartId, PartHash); ZEN_CONSOLE("FinalizeBuildPart took {}. {} attachments are missing.", - NiceLatencyNs(FinalizeBuildPartTimer.GetElapsedTimeUs() * 1000), + NiceTimeSpanMs(FinalizeBuildPartTimer.GetElapsedTimeMs()), Needs.size()); if (Needs.empty()) { @@ -2545,7 +2590,7 @@ namespace { { Stopwatch FinalizeBuildTimer; Storage.FinalizeBuild(BuildId); - ZEN_CONSOLE("FinalizeBuild took {}", NiceLatencyNs(FinalizeBuildTimer.GetElapsedTimeUs() * 1000)); + ZEN_CONSOLE("FinalizeBuild took {}", NiceTimeSpanMs(FinalizeBuildTimer.GetElapsedTimeMs())); } if (!NewBlocks.BlockDescriptions.empty()) @@ -2949,7 +2994,7 @@ namespace { const bool CacheWriter = TargetFinalSize > Buffer.GetSize(); if (CacheWriter) { - ZEN_ASSERT(std::find(SeenTargetIndexes.begin(), SeenTargetIndexes.end(), TargetIndex) == SeenTargetIndexes.end()); + ZEN_ASSERT_SLOW(std::find(SeenTargetIndexes.begin(), SeenTargetIndexes.end(), TargetIndex) == SeenTargetIndexes.end()); OutputFile = std::move(NewOutputFile); OpenFileWriter = std::make_unique(*OutputFile, Min(TargetFinalSize, 256u * 1024u)); @@ -2994,7 +3039,7 @@ namespace { return ChunkTargetPtrs; }; - bool WriteBlockToDisk(const std::filesystem::path& Path, + bool WriteBlockToDisk(const std::filesystem::path& CacheFolderPath, const ChunkedFolderContent& Content, const std::vector& RemotePathIndexWantsCopyFromCacheFlags, const CompositeBuffer& DecompressedBlockBuffer, @@ -3061,22 +3106,30 @@ namespace { return Lhs.Target->Offset < Rhs.Target->Offset; }); - WriteFileCache OpenFileCache; - for (const WriteOpData& WriteOp : WriteOps) { - const CompositeBuffer& Chunk = ChunkBuffers[WriteOp.ChunkBufferIndex]; - const uint32_t PathIndex = WriteOp.Target->PathIndex; - const uint64_t ChunkSize = Chunk.GetSize(); - const uint64_t FileOffset = WriteOp.Target->Offset; - ZEN_ASSERT(FileOffset + ChunkSize <= Content.RawSizes[PathIndex]); - - OpenFileCache.WriteToFile( - PathIndex, - [&Path, &Content](uint32_t TargetIndex) { return (Path / Content.Paths[TargetIndex]).make_preferred(); }, - Chunk, - FileOffset, - Content.RawSizes[PathIndex]); - OutBytesWritten += ChunkSize; + WriteFileCache OpenFileCache; + for (const WriteOpData& WriteOp : WriteOps) + { + if (AbortFlag) + { + break; + } + const CompositeBuffer& Chunk = ChunkBuffers[WriteOp.ChunkBufferIndex]; + const uint32_t PathIndex = WriteOp.Target->PathIndex; + const uint64_t ChunkSize = Chunk.GetSize(); + const uint64_t FileOffset = WriteOp.Target->Offset; + ZEN_ASSERT(FileOffset + ChunkSize <= Content.RawSizes[PathIndex]); + + OpenFileCache.WriteToFile( + PathIndex, + [&CacheFolderPath, &Content](uint32_t TargetIndex) { + return (CacheFolderPath / Content.RawHashes[TargetIndex].ToHexString()).make_preferred(); + }, + Chunk, + FileOffset, + Content.RawSizes[PathIndex]); + OutBytesWritten += ChunkSize; + } } OutChunksComplete += gsl::narrow(ChunkBuffers.size()); } @@ -3086,11 +3139,11 @@ namespace { return false; } - SharedBuffer Decompress(const IoBuffer& CompressedChunk, const IoHash& ChunkHash, const uint64_t ChunkRawSize) + SharedBuffer Decompress(const CompositeBuffer& CompressedChunk, const IoHash& ChunkHash, const uint64_t ChunkRawSize) { IoHash RawHash; uint64_t RawSize; - CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(CompressedChunk), RawHash, RawSize); + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompressedChunk, RawHash, RawSize); if (!Compressed) { throw std::runtime_error(fmt::format("Invalid build blob format for chunk {}", ChunkHash)); @@ -3118,7 +3171,7 @@ namespace { return Decompressed; } - void WriteChunkToDisk(const std::filesystem::path& Path, + void WriteChunkToDisk(const std::filesystem::path& CacheFolderPath, const ChunkedFolderContent& Content, std::span ChunkTargets, const CompositeBuffer& ChunkData, @@ -3132,7 +3185,10 @@ namespace { OpenFileCache.WriteToFile( Target.PathIndex, - [&Path, &Content](uint32_t TargetIndex) { return (Path / Content.Paths[TargetIndex]).make_preferred(); }, + [&CacheFolderPath, &Content](uint32_t TargetIndex) { + return (CacheFolderPath / Content.RawHashes[TargetIndex].ToHexString()).make_preferred(); + // return (Path / Content.Paths[TargetIndex]).make_preferred(); + }, ChunkData, FileOffset, Content.RawSizes[Target.PathIndex]); @@ -3141,7 +3197,8 @@ namespace { } void DownloadLargeBlob(BuildStorage& Storage, - const std::filesystem::path& Path, + const std::filesystem::path& TempFolderPath, + const std::filesystem::path& CacheFolderPath, const ChunkedFolderContent& RemoteContent, const ChunkedContentLookup& RemoteLookup, const Oid& BuildId, @@ -3166,7 +3223,7 @@ namespace { std::shared_ptr Workload(std::make_shared()); std::error_code Ec; - Workload->TempFile.CreateTemporary(Path / ZenTempChunkFolderName, Ec); + Workload->TempFile.CreateTemporary(TempFolderPath, Ec); if (Ec) { throw std::runtime_error( @@ -3176,7 +3233,7 @@ namespace { BuildId, ChunkHash, PreferredMultipartChunkSize, - [&Path, + [&CacheFolderPath, &RemoteContent, &RemoteLookup, &Work, @@ -3203,7 +3260,7 @@ namespace { Work.ScheduleWork( WritePool, - [&Path, + [&CacheFolderPath, &RemoteContent, &RemoteLookup, ChunkHash, @@ -3234,8 +3291,9 @@ namespace { uint32_t ChunkIndex = RemoteLookup.ChunkHashToChunkIndex.at(ChunkHash); - SharedBuffer Chunk = - Decompress(CompressedPart, ChunkHash, RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + SharedBuffer Chunk = Decompress(CompositeBuffer(std::move(CompressedPart)), + ChunkHash, + RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); // ZEN_ASSERT_SLOW(ChunkHash == // IoHash::HashBuffer(Chunk.AsIoBuffer())); @@ -3244,7 +3302,7 @@ namespace { { WriteFileCache OpenFileCache; - WriteChunkToDisk(Path, + WriteChunkToDisk(CacheFolderPath, RemoteContent, ChunkTargetPtrs, CompositeBuffer(Chunk), @@ -3290,6 +3348,7 @@ namespace { bool WipeTargetFolder, FolderContent& OutLocalFolderState) { + ZEN_UNUSED(WipeTargetFolder); std::atomic DownloadedBlocks = 0; std::atomic BlockBytes = 0; std::atomic DownloadedChunks = 0; @@ -3307,16 +3366,17 @@ namespace { ZEN_CONSOLE("Indexed local and remote content in {}", NiceTimeSpanMs(IndexTimer.GetElapsedTimeMs())); - const std::filesystem::path CacheFolderPath = Path / ZenTempReuseFolderName; + const std::filesystem::path CacheFolderPath = Path / ZenTempCacheFolderName; - tsl::robin_map LocalRawHashToPathIndex; + tsl::robin_map RawHashToLocalPathIndex; - if (!WipeTargetFolder) { Stopwatch CacheTimer; for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) { + ZEN_ASSERT_SLOW(std::filesystem::exists(Path / LocalContent.Paths[LocalPathIndex])); + if (LocalContent.RawSizes[LocalPathIndex] > 0) { const uint32_t SequenceRawHashIndex = @@ -3325,99 +3385,33 @@ namespace { if (ChunkCount > 0) { const IoHash LocalRawHash = LocalContent.RawHashes[LocalPathIndex]; - if (!LocalRawHashToPathIndex.contains(LocalRawHash)) + if (!RawHashToLocalPathIndex.contains(LocalRawHash)) { - LocalRawHashToPathIndex.insert_or_assign(LocalRawHash, LocalPathIndex); + RawHashToLocalPathIndex.insert_or_assign(LocalRawHash, LocalPathIndex); } } } } - - { - std::vector IncludeLocalFiles(LocalContent.Paths.size(), false); - - for (const IoHash& ChunkHash : RemoteContent.ChunkedContent.ChunkHashes) - { - if (auto It = LocalLookup.ChunkHashToChunkIndex.find(ChunkHash); It != LocalLookup.ChunkHashToChunkIndex.end()) - { - const uint32_t LocalChunkIndex = It->second; - std::span LocalChunkTargetRange = - GetChunkLocations(LocalLookup, LocalChunkIndex); - if (!LocalChunkTargetRange.empty()) - { - std::uint32_t LocalPathIndex = LocalChunkTargetRange[0].PathIndex; - IncludeLocalFiles[LocalPathIndex] = true; - } - } - } - for (const IoHash& RawHash : RemoteContent.RawHashes) - { - if (auto It = LocalRawHashToPathIndex.find(RawHash); It != LocalRawHashToPathIndex.end()) - { - uint32_t LocalPathIndex = It->second; - IncludeLocalFiles[LocalPathIndex] = true; - } - } - - for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) - { - if (!IncludeLocalFiles[LocalPathIndex]) - { - LocalRawHashToPathIndex.erase(LocalContent.RawHashes[LocalPathIndex]); - } - } - } - - uint64_t CachedBytes = 0; - CreateDirectories(CacheFolderPath); - for (auto& CachedLocalFile : LocalRawHashToPathIndex) - { - const IoHash& LocalRawHash = CachedLocalFile.first; - const uint32_t LocalPathIndex = CachedLocalFile.second; - const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); - const std::filesystem::path CacheFilePath = (CacheFolderPath / LocalRawHash.ToHexString()).make_preferred(); - - SetFileReadOnly(LocalFilePath, false); - - std::filesystem::rename(LocalFilePath, CacheFilePath); - CachedBytes += std::filesystem::file_size(CacheFilePath); - } - - ZEN_CONSOLE("Cached {} ({}) local files in {}", - LocalRawHashToPathIndex.size(), - NiceBytes(CachedBytes), - NiceTimeSpanMs(CacheTimer.GetElapsedTimeMs())); - } - - if (AbortFlag) - { - return; } - - CleanDirectory(Path, DefaultExcludeFolders); - Stopwatch CacheMappingTimer; std::atomic BytesWritten = 0; uint64_t CacheMappedBytesForReuse = 0; - std::vector RemotePathIndexWantsCopyFromCacheFlags(RemoteContent.Paths.size(), false); - std::vector> RemoteChunkIndexWantsCopyFromCacheFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); + std::vector RemotePathIndexWantsCopyFromCacheFlags(RemoteContent.Paths.size(), false); + std::vector RemoteChunkIndexWantsCopyFromCacheFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); // Guard if he same chunks is in multiple blocks (can happen due to block reuse, cache reuse blocks writes directly) std::vector> RemoteChunkIndexNeedsCopyFromSourceFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); struct CacheCopyData { - std::filesystem::path OriginalSourceFileName; - IoHash LocalFileRawHash; - uint64_t LocalFileRawSize = 0; - std::vector RemotePathIndexes; - std::vector ChunkSourcePtrs; + uint32_t LocalPathIndex; + std::vector TargetChunkLocationPtrs; struct ChunkTarget { - uint32_t ChunkSourceCount; + uint32_t TargetChunkLocationCount; uint64_t ChunkRawSize; - uint64_t LocalFileOffset; + uint64_t CacheFileOffset; }; std::vector ChunkTargets; }; @@ -3426,42 +3420,24 @@ namespace { std::vector CacheCopyDatas; uint32_t ChunkCountToWrite = 0; - // Pick up all whole files to copy and/or move + // Pick up all whole files we can use from current local state for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) { const IoHash& RemoteRawHash = RemoteContent.RawHashes[RemotePathIndex]; - if (auto It = LocalRawHashToPathIndex.find(RemoteRawHash); It != LocalRawHashToPathIndex.end()) + if (auto It = RawHashToLocalPathIndex.find(RemoteRawHash); It != RawHashToLocalPathIndex.end()) { - if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(RemoteRawHash); CopySourceIt != RawHashToCacheCopyDataIndex.end()) - { - CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; - Data.RemotePathIndexes.push_back(RemotePathIndex); - } - else - { - const uint32_t LocalPathIndex = It->second; - ZEN_ASSERT(LocalContent.RawSizes[LocalPathIndex] == RemoteContent.RawSizes[RemotePathIndex]); - ZEN_ASSERT(LocalContent.RawHashes[LocalPathIndex] == RemoteContent.RawHashes[RemotePathIndex]); - RawHashToCacheCopyDataIndex.insert_or_assign(RemoteRawHash, CacheCopyDatas.size()); - CacheCopyDatas.push_back(CacheCopyData{.OriginalSourceFileName = LocalContent.Paths[LocalPathIndex], - .LocalFileRawHash = RemoteRawHash, - .LocalFileRawSize = LocalContent.RawSizes[LocalPathIndex], - .RemotePathIndexes = {RemotePathIndex}}); - CacheMappedBytesForReuse += RemoteContent.RawSizes[RemotePathIndex]; - ChunkCountToWrite++; - } RemotePathIndexWantsCopyFromCacheFlags[RemotePathIndex] = true; + CacheMappedBytesForReuse += RemoteContent.RawSizes[RemotePathIndex]; } } - // Pick up all chunks in cached files and make sure we block moving of cache files if we need part of them - for (auto& CachedLocalFile : LocalRawHashToPathIndex) + // Pick up all chunks in current local state + for (auto& CachedLocalFile : RawHashToLocalPathIndex) { const IoHash& LocalFileRawHash = CachedLocalFile.first; const uint32_t LocalPathIndex = CachedLocalFile.second; const uint32_t LocalSequenceRawHashIndex = LocalLookup.RawHashToSequenceRawHashIndex.at(LocalFileRawHash); - const uint32_t LocalOrderOffset = - LocalLookup.SequenceRawHashIndexChunkOrderOffset[LocalSequenceRawHashIndex]; // CachedLocalFile.second.ChunkOrderOffset; + const uint32_t LocalOrderOffset = LocalLookup.SequenceRawHashIndexChunkOrderOffset[LocalSequenceRawHashIndex]; { uint64_t SourceOffset = 0; @@ -3482,30 +3458,30 @@ namespace { if (!ChunkTargetPtrs.empty()) { - CacheCopyData::ChunkTarget Target = {.ChunkSourceCount = gsl::narrow(ChunkTargetPtrs.size()), - .ChunkRawSize = LocalChunkRawSize, - .LocalFileOffset = SourceOffset}; + CacheCopyData::ChunkTarget Target = { + .TargetChunkLocationCount = gsl::narrow(ChunkTargetPtrs.size()), + .ChunkRawSize = LocalChunkRawSize, + .CacheFileOffset = SourceOffset}; if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(LocalFileRawHash); CopySourceIt != RawHashToCacheCopyDataIndex.end()) { CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; - Data.ChunkSourcePtrs.insert(Data.ChunkSourcePtrs.end(), ChunkTargetPtrs.begin(), ChunkTargetPtrs.end()); + Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(), + ChunkTargetPtrs.begin(), + ChunkTargetPtrs.end()); Data.ChunkTargets.push_back(Target); } else { RawHashToCacheCopyDataIndex.insert_or_assign(LocalFileRawHash, CacheCopyDatas.size()); CacheCopyDatas.push_back( - CacheCopyData{.OriginalSourceFileName = LocalContent.Paths[LocalPathIndex], - .LocalFileRawHash = LocalFileRawHash, - .LocalFileRawSize = LocalContent.RawSizes[LocalPathIndex], - .RemotePathIndexes = {}, - .ChunkSourcePtrs = ChunkTargetPtrs, - .ChunkTargets = std::vector{Target}}); + CacheCopyData{.LocalPathIndex = LocalPathIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector{Target}}); } CacheMappedBytesForReuse += LocalChunkRawSize; + RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex] = true; } - RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex] = true; } } SourceOffset += LocalChunkRawSize; @@ -3535,531 +3511,554 @@ namespace { ZEN_CONSOLE("Mapped {} cached data for reuse in {}", NiceBytes(CacheMappedBytesForReuse), NiceTimeSpanMs(CacheMappingTimer.GetElapsedTimeMs())); + { + WorkerThreadPool& NetworkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + WorkerThreadPool& WritePool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // - auto CopyChunksFromCacheFile = [](const std::filesystem::path& Path, - BufferedOpenFile& SourceFile, - WriteFileCache& OpenFileCache, - const ChunkedFolderContent& RemoteContent, - const uint64_t LocalFileSourceOffset, - const uint64_t LocalChunkRawSize, - std::span ChunkTargetPtrs, - uint64_t& OutBytesWritten) { - CompositeBuffer Chunk = SourceFile.GetRange(LocalFileSourceOffset, LocalChunkRawSize); - uint64_t TotalBytesWritten = 0; - - WriteChunkToDisk(Path, RemoteContent, ChunkTargetPtrs, Chunk, OpenFileCache, TotalBytesWritten); - OutBytesWritten += TotalBytesWritten; - }; - - auto CloneFullFileFromCache = [](const std::filesystem::path& Path, - const std::filesystem::path& CacheFolderPath, - const ChunkedFolderContent& RemoteContent, - const IoHash& FileRawHash, - const uint64_t FileRawSize, - std::span FullCloneRemotePathIndexes, - bool CanMove, - uint64_t& OutBytesWritten) { - const std::filesystem::path CacheFilePath = (CacheFolderPath / FileRawHash.ToHexString()).make_preferred(); + ProgressBar WriteProgressBar(UsePlainProgress); + ParallellWork Work(AbortFlag); - size_t CopyCount = FullCloneRemotePathIndexes.size(); - if (CanMove) - { - // If every reference to this chunk has are full files we can move the cache file to the last target - CopyCount--; - } + std::atomic BytesDownloaded = 0; - for (uint32_t RemotePathIndex : FullCloneRemotePathIndexes) + for (size_t CopyDataIndex = 0; CopyDataIndex < CacheCopyDatas.size(); CopyDataIndex++) { - const std::filesystem::path TargetPath = (Path / RemoteContent.Paths[RemotePathIndex]).make_preferred(); - CreateDirectories(TargetPath.parent_path()); - - if (CopyCount == 0) - { - std::filesystem::rename(CacheFilePath, TargetPath); - } - else + if (AbortFlag) { - CopyFile(CacheFilePath, TargetPath, {.EnableClone = false}); - ZEN_ASSERT(CopyCount > 0); - CopyCount--; + break; } - OutBytesWritten += FileRawSize; - } - }; - - WorkerThreadPool& NetworkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // - WorkerThreadPool& WritePool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // - - ProgressBar WriteProgressBar(UsePlainProgress); - ParallellWork Work(AbortFlag); - - std::atomic BytesDownloaded = 0; - - for (size_t CopyDataIndex = 0; CopyDataIndex < CacheCopyDatas.size(); CopyDataIndex++) - { - if (AbortFlag) - { - break; - } - - Work.ScheduleWork( - WritePool, // GetSyncWorkerPool(),// - [&, CopyDataIndex](std::atomic&) { - if (!AbortFlag) - { - const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; - const std::filesystem::path CacheFilePath = - (CacheFolderPath / CopyData.LocalFileRawHash.ToHexString()).make_preferred(); - if (!CopyData.ChunkSourcePtrs.empty()) + Work.ScheduleWork( + WritePool, // GetSyncWorkerPool(),// + [&, CopyDataIndex](std::atomic&) { + if (!AbortFlag) { - uint64_t CacheLocalFileBytesRead = 0; - - size_t TargetStart = 0; - const std::span AllTargets(CopyData.ChunkSourcePtrs); - - struct WriteOp + const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; + const std::filesystem::path LocalFilePath = + (Path / LocalContent.Paths[CopyData.LocalPathIndex]).make_preferred(); + if (!CopyData.TargetChunkLocationPtrs.empty()) { - const ChunkedContentLookup::ChunkLocation* Target; - uint64_t LocalFileOffset; - uint64_t ChunkSize; - }; + uint64_t CacheLocalFileBytesRead = 0; - std::vector WriteOps; - WriteOps.reserve(CopyData.ChunkSourcePtrs.size()); + size_t TargetStart = 0; + const std::span AllTargets( + CopyData.TargetChunkLocationPtrs); - for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) - { - std::span TargetRange = - AllTargets.subspan(TargetStart, ChunkTarget.ChunkSourceCount); - for (const ChunkedContentLookup::ChunkLocation* Target : TargetRange) + struct WriteOp { - WriteOps.push_back(WriteOp{.Target = Target, - .LocalFileOffset = ChunkTarget.LocalFileOffset, - .ChunkSize = ChunkTarget.ChunkRawSize}); - } - TargetStart += ChunkTarget.ChunkSourceCount; - } + const ChunkedContentLookup::ChunkLocation* Target; + uint64_t CacheFileOffset; + uint64_t ChunkSize; + }; - std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { - if (Lhs.Target->PathIndex < Rhs.Target->PathIndex) + std::vector WriteOps; + WriteOps.reserve(AllTargets.size()); + + for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) { - return true; + std::span TargetRange = + AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); + for (const ChunkedContentLookup::ChunkLocation* Target : TargetRange) + { + WriteOps.push_back(WriteOp{.Target = Target, + .CacheFileOffset = ChunkTarget.CacheFileOffset, + .ChunkSize = ChunkTarget.ChunkRawSize}); + } + TargetStart += ChunkTarget.TargetChunkLocationCount; } - else if (Lhs.Target->PathIndex > Rhs.Target->PathIndex) - { + + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { + if (Lhs.Target->PathIndex < Rhs.Target->PathIndex) + { + return true; + } + else if (Lhs.Target->PathIndex > Rhs.Target->PathIndex) + { + return false; + } + if (Lhs.Target->Offset < Rhs.Target->Offset) + { + return true; + } return false; + }); + + { + BufferedOpenFile SourceFile(LocalFilePath); + WriteFileCache OpenFileCache; + for (const WriteOp& Op : WriteOps) + { + if (AbortFlag) + { + break; + } + const uint32_t RemotePathIndex = Op.Target->PathIndex; + const uint64_t ChunkSize = Op.ChunkSize; + CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ChunkSize); + + ZEN_ASSERT(Op.Target->Offset + ChunkSource.GetSize() <= RemoteContent.RawSizes[RemotePathIndex]); + + OpenFileCache.WriteToFile( + RemotePathIndex, + [&CacheFolderPath, &RemoteContent](uint32_t TargetIndex) { + return (CacheFolderPath / RemoteContent.RawHashes[TargetIndex].ToHexString()) + .make_preferred(); + }, + ChunkSource, + Op.Target->Offset, + RemoteContent.RawSizes[RemotePathIndex]); + BytesWritten += ChunkSize; + WriteToDiskBytes += ChunkSize; + CacheLocalFileBytesRead += ChunkSize; // TODO: This should be the sum of unique chunk sizes? + } } - if (Lhs.Target->Offset < Rhs.Target->Offset) + if (!AbortFlag) { - return true; + ChunkCountWritten += gsl::narrow(CopyData.ChunkTargets.size()); + ZEN_DEBUG("Copied {} from {}", + NiceBytes(CacheLocalFileBytesRead), + LocalContent.Paths[CopyData.LocalPathIndex]); } - return false; - }); + } + } + }, + Work.DefaultErrorFunction()); + } - BufferedOpenFile SourceFile(CacheFilePath); - WriteFileCache OpenFileCache; - for (const WriteOp& Op : WriteOps) - { - if (AbortFlag) + for (const IoHash ChunkHash : LooseChunkHashes) + { + if (AbortFlag) + { + break; + } + + uint32_t RemoteChunkIndex = RemoteLookup.ChunkHashToChunkIndex.at(ChunkHash); + if (RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex]) + { + ZEN_DEBUG("Skipping chunk {} due to cache reuse", ChunkHash); + continue; + } + bool NeedsCopy = true; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false)) + { + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(RemotePathIndexWantsCopyFromCacheFlags, RemoteLookup, RemoteChunkIndex); + + if (ChunkTargetPtrs.empty()) + { + ZEN_DEBUG("Skipping chunk {} due to cache reuse", ChunkHash); + } + else + { + Work.ScheduleWork( + NetworkPool, + [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) { + if (!AbortFlag) { - break; - } - const uint32_t RemotePathIndex = Op.Target->PathIndex; - const uint64_t ChunkSize = Op.ChunkSize; - CompositeBuffer ChunkSource = SourceFile.GetRange(Op.LocalFileOffset, ChunkSize); + if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) + { + DownloadLargeBlob(Storage, + Path / ZenTempChunkFolderName, + CacheFolderPath, + RemoteContent, + RemoteLookup, + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + ChunkTargetPtrs, + Work, + WritePool, + NetworkPool, + BytesWritten, + WriteToDiskBytes, + BytesDownloaded, + LooseChunksBytes, + DownloadedChunks, + ChunkCountWritten, + MultipartAttachmentCount); + } + else + { + IoBuffer CompressedPart = Storage.GetBuildBlob(BuildId, ChunkHash); + if (!CompressedPart) + { + throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash)); + } + BytesDownloaded += CompressedPart.GetSize(); + LooseChunksBytes += CompressedPart.GetSize(); + CompositeBuffer Payload = WriteToTempFileIfNeeded(CompositeBuffer(std::move(CompressedPart)), + Path / ZenTempChunkFolderName, + ChunkHash); + DownloadedChunks++; - ZEN_ASSERT(Op.Target->Offset + ChunkSource.GetSize() <= RemoteContent.RawSizes[RemotePathIndex]); + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, + [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs, CompressedPart = std::move(Payload)]( + std::atomic&) { + if (!AbortFlag) + { + uint64_t TotalBytesWritten = 0; + SharedBuffer Chunk = + Decompress(CompressedPart, + ChunkHash, + RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex]); - OpenFileCache.WriteToFile( - RemotePathIndex, - [&Path, &RemoteContent](uint32_t TargetIndex) { - return (Path / RemoteContent.Paths[TargetIndex]).make_preferred(); - }, - ChunkSource, - Op.Target->Offset, - RemoteContent.RawSizes[RemotePathIndex]); - BytesWritten += ChunkSize; - WriteToDiskBytes += ChunkSize; - CacheLocalFileBytesRead += ChunkSize; // TODO: This should be the sum of unique chunk sizes? - } - if (!AbortFlag) - { - ChunkCountWritten += gsl::narrow(CopyData.ChunkTargets.size()); - ZEN_DEBUG("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), CopyData.OriginalSourceFileName); - } - } + { + WriteFileCache OpenFileCache; + WriteChunkToDisk(CacheFolderPath, + RemoteContent, + ChunkTargetPtrs, + CompositeBuffer(Chunk), + OpenFileCache, + TotalBytesWritten); + } + ChunkCountWritten++; + BytesWritten += TotalBytesWritten; + WriteToDiskBytes += TotalBytesWritten; + } + }, + Work.DefaultErrorFunction()); + } + } + } + }, + Work.DefaultErrorFunction()); + } + } + } - if (CopyData.RemotePathIndexes.empty()) - { - std::filesystem::remove(CacheFilePath); - } - else if (!AbortFlag) + size_t BlockCount = BlockDescriptions.size(); + std::atomic BlocksComplete = 0; + + auto IsBlockNeeded = [&RemoteContent, &RemoteLookup, &RemoteChunkIndexNeedsCopyFromSourceFlags]( + const ChunkBlockDescription& BlockDescription) -> bool { + for (const IoHash& ChunkHash : BlockDescription.ChunkRawHashes) + { + if (auto It = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = It->second; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]) { - uint64_t LocalBytesWritten = 0; - CloneFullFileFromCache(Path, - CacheFolderPath, - RemoteContent, - CopyData.LocalFileRawHash, - CopyData.LocalFileRawSize, - CopyData.RemotePathIndexes, - true, - LocalBytesWritten); - // CacheLocalFileBytesRead += CopyData.LocalFileRawSize; - BytesWritten += LocalBytesWritten; - WriteToDiskBytes += LocalBytesWritten; - ChunkCountWritten++; - - ZEN_DEBUG("Used full cached file {} ({}) for {} ({}) targets", - CopyData.OriginalSourceFileName, - NiceBytes(CopyData.LocalFileRawSize), - CopyData.RemotePathIndexes.size(), - NiceBytes(LocalBytesWritten)); + return true; } } - }, - Work.DefaultErrorFunction()); - } - - for (const IoHash ChunkHash : LooseChunkHashes) - { - if (AbortFlag) - { - break; - } + } + return false; + }; - uint32_t RemoteChunkIndex = RemoteLookup.ChunkHashToChunkIndex.at(ChunkHash); - if (RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex]) + size_t BlocksNeededCount = 0; + for (size_t BlockIndex = 0; BlockIndex < BlockCount; BlockIndex++) { - ZEN_DEBUG("Skipping chunk {} due to cache reuse", ChunkHash); - continue; - } - bool NeedsCopy = true; - if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false)) - { - std::vector ChunkTargetPtrs = - GetRemainingChunkTargets(RemotePathIndexWantsCopyFromCacheFlags, RemoteLookup, RemoteChunkIndex); - - if (ChunkTargetPtrs.empty()) + if (Work.IsAborted()) { - ZEN_DEBUG("Skipping chunk {} due to cache reuse", ChunkHash); + break; } - else + if (IsBlockNeeded(BlockDescriptions[BlockIndex])) { + BlocksNeededCount++; Work.ScheduleWork( NetworkPool, - [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) { + [&, BlockIndex](std::atomic&) { if (!AbortFlag) { - if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) + IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescriptions[BlockIndex].BlockHash); + if (!BlockBuffer) { - DownloadLargeBlob(Storage, - Path, - RemoteContent, - RemoteLookup, - BuildId, - ChunkHash, - PreferredMultipartChunkSize, - ChunkTargetPtrs, - Work, - WritePool, - NetworkPool, - BytesWritten, - WriteToDiskBytes, - BytesDownloaded, - LooseChunksBytes, - DownloadedChunks, - ChunkCountWritten, - MultipartAttachmentCount); + throw std::runtime_error(fmt::format("Block {} is missing", BlockDescriptions[BlockIndex].BlockHash)); } - else + BytesDownloaded += BlockBuffer.GetSize(); + BlockBytes += BlockBuffer.GetSize(); + DownloadedBlocks++; + CompositeBuffer Payload = WriteToTempFileIfNeeded(CompositeBuffer(std::move(BlockBuffer)), + Path / ZenTempBlockFolderName, + BlockDescriptions[BlockIndex].BlockHash); + + if (!AbortFlag) { - IoBuffer CompressedPart = Storage.GetBuildBlob(BuildId, ChunkHash); - if (!CompressedPart) - { - throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash)); - } - BytesDownloaded += CompressedPart.GetSize(); - LooseChunksBytes += CompressedPart.GetSize(); - DownloadedChunks++; + Work.ScheduleWork( + WritePool, + [&, BlockIndex, BlockBuffer = std::move(Payload)](std::atomic&) { + if (!AbortFlag) + { + IoHash BlockRawHash; + uint64_t BlockRawSize; + CompressedBuffer CompressedBlockBuffer = + CompressedBuffer::FromCompressed(std::move(BlockBuffer), BlockRawHash, BlockRawSize); + if (!CompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", + BlockDescriptions[BlockIndex].BlockHash)); + } - if (!AbortFlag) - { - Work.ScheduleWork( - WritePool, - [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs, CompressedPart = std::move(CompressedPart)]( - std::atomic&) { - if (!AbortFlag) + if (BlockRawHash != BlockDescriptions[BlockIndex].BlockHash) + { + throw std::runtime_error(fmt::format("Block {} header has a mismatching raw hash {}", + BlockDescriptions[BlockIndex].BlockHash, + BlockRawHash)); + } + + CompositeBuffer DecompressedBlockBuffer = CompressedBlockBuffer.DecompressToComposite(); + if (!DecompressedBlockBuffer) { - uint64_t TotalBytesWritten = 0; - SharedBuffer Chunk = - Decompress(CompressedPart, - ChunkHash, - RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex]); - WriteFileCache OpenFileCache; - - WriteChunkToDisk(Path, + throw std::runtime_error(fmt::format("Block {} failed to decompress", + BlockDescriptions[BlockIndex].BlockHash)); + } + + ZEN_ASSERT_SLOW(BlockDescriptions[BlockIndex].BlockHash == + IoHash::HashBuffer(DecompressedBlockBuffer)); + + uint64_t BytesWrittenToDisk = 0; + uint32_t ChunksReadFromBlock = 0; + if (WriteBlockToDisk(CacheFolderPath, RemoteContent, - ChunkTargetPtrs, - CompositeBuffer(Chunk), - OpenFileCache, - TotalBytesWritten); - ChunkCountWritten++; - BytesWritten += TotalBytesWritten; - WriteToDiskBytes += TotalBytesWritten; + RemotePathIndexWantsCopyFromCacheFlags, + DecompressedBlockBuffer, + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags.data(), + ChunksReadFromBlock, + BytesWrittenToDisk)) + { + BytesWritten += BytesWrittenToDisk; + WriteToDiskBytes += BytesWrittenToDisk; + ChunkCountWritten += ChunksReadFromBlock; } - }, - Work.DefaultErrorFunction()); - } + else + { + throw std::runtime_error( + fmt::format("Block {} is malformed", BlockDescriptions[BlockIndex].BlockHash)); + } + BlocksComplete++; + } + }, + [&, BlockIndex](const std::exception& Ex, std::atomic&) { + ZEN_ERROR("Failed writing block {}. Reason: {}", + BlockDescriptions[BlockIndex].BlockHash, + Ex.what()); + AbortFlag = true; + }); } } }, Work.DefaultErrorFunction()); } + else + { + ZEN_DEBUG("Skipping block {} due to cache reuse", BlockDescriptions[BlockIndex].BlockHash); + } } + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + ZEN_ASSERT(ChunkCountToWrite >= ChunkCountWritten.load()); + WriteProgressBar.UpdateState( + {.Task = "Writing chunks ", + .Details = fmt::format("Written {} chunks out of {}. {} ouf of {} blocks complete. Downloaded: {}. Written: {}", + ChunkCountWritten.load(), + ChunkCountToWrite, + BlocksComplete.load(), + BlocksNeededCount, + NiceBytes(BytesDownloaded.load()), + NiceBytes(BytesWritten.load())), + .TotalCount = gsl::narrow(ChunkCountToWrite), + .RemainingCount = gsl::narrow(ChunkCountToWrite - ChunkCountWritten.load())}, + false); + }); + + if (AbortFlag) + { + return; + } + + WriteProgressBar.Finish(); } - size_t BlockCount = BlockDescriptions.size(); - std::atomic BlocksComplete = 0; + std::vector> Targets; + Targets.reserve(RemoteContent.Paths.size()); + for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) + { + Targets.push_back(std::make_pair(RemoteContent.RawHashes[RemotePathIndex], RemotePathIndex)); + } + std::sort(Targets.begin(), Targets.end(), [](const std::pair& Lhs, const std::pair& Rhs) { + return Lhs.first < Rhs.first; + }); + + // Move all files we will reuse to cache folder + for (auto It : RawHashToLocalPathIndex) + { + const IoHash& RawHash = It.first; + if (RemoteLookup.RawHashToSequenceRawHashIndex.contains(RawHash)) + { + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[It.second]).make_preferred(); + const std::filesystem::path CacheFilePath = (CacheFolderPath / RawHash.ToHexString()).make_preferred(); + ZEN_ASSERT_SLOW(std::filesystem::exists(LocalFilePath)); + SetFileReadOnly(LocalFilePath, false); + std::filesystem::rename(LocalFilePath, CacheFilePath); + } + } - auto IsBlockNeeded = [&RemoteContent, &RemoteLookup, &RemoteChunkIndexNeedsCopyFromSourceFlags]( - const ChunkBlockDescription& BlockDescription) -> bool { - for (const IoHash& ChunkHash : BlockDescription.ChunkRawHashes) + if (WipeTargetFolder) + { + // Clean target folder + ZEN_CONSOLE("Wiping {}", Path); + CleanDirectory(Path, DefaultExcludeFolders); + } + else + { + // Remove unused tracked files + tsl::robin_map RemotePathToRemoteIndex; + RemotePathToRemoteIndex.reserve(RemoteContent.Paths.size()); + for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) + { + RemotePathToRemoteIndex.insert({RemoteContent.Paths[RemotePathIndex].generic_string(), RemotePathIndex}); + } + std::vector LocalFilesToRemove; + for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) { - if (auto It = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != RemoteLookup.ChunkHashToChunkIndex.end()) + if (!RemotePathToRemoteIndex.contains(LocalContent.Paths[LocalPathIndex].generic_string())) { - const uint32_t RemoteChunkIndex = It->second; - if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]) + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + if (std::filesystem::exists(LocalFilePath)) { - return true; + LocalFilesToRemove.emplace_back(std::move(LocalFilePath)); } } } - return false; - }; - - for (size_t BlockIndex = 0; BlockIndex < BlockCount; BlockIndex++) - { - if (Work.IsAborted()) + if (!LocalFilesToRemove.empty()) { - break; + ZEN_CONSOLE("Cleaning {} removed files from {}", LocalFilesToRemove.size(), Path); + for (const std::filesystem::path& LocalFilePath : LocalFilesToRemove) + { + SetFileReadOnly(LocalFilePath, false); + std::filesystem::remove(LocalFilePath); + } } - Work.ScheduleWork( - WritePool, - [&, BlockIndex](std::atomic&) { - if (!AbortFlag) - { - if (IsBlockNeeded(BlockDescriptions[BlockIndex])) - { - Work.ScheduleWork( - NetworkPool, - [&, BlockIndex](std::atomic&) { - if (!AbortFlag) - { - IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescriptions[BlockIndex].BlockHash); - if (!BlockBuffer) - { - throw std::runtime_error( - fmt::format("Block {} is missing", BlockDescriptions[BlockIndex].BlockHash)); - } - BytesDownloaded += BlockBuffer.GetSize(); - BlockBytes += BlockBuffer.GetSize(); - DownloadedBlocks++; + } - if (!AbortFlag) - { - Work.ScheduleWork( - WritePool, - [&, BlockIndex, BlockBuffer = std::move(BlockBuffer)](std::atomic&) { - if (!AbortFlag) - { - IoHash BlockRawHash; - uint64_t BlockRawSize; - CompressedBuffer CompressedBlockBuffer = - CompressedBuffer::FromCompressed(SharedBuffer(std::move(BlockBuffer)), - BlockRawHash, - BlockRawSize); - if (!CompressedBlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", - BlockDescriptions[BlockIndex].BlockHash)); - } + { + WorkerThreadPool& WritePool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // - if (BlockRawHash != BlockDescriptions[BlockIndex].BlockHash) - { - throw std::runtime_error( - fmt::format("Block {} header has a mismatching raw hash {}", - BlockDescriptions[BlockIndex].BlockHash, - BlockRawHash)); - } + ProgressBar RebuildProgressBar(UsePlainProgress); + ParallellWork Work(AbortFlag); - CompositeBuffer DecompressedBlockBuffer = - CompressedBlockBuffer.DecompressToComposite(); - if (!DecompressedBlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} failed to decompress", - BlockDescriptions[BlockIndex].BlockHash)); - } + OutLocalFolderState.Paths.resize(RemoteContent.Paths.size()); + OutLocalFolderState.RawSizes.resize(RemoteContent.Paths.size()); + OutLocalFolderState.Attributes.resize(RemoteContent.Paths.size()); + OutLocalFolderState.ModificationTicks.resize(RemoteContent.Paths.size()); - ZEN_ASSERT_SLOW(BlockDescriptions[BlockIndex].BlockHash == - IoHash::HashBuffer(DecompressedBlockBuffer)); + std::atomic TargetsComplete = 0; - uint64_t BytesWrittenToDisk = 0; - uint32_t ChunksReadFromBlock = 0; - if (WriteBlockToDisk(Path, - RemoteContent, - RemotePathIndexWantsCopyFromCacheFlags, - DecompressedBlockBuffer, - RemoteLookup, - RemoteChunkIndexNeedsCopyFromSourceFlags.data(), - ChunksReadFromBlock, - BytesWrittenToDisk)) - { - BytesWritten += BytesWrittenToDisk; - WriteToDiskBytes += BytesWrittenToDisk; - ChunkCountWritten += ChunksReadFromBlock; - } - else - { - throw std::runtime_error(fmt::format("Block {} is malformed", - BlockDescriptions[BlockIndex].BlockHash)); - } - BlocksComplete++; - } - }, - [&, BlockIndex](const std::exception& Ex, std::atomic&) { - ZEN_ERROR("Failed writing block {}. Reason: {}", - BlockDescriptions[BlockIndex].BlockHash, - Ex.what()); - AbortFlag = true; - }); - } - } - }, - Work.DefaultErrorFunction()); - } - else - { - ZEN_DEBUG("Skipping block {} due to cache reuse", BlockDescriptions[BlockIndex].BlockHash); - BlocksComplete++; - } - } - }, - Work.DefaultErrorFunction()); - } - for (uint32_t PathIndex = 0; PathIndex < RemoteContent.Paths.size(); PathIndex++) - { - if (Work.IsAborted()) - { - break; - } - if (RemoteContent.RawSizes[PathIndex] == 0) + size_t TargetOffset = 0; + while (TargetOffset < Targets.size()) { + if (AbortFlag) + { + break; + } + + size_t TargetCount = 1; + const IoHash& RawHash = Targets[TargetOffset].first; + while (Targets[TargetOffset + TargetCount].first == RawHash) + { + TargetCount++; + } + Work.ScheduleWork( - WritePool, - [&, PathIndex](std::atomic&) { + WritePool, // GetSyncWorkerPool(),// + [&, BaseTargetOffset = TargetOffset, TargetCount](std::atomic&) { if (!AbortFlag) { - const std::filesystem::path TargetPath = (Path / RemoteContent.Paths[PathIndex]).make_preferred(); - CreateDirectories(TargetPath.parent_path()); - BasicFile OutputFile; - OutputFile.Open(TargetPath, BasicFile::Mode::kTruncate); + size_t TargetOffset = BaseTargetOffset; + const IoHash& RawHash = Targets[TargetOffset].first; + const uint32_t FirstTargetPathIndex = Targets[TargetOffset].second; + const std::filesystem::path& FirstTargetPath = RemoteContent.Paths[FirstTargetPathIndex]; + OutLocalFolderState.Paths[FirstTargetPathIndex] = FirstTargetPath; + OutLocalFolderState.RawSizes[FirstTargetPathIndex] = RemoteContent.RawSizes[FirstTargetPathIndex]; + const std::filesystem::path FirstTargetFilePath = (Path / FirstTargetPath).make_preferred(); + if (RawHash == IoHash::Zero) + { + if (std::filesystem::exists(FirstTargetFilePath)) + { + SetFileReadOnly(FirstTargetFilePath, false); + } + CreateDirectories(FirstTargetFilePath.parent_path()); + { + BasicFile OutputFile; + OutputFile.Open(FirstTargetFilePath, BasicFile::Mode::kTruncate); + } + } + else + { + const std::filesystem::path CacheFilePath = (CacheFolderPath / RawHash.ToHexString()).make_preferred(); + ZEN_ASSERT_SLOW(std::filesystem::exists(CacheFilePath)); + CreateDirectories(FirstTargetFilePath.parent_path()); + if (std::filesystem::exists(FirstTargetFilePath)) + { + SetFileReadOnly(FirstTargetFilePath, false); + } + std::filesystem::rename(CacheFilePath, FirstTargetFilePath); + } + + OutLocalFolderState.Attributes[FirstTargetPathIndex] = + RemoteContent.Attributes.empty() ? GetNativeFileAttributes(FirstTargetFilePath) + : SetNativeFileAttributes(FirstTargetFilePath, + RemoteContent.Platform, + RemoteContent.Attributes[FirstTargetPathIndex]); + OutLocalFolderState.ModificationTicks[FirstTargetPathIndex] = GetModificationTickFromPath(FirstTargetFilePath); + + TargetOffset++; + TargetsComplete++; + while (TargetOffset < (BaseTargetOffset + TargetCount)) + { + ZEN_ASSERT(Targets[TargetOffset].first == RawHash); + ZEN_ASSERT_SLOW(std::filesystem::exists(FirstTargetFilePath)); + const uint32_t ExtraTargetPathIndex = Targets[TargetOffset].second; + const std::filesystem::path& ExtraTargetPath = RemoteContent.Paths[ExtraTargetPathIndex]; + const std::filesystem::path ExtraTargetFilePath = (Path / ExtraTargetPath).make_preferred(); + OutLocalFolderState.Paths[ExtraTargetPathIndex] = ExtraTargetPath; + OutLocalFolderState.RawSizes[ExtraTargetPathIndex] = RemoteContent.RawSizes[ExtraTargetPathIndex]; + CreateDirectories(ExtraTargetFilePath.parent_path()); + if (std::filesystem::exists(ExtraTargetFilePath)) + { + SetFileReadOnly(ExtraTargetFilePath, false); + } + CopyFile(FirstTargetFilePath, ExtraTargetFilePath, {.EnableClone = false}); + + OutLocalFolderState.Attributes[ExtraTargetPathIndex] = + RemoteContent.Attributes.empty() + ? GetNativeFileAttributes(ExtraTargetFilePath) + : SetNativeFileAttributes(ExtraTargetFilePath, + RemoteContent.Platform, + RemoteContent.Attributes[ExtraTargetPathIndex]); + OutLocalFolderState.ModificationTicks[ExtraTargetPathIndex] = + GetModificationTickFromPath(ExtraTargetFilePath); + + TargetOffset++; + TargetsComplete++; + } } }, Work.DefaultErrorFunction()); - } - } - Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(IsAborted, PendingWork); - ZEN_ASSERT(ChunkCountToWrite >= ChunkCountWritten.load()); - WriteProgressBar.UpdateState( - {.Task = "Writing chunks ", - .Details = fmt::format("Written {} chunks out of {}. {} ouf of {} blocks complete. Downloaded: {}. Written: {}", - ChunkCountWritten.load(), - ChunkCountToWrite, - BlocksComplete.load(), - BlockCount, - NiceBytes(BytesDownloaded.load()), - NiceBytes(BytesWritten.load())), - .TotalCount = gsl::narrow(ChunkCountToWrite), - .RemainingCount = gsl::narrow(ChunkCountToWrite - ChunkCountWritten.load())}, - false); - }); - - if (AbortFlag) - { - return; - } + TargetOffset += TargetCount; + } - WriteProgressBar.Finish(); + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + RebuildProgressBar.UpdateState( + {.Task = "Rebuilding state ", + .Details = fmt::format("Written {} files out of {}", TargetsComplete.load(), Targets.size()), + .TotalCount = gsl::narrow(Targets.size()), + .RemainingCount = gsl::narrow(Targets.size() - TargetsComplete.load())}, + false); + }); - { - ProgressBar PremissionsProgressBar(false); - if (!RemoteContent.Attributes.empty()) + if (AbortFlag) { - auto SetNativeFileAttributes = - [](const std::filesystem::path FilePath, SourcePlatform SourcePlatform, uint32_t Attributes) -> uint32_t { -#if ZEN_PLATFORM_WINDOWS - if (SourcePlatform == SourcePlatform::Windows) - { - SetFileAttributes(FilePath, Attributes); - return Attributes; - } - else - { - uint32_t CurrentAttributes = GetFileAttributes(FilePath); - uint32_t NewAttributes = MakeFileAttributeReadOnly(CurrentAttributes, IsFileModeReadOnly(Attributes)); - if (CurrentAttributes != NewAttributes) - { - SetFileAttributes(FilePath, NewAttributes); - } - return NewAttributes; - } -#endif // ZEN_PLATFORM_WINDOWS -#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC - if (SourcePlatform != SourcePlatform::Windows) - { - SetFileMode(FilePath, Attributes); - return Attributes; - } - else - { - uint32_t CurrentMode = GetFileMode(FilePath); - uint32_t NewMode = MakeFileModeReadOnly(CurrentMode, IsFileAttributeReadOnly(Attributes)); - if (CurrentMode != NewMode) - { - SetFileMode(FilePath, NewMode); - } - return NewMode; - } -#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC - }; - - OutLocalFolderState.Paths.reserve(RemoteContent.Paths.size()); - OutLocalFolderState.RawSizes.reserve(RemoteContent.Paths.size()); - OutLocalFolderState.Attributes.reserve(RemoteContent.Paths.size()); - OutLocalFolderState.ModificationTicks.reserve(RemoteContent.Paths.size()); - for (uint32_t PathIndex = 0; PathIndex < RemoteContent.Paths.size(); PathIndex++) - { - const std::filesystem::path LocalFilePath = (Path / RemoteContent.Paths[PathIndex]); - const uint32_t CurrentPlatformAttributes = - SetNativeFileAttributes(LocalFilePath, RemoteContent.Platform, RemoteContent.Attributes[PathIndex]); - - OutLocalFolderState.Paths.push_back(RemoteContent.Paths[PathIndex]); - OutLocalFolderState.RawSizes.push_back(RemoteContent.RawSizes[PathIndex]); - OutLocalFolderState.Attributes.push_back(CurrentPlatformAttributes); - OutLocalFolderState.ModificationTicks.push_back(GetModificationTickFromPath(LocalFilePath)); - - PremissionsProgressBar.UpdateState( - {.Task = "Set permissions ", - .Details = fmt::format("Updated {} files out of {}", PathIndex, RemoteContent.Paths.size()), - .TotalCount = RemoteContent.Paths.size(), - .RemainingCount = (RemoteContent.Paths.size() - PathIndex)}, - false); - } + return; } - PremissionsProgressBar.Finish(); + + RebuildProgressBar.Finish(); } } @@ -4077,7 +4076,7 @@ namespace { CbObject BuildObject = Storage.GetBuild(BuildId); ZEN_CONSOLE("GetBuild took {}. Payload size: {}", - NiceLatencyNs(GetBuildTimer.GetElapsedTimeUs() * 1000), + NiceTimeSpanMs(GetBuildTimer.GetElapsedTimeMs()), NiceBytes(BuildObject.GetSize())); CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); @@ -4162,7 +4161,7 @@ namespace { ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. Payload size: {}", BuildParts[0].first, BuildParts[0].second, - NiceLatencyNs(GetBuildPartTimer.GetElapsedTimeUs() * 1000), + NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()), NiceBytes(BuildPartManifest.GetSize())); { @@ -4202,7 +4201,7 @@ namespace { OutBlockDescriptions = Storage.GetBlockMetadata(BuildId, BlockRawHashes); ZEN_CONSOLE("GetBlockMetadata for {} took {}. Found {} blocks", BuildPartId, - NiceLatencyNs(GetBlockMetadataTimer.GetElapsedTimeUs() * 1000), + NiceTimeSpanMs(GetBlockMetadataTimer.GetElapsedTimeMs()), OutBlockDescriptions.size()); if (OutBlockDescriptions.size() != BlockRawHashes.size()) @@ -4309,7 +4308,7 @@ namespace { ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. Payload size: {}", OverlayBuildPartId, OverlayBuildPartName, - NiceLatencyNs(GetOverlayBuildPartTimer.GetElapsedTimeUs() * 1000), + NiceTimeSpanMs(GetOverlayBuildPartTimer.GetElapsedTimeMs()), NiceBytes(OverlayBuildPartManifest.GetSize())); ChunkedFolderContent OverlayPartContent; @@ -4457,6 +4456,7 @@ namespace { if (!LocalFolderState.AreKnownFilesEqual(CurrentLocalFolderContent)) { + const size_t LocaStatePathCount = LocalFolderState.Paths.size(); std::vector DeletedPaths; FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, CurrentLocalFolderContent, DeletedPaths); if (!DeletedPaths.empty()) @@ -4464,9 +4464,10 @@ namespace { LocalContent = DeletePathsFromChunkedContent(LocalContent, DeletedPaths); } - ZEN_CONSOLE("Updating state, {} local files deleted and {} local files updated", + ZEN_CONSOLE("Updating state, {} local files deleted and {} local files updated out of {}", DeletedPaths.size(), - UpdatedContent.Paths.size()); + UpdatedContent.Paths.size(), + LocaStatePathCount); if (UpdatedContent.Paths.size() > 0) { uint64_t ByteCountToScan = 0; @@ -4535,7 +4536,7 @@ namespace { ZEN_CONSOLE("Using cached local state"); } - ZEN_CONSOLE("Read local state in {}", NiceLatencyNs(ReadStateTimer.GetElapsedTimeUs() * 1000)); + ZEN_CONSOLE("Read local state in {}", NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs())); ScanContent = false; } } @@ -4610,7 +4611,7 @@ namespace { }); CreateDirectories(Path / ZenTempBlockFolderName); CreateDirectories(Path / ZenTempChunkFolderName); - CreateDirectories(Path / ZenTempReuseFolderName); + CreateDirectories(Path / ZenTempCacheFolderName); std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; @@ -4698,7 +4699,7 @@ namespace { if (CompareContent(RemoteContent, LocalContent)) { ZEN_CONSOLE("Local state is identical to build to download. All done. Completed in {}.", - NiceLatencyNs(DownloadTimer.GetElapsedTimeUs() * 1000)); + NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); } else { @@ -4730,7 +4731,7 @@ namespace { CreateDirectories((Path / ZenStateFilePath).parent_path()); TemporaryFile::SafeWriteFile(Path / ZenStateFilePath, StateObject.GetView()); - ZEN_CONSOLE("Wrote local state in {}", NiceLatencyNs(WriteStateTimer.GetElapsedTimeUs() * 1000)); + ZEN_CONSOLE("Wrote local state in {}", NiceTimeSpanMs(WriteStateTimer.GetElapsedTimeMs())); #if 0 ExtendableStringBuilder<1024> SB; @@ -4738,7 +4739,7 @@ namespace { WriteFile(Path / ZenStateFileJsonPath, IoBuffer(IoBuffer::Wrap, SB.Data(), SB.Size())); #endif // 0 - ZEN_CONSOLE("Downloaded build in {}.", NiceLatencyNs(DownloadTimer.GetElapsedTimeUs() * 1000)); + ZEN_CONSOLE("Downloaded build in {}.", NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); } } } @@ -5095,13 +5096,13 @@ BuildsCommand::BuildsCommand() "Build part Ids list separated by ',', if no build-part-ids or build-part-names are given all parts will be downloaded", cxxopts::value(m_BuildPartIds), ""); - m_DownloadOptions.add_option( - "", - "", - "build-part-name", - "Name of the build parts list separated by ',', if no build-part-ids or build-part-names are given all parts will be downloaded", - cxxopts::value(m_BuildPartNames), - ""); + m_DownloadOptions.add_option("", + "", + "build-part-name", + "Name of the build parts list separated by ',', if no build-part-ids or build-part-names are given " + "all parts will be downloaded", + cxxopts::value(m_BuildPartNames), + ""); m_DownloadOptions .add_option("", "", "clean", "Delete all data in target folder before downloading", cxxopts::value(m_Clean), ""); m_DownloadOptions.add_option("", @@ -5841,7 +5842,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) ZEN_CONSOLE("Scrambling files, {} remaining", PendingWork); }); ZEN_ASSERT(!AbortFlag.load()); - ZEN_CONSOLE("Scrambled files in {}", NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); + ZEN_CONSOLE("Scrambled files in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }; ScrambleDir(DownloadPath); diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp index 8279fb952..85feab2f7 100644 --- a/src/zencore/filesystem.cpp +++ b/src/zencore/filesystem.cpp @@ -531,7 +531,10 @@ CloneFile(std::filesystem::path FromPath, std::filesystem::path ToPath) } void -CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options, std::error_code& OutErrorCode) +CopyFile(const std::filesystem::path& FromPath, + const std::filesystem::path& ToPath, + const CopyFileOptions& Options, + std::error_code& OutErrorCode) { OutErrorCode.clear(); @@ -544,7 +547,7 @@ CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop } bool -CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options) +CopyFile(const std::filesystem::path& FromPath, const std::filesystem::path& ToPath, const CopyFileOptions& Options) { bool Success = false; diff --git a/src/zencore/include/zencore/filesystem.h b/src/zencore/include/zencore/filesystem.h index 20f6dc56c..e020668fc 100644 --- a/src/zencore/include/zencore/filesystem.h +++ b/src/zencore/include/zencore/filesystem.h @@ -97,11 +97,11 @@ struct CopyFileOptions bool MustClone = false; }; -ZENCORE_API bool CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options); -ZENCORE_API void CopyFile(std::filesystem::path FromPath, - std::filesystem::path ToPath, - const CopyFileOptions& Options, - std::error_code& OutError); +ZENCORE_API bool CopyFile(const std::filesystem::path& FromPath, const std::filesystem::path& ToPath, const CopyFileOptions& Options); +ZENCORE_API void CopyFile(const std::filesystem::path& FromPath, + const std::filesystem::path& ToPath, + const CopyFileOptions& Options, + std::error_code& OutError); ZENCORE_API void CopyTree(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options); ZENCORE_API bool SupportsBlockRefCounting(std::filesystem::path Path); diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp index 78ebcdd55..a4bb759e7 100644 --- a/src/zenutil/filebuildstorage.cpp +++ b/src/zenutil/filebuildstorage.cpp @@ -336,7 +336,8 @@ public: const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); if (std::filesystem::is_regular_file(BlockPath)) { - IoBuffer Payload = ReadFile(BlockPath).Flatten(); + BasicFile File(BlockPath, BasicFile::Mode::kRead); + IoBuffer Payload = File.ReadAll(); ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(SharedBuffer(Payload)))); m_Stats.TotalBytesRead += Payload.GetSize(); Payload.SetContentType(ZenContentType::kCompressedBinary); @@ -365,13 +366,13 @@ public: struct WorkloadData { std::atomic BytesRemaining; - IoBuffer BlobFile; + BasicFile BlobFile; std::function Receiver; }; std::shared_ptr Workload(std::make_shared()); - Workload->BlobFile = IoBufferBuilder::MakeFromFile(BlockPath); - const uint64_t BlobSize = Workload->BlobFile.GetSize(); + Workload->BlobFile.Open(BlockPath, BasicFile::Mode::kRead); + const uint64_t BlobSize = Workload->BlobFile.FileSize(); Workload->Receiver = std::move(Receiver); Workload->BytesRemaining = BlobSize; @@ -383,7 +384,8 @@ public: uint64_t Size = Min(ChunkSize, BlobSize - Offset); WorkItems.push_back([this, BlockPath, Workload, Offset, Size]() { SimulateLatency(0, 0); - IoBuffer PartPayload(Workload->BlobFile, Offset, Size); + IoBuffer PartPayload(Size); + Workload->BlobFile.Read(PartPayload.GetMutableView().GetData(), Size, Offset); m_Stats.TotalBytesRead += PartPayload.GetSize(); uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Size); Workload->Receiver(Offset, PartPayload, ByteRemaning); -- cgit v1.2.3 From 1270bfeffbc81b1e4940c5c454ee6acde43e696a Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 3 Mar 2025 17:53:11 +0100 Subject: refactor use chunk sequence download (#291) * work on chunk sequences on download, not paths * write chunksequences to .tmp file and move when complete * cleanup * Added on the fly validation `zen builds download` of files built from smaller chunks as each file is completed Added `--verify` option to `zen builds upload` to verify all uploaded data once entire upload is complete Added `--verify` option to `zen builds download` to verify all files in target folder once entire download is complete Fixed/improved progress updated Multithreaded part validation * added rates to Write Chunks task * b/s -> bits/s * dont validate partial content as complete payload * handle legacy c# builds --- src/zen/cmds/builds_cmd.cpp | 1324 ++++++++++++++++---------- src/zen/cmds/builds_cmd.h | 2 + src/zenhttp/httpclient.cpp | 5 + src/zenutil/chunkedcontent.cpp | 78 +- src/zenutil/include/zenutil/chunkedcontent.h | 53 +- 5 files changed, 910 insertions(+), 552 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 28c794559..219d01240 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -286,7 +286,7 @@ namespace { } } - uint64_t GetCurrent() const + uint64_t GetCurrent() const // If Stopped - return total count / total time { if (LastTimeUS == (uint64_t)-1) { @@ -330,6 +330,16 @@ namespace { return Count * 1000000 / ElapsedWallTimeUS; } + std::filesystem::path GetTempChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash) + { + return (CacheFolderPath / (RawHash.ToHexString() + ".tmp")).make_preferred(); + } + + std::filesystem::path GetFinalChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash) + { + return (CacheFolderPath / RawHash.ToHexString()).make_preferred(); + } + ChunkedFolderContent ScanAndChunkFolder( GetFolderContentStatistics& GetFolderContentStats, ChunkingStatistics& ChunkingStats, @@ -364,15 +374,16 @@ namespace { UsePlainProgress ? 5000 : 200, [&](bool, std::ptrdiff_t) { FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + GetFolderContentStats.AcceptedFileCount.load(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(GetFolderContentStats.FoundFileByteCount), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); ProgressBar.UpdateState({.Task = "Scanning files ", - .Details = fmt::format("{}/{} ({}/{}, {}B/s) files, {} ({}) chunks found", - ChunkingStats.FilesProcessed.load(), - GetFolderContentStats.AcceptedFileCount.load(), - NiceBytes(ChunkingStats.BytesHashed.load()), - NiceBytes(GetFolderContentStats.FoundFileByteCount), - NiceNum(FilteredBytesHashed.GetCurrent()), - ChunkingStats.UniqueChunksFound.load(), - NiceBytes(ChunkingStats.UniqueBytesFound.load())), + .Details = Details, .TotalCount = GetFolderContentStats.AcceptedFileByteCount, .RemainingCount = GetFolderContentStats.AcceptedFileByteCount - ChunkingStats.BytesHashed.load()}, false); @@ -766,9 +777,19 @@ namespace { } } - if (FilesObject["chunkcounts"sv]) + if (CbObjectView ChunkContentView = BuildPartManifest["chunkedContent"sv].AsObjectView(); ChunkContentView) + { + compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkContentView, OutSequenceRawHashes); + compactbinary_helpers::ReadArray("chunkcounts"sv, ChunkContentView, OutChunkCounts); + if (OutChunkCounts.size() != OutSequenceRawHashes.size()) + { + throw std::runtime_error(fmt::format("Number of chunk count entries does not match number of paths")); + } + compactbinary_helpers::ReadArray("chunkorders"sv, ChunkContentView, OutAbsoluteChunkOrders); + } + else if (FilesObject["chunkcounts"sv]) { - // Legacy style + // Legacy zen style std::vector LegacyChunkCounts; compactbinary_helpers::ReadArray("chunkcounts"sv, FilesObject, LegacyChunkCounts); @@ -805,15 +826,29 @@ namespace { OrderIndexOffset += LegacyChunkCounts[PathIndex]; } } - if (CbObjectView ChunkContentView = BuildPartManifest["chunkedContent"sv].AsObjectView(); ChunkContentView) + else { - compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkContentView, OutSequenceRawHashes); - compactbinary_helpers::ReadArray("chunkcounts"sv, ChunkContentView, OutChunkCounts); - if (OutChunkCounts.size() != OutSequenceRawHashes.size()) + // Legacy C# style + + tsl::robin_set FoundRawHashes; + FoundRawHashes.reserve(PathCount); + uint32_t OrderIndexOffset = 0; + for (uint32_t PathIndex = 0; PathIndex < OutPaths.size(); PathIndex++) { - throw std::runtime_error(fmt::format("Number of chunk count entries does not match number of paths")); + if (OutRawSizes[PathIndex] > 0) + { + const IoHash& PathRawHash = OutRawHashes[PathIndex]; + if (FoundRawHashes.insert(PathRawHash).second) + { + OutSequenceRawHashes.push_back(PathRawHash); + OutChunkCounts.push_back(1); + OutAbsoluteChunkOrders.push_back(OrderIndexOffset); + OutLooseChunkHashes.push_back(PathRawHash); + OutLooseChunkRawSizes.push_back(OutRawSizes[PathIndex]); + OrderIndexOffset += 1; + } + } } - compactbinary_helpers::ReadArray("chunkorders"sv, ChunkContentView, OutAbsoluteChunkOrders); } CbObjectView ChunkAttachmentsView = BuildPartManifest["chunkAttachments"sv].AsObjectView(); @@ -963,14 +998,14 @@ namespace { { public: // A buffered file reader that provides CompositeBuffer where the buffers are owned and the memory never overwritten - ReadFileCache(DiskStatistics& DiskStats, - const std::filesystem::path& Path, - const std::vector& Paths, - const std::vector& RawSizes, - size_t MaxOpenFileCount) + ReadFileCache(DiskStatistics& DiskStats, + const std::filesystem::path& Path, + const ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + size_t MaxOpenFileCount) : m_Path(Path) - , m_Paths(Paths) - , m_RawSizes(RawSizes) + , m_LocalContent(LocalContent) + , m_LocalLookup(LocalLookup) , m_DiskStats(DiskStats) { m_OpenFiles.reserve(MaxOpenFileCount); @@ -981,26 +1016,28 @@ namespace { m_OpenFiles.clear(); } - CompositeBuffer GetRange(uint32_t PathIndex, uint64_t Offset, uint64_t Size) + CompositeBuffer GetRange(uint32_t SequenceIndex, uint64_t Offset, uint64_t Size) { - auto CacheIt = - std::find_if(m_OpenFiles.begin(), m_OpenFiles.end(), [PathIndex](const auto& Lhs) { return Lhs.first == PathIndex; }); + auto CacheIt = std::find_if(m_OpenFiles.begin(), m_OpenFiles.end(), [SequenceIndex](const auto& Lhs) { + return Lhs.first == SequenceIndex; + }); if (CacheIt != m_OpenFiles.end()) { if (CacheIt != m_OpenFiles.begin()) { auto CachedFile(std::move(CacheIt->second)); m_OpenFiles.erase(CacheIt); - m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(PathIndex, std::move(CachedFile))); + m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(SequenceIndex, std::move(CachedFile))); } CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); m_DiskStats.ReadByteCount += Result.GetSize(); return Result; } - const std::filesystem::path AttachmentPath = (m_Path / m_Paths[PathIndex]).make_preferred(); - if (Size == m_RawSizes[PathIndex]) + const uint32_t LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred(); + if (Size == m_LocalContent.RawSizes[LocalPathIndex]) { - IoBuffer Result = IoBufferBuilder::MakeFromFile(AttachmentPath); + IoBuffer Result = IoBufferBuilder::MakeFromFile(LocalFilePath); m_DiskStats.OpenReadCount++; m_DiskStats.ReadByteCount += Result.GetSize(); return CompositeBuffer(SharedBuffer(Result)); @@ -1010,7 +1047,7 @@ namespace { m_OpenFiles.pop_back(); m_DiskStats.CurrentOpenFileCount--; } - m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(PathIndex, std::make_unique(AttachmentPath))); + m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(SequenceIndex, std::make_unique(LocalFilePath))); CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); m_DiskStats.ReadByteCount += Result.GetSize(); m_DiskStats.OpenReadCount++; @@ -1020,23 +1057,14 @@ namespace { private: const std::filesystem::path m_Path; - const std::vector& m_Paths; - const std::vector& m_RawSizes; + const ChunkedFolderContent& m_LocalContent; + const ChunkedContentLookup& m_LocalLookup; std::vector>> m_OpenFiles; DiskStatistics& m_DiskStats; }; - CompositeBuffer ValidateBlob(BuildStorage& Storage, - const Oid& BuildId, - const IoHash& BlobHash, - uint64_t& OutCompressedSize, - uint64_t& OutDecompressedSize) + CompositeBuffer ValidateBlob(IoBuffer&& Payload, const IoHash& BlobHash, uint64_t& OutCompressedSize, uint64_t& OutDecompressedSize) { - IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlobHash); - if (!Payload) - { - throw std::runtime_error(fmt::format("Blob {} could not be found", BlobHash)); - } if (Payload.GetContentType() != ZenContentType::kCompressedBinary) { throw std::runtime_error(fmt::format("Blob {} ({} bytes) has unexpected content type '{}'", @@ -1080,13 +1108,26 @@ namespace { return DecompressedComposite; } - ChunkBlockDescription ValidateChunkBlock(BuildStorage& Storage, - const Oid& BuildId, + CompositeBuffer ValidateBlob(BuildStorage& Storage, + const Oid& BuildId, + const IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize) + { + IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlobHash); + if (!Payload) + { + throw std::runtime_error(fmt::format("Blob {} could not be found", BlobHash)); + } + return ValidateBlob(std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); + } + + ChunkBlockDescription ValidateChunkBlock(IoBuffer&& Payload, const IoHash& BlobHash, uint64_t& OutCompressedSize, uint64_t& OutDecompressedSize) { - CompositeBuffer BlockBuffer = ValidateBlob(Storage, BuildId, BlobHash, OutCompressedSize, OutDecompressedSize); + CompositeBuffer BlockBuffer = ValidateBlob(std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); return GetChunkBlockDescription(BlockBuffer.Flatten(), BlobHash); } @@ -1097,11 +1138,12 @@ namespace { { auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); ZEN_ASSERT(It != Lookup.ChunkHashToChunkIndex.end()); - uint32_t ChunkIndex = It->second; - std::span ChunkLocations = GetChunkLocations(Lookup, ChunkIndex); + uint32_t ChunkIndex = It->second; + std::span ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex); ZEN_ASSERT(!ChunkLocations.empty()); - CompositeBuffer Chunk = - OpenFileCache.GetRange(ChunkLocations[0].PathIndex, ChunkLocations[0].Offset, Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, + ChunkLocations[0].Offset, + Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == ChunkHash); return Chunk; }; @@ -1113,7 +1155,7 @@ namespace { ChunkBlockDescription& OutBlockDescription, DiskStatistics& DiskStats) { - ReadFileCache OpenFileCache(DiskStats, Path, Content.Paths, Content.RawSizes, 4); + ReadFileCache OpenFileCache(DiskStats, Path, Content, Lookup, 4); std::vector> BlockContent; BlockContent.reserve(ChunksInBlock.size()); @@ -1135,6 +1177,187 @@ namespace { return GenerateChunkBlock(std::move(BlockContent), OutBlockDescription); }; + void ValidateBuildPart(BuildStorage& Storage, const Oid& BuildId, Oid BuildPartId, const std::string_view BuildPartName) + { + Stopwatch Timer; + auto _ = MakeGuard([&]() { + ZEN_CONSOLE("Validated build part {}/{} ('{}') in {}", + BuildId, + BuildPartId, + BuildPartName, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + CbObject Build = Storage.GetBuild(BuildId); + if (!BuildPartName.empty()) + { + BuildPartId = Build["parts"sv].AsObjectView()[BuildPartName].AsObjectId(); + if (BuildPartId == Oid::Zero) + { + throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", BuildId, BuildPartName)); + } + } + CbObject BuildPart = Storage.GetBuildPart(BuildId, BuildPartId); + ZEN_CONSOLE("Validating build part {}/{} ({})", BuildId, BuildPartId, NiceBytes(BuildPart.GetSize())); + std::vector ChunkAttachments; + for (CbFieldView LooseFileView : BuildPart["chunkAttachments"sv].AsObjectView()["rawHashes"sv]) + { + ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment()); + } + std::vector BlockAttachments; + for (CbFieldView BlocksView : BuildPart["blockAttachments"sv].AsObjectView()["rawHashes"sv]) + { + BlockAttachments.push_back(BlocksView.AsBinaryAttachment()); + } + + std::vector VerifyBlockDescriptions = Storage.GetBlockMetadata(BuildId, BlockAttachments); + if (VerifyBlockDescriptions.size() != BlockAttachments.size()) + { + throw std::runtime_error(fmt::format("Uploaded blocks metadata could not all be found, {} blocks metadata is missing", + BlockAttachments.size() - VerifyBlockDescriptions.size())); + } + + WorkerThreadPool& NetworkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + WorkerThreadPool& VerifyPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + ParallellWork Work(AbortFlag); + + ProgressBar ProgressBar(UsePlainProgress); + + uint64_t AttachmentsToVerifyCount = ChunkAttachments.size() + BlockAttachments.size(); + std::atomic DownloadedAttachmentCount = 0; + std::atomic VerifiedAttachmentCount = 0; + std::atomic DownloadedByteCount = 0; + std::atomic VerifiedByteCount = 0; + FilteredRate FilteredDownloadedBytesPerSecond; + FilteredRate FilteredVerifiedBytesPerSecond; + + for (const IoHash& ChunkAttachment : ChunkAttachments) + { + Work.ScheduleWork( + NetworkPool, + [&, ChunkAttachment](std::atomic&) { + if (!AbortFlag) + { + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer Payload = Storage.GetBuildBlob(BuildId, ChunkAttachment); + DownloadedAttachmentCount++; + DownloadedByteCount += Payload.GetSize(); + if (DownloadedAttachmentCount.load() == AttachmentsToVerifyCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + if (!Payload) + { + throw std::runtime_error(fmt::format("Chunk attachment {} could not be found", ChunkAttachment)); + } + if (!AbortFlag) + { + Work.ScheduleWork( + VerifyPool, + [&, Payload = std::move(Payload), ChunkAttachment](std::atomic&) { + if (!AbortFlag) + { + FilteredVerifiedBytesPerSecond.Start(); + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateBlob(IoBuffer(Payload), ChunkAttachment, CompressedSize, DecompressedSize); + ZEN_CONSOLE_VERBOSE("Chunk attachment {} ({} -> {}) is valid", + ChunkAttachment, + NiceBytes(CompressedSize), + NiceBytes(DecompressedSize)); + VerifiedAttachmentCount++; + VerifiedByteCount += DecompressedSize; + if (VerifiedAttachmentCount.load() == AttachmentsToVerifyCount) + { + FilteredVerifiedBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + } + }, + Work.DefaultErrorFunction()); + } + + for (const IoHash& BlockAttachment : BlockAttachments) + { + Work.ScheduleWork( + NetworkPool, + [&, BlockAttachment](std::atomic&) { + if (!AbortFlag) + { + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlockAttachment); + DownloadedAttachmentCount++; + DownloadedByteCount += Payload.GetSize(); + if (DownloadedAttachmentCount.load() == AttachmentsToVerifyCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + if (!Payload) + { + throw std::runtime_error(fmt::format("Block attachment {} could not be found", BlockAttachment)); + } + if (!AbortFlag) + { + Work.ScheduleWork( + VerifyPool, + [&, Payload = std::move(Payload), BlockAttachment](std::atomic&) { + if (!AbortFlag) + { + FilteredVerifiedBytesPerSecond.Start(); + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateChunkBlock(IoBuffer(Payload), BlockAttachment, CompressedSize, DecompressedSize); + ZEN_CONSOLE_VERBOSE("Chunk block {} ({} -> {}) is valid", + BlockAttachment, + NiceBytes(CompressedSize), + NiceBytes(DecompressedSize)); + VerifiedAttachmentCount++; + VerifiedByteCount += DecompressedSize; + if (VerifiedAttachmentCount.load() == AttachmentsToVerifyCount) + { + FilteredVerifiedBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + } + }, + Work.DefaultErrorFunction()); + } + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + + FilteredDownloadedBytesPerSecond.Update(DownloadedByteCount); + FilteredVerifiedBytesPerSecond.Update(VerifiedByteCount); + + std::string Details = fmt::format("Downloaded {}/{} ({}, {}bits/s). Verified {}/{} ({}, {}B/s)", + DownloadedAttachmentCount.load(), + AttachmentsToVerifyCount, + NiceBytes(DownloadedByteCount.load()), + NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8), + VerifiedAttachmentCount.load(), + AttachmentsToVerifyCount, + NiceBytes(VerifiedByteCount.load()), + NiceNum(FilteredVerifiedBytesPerSecond.GetCurrent())); + + ProgressBar.UpdateState( + {.Task = "Validating blobs ", + .Details = Details, + .TotalCount = gsl::narrow(AttachmentsToVerifyCount * 2), + .RemainingCount = gsl::narrow(AttachmentsToVerifyCount * 2 - + (DownloadedAttachmentCount.load() + VerifiedAttachmentCount.load()))}, + false); + }); + + ProgressBar.Finish(); + } + void ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content, const ChunkedContentLookup& Lookup, uint64_t MaxBlockSize, @@ -1142,13 +1365,13 @@ namespace { std::vector>& OutBlocks) { std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&Content, &Lookup](uint32_t Lhs, uint32_t Rhs) { - const ChunkedContentLookup::ChunkLocation& LhsLocation = GetChunkLocations(Lookup, Lhs)[0]; - const ChunkedContentLookup::ChunkLocation& RhsLocation = GetChunkLocations(Lookup, Rhs)[0]; - if (LhsLocation.PathIndex < RhsLocation.PathIndex) + const ChunkedContentLookup::ChunkSequenceLocation& LhsLocation = GetChunkSequenceLocations(Lookup, Lhs)[0]; + const ChunkedContentLookup::ChunkSequenceLocation& RhsLocation = GetChunkSequenceLocations(Lookup, Rhs)[0]; + if (LhsLocation.SequenceIndex < RhsLocation.SequenceIndex) { return true; } - else if (LhsLocation.PathIndex > RhsLocation.PathIndex) + else if (LhsLocation.SequenceIndex > RhsLocation.SequenceIndex) { return false; } @@ -1171,7 +1394,8 @@ namespace { // between source paths for chunks. Break the block at the last such break if any. ZEN_ASSERT(ChunkIndexOffset > ChunkIndexStart); - const uint32_t ChunkPathIndex = Lookup.ChunkLocations[Lookup.ChunkLocationOffset[ChunkIndex]].PathIndex; + const uint32_t ChunkSequenceIndex = + Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[ChunkIndex]].SequenceIndex; uint64_t ScanBlockSize = BlockSize; @@ -1185,8 +1409,9 @@ namespace { break; } - const uint32_t TestPathIndex = Lookup.ChunkLocations[Lookup.ChunkLocationOffset[TestChunkIndex]].PathIndex; - if (ChunkPathIndex != TestPathIndex) + const uint32_t TestSequenceIndex = + Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[TestChunkIndex]].SequenceIndex; + if (ChunkSequenceIndex != TestSequenceIndex) { ChunkIndexOffset = ScanChunkIndexOffset + 1; break; @@ -1235,10 +1460,9 @@ namespace { const IoHash& ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; - const ChunkedContentLookup::ChunkLocation& Source = GetChunkLocations(Lookup, ChunkIndex)[0]; - - IoBuffer RawSource = - IoBufferBuilder::MakeFromFile((Path / Content.Paths[Source.PathIndex]).make_preferred(), Source.Offset, ChunkSize); + const ChunkedContentLookup::ChunkSequenceLocation& Source = GetChunkSequenceLocations(Lookup, ChunkIndex)[0]; + const std::uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[Source.SequenceIndex]; + IoBuffer RawSource = IoBufferBuilder::MakeFromFile((Path / Content.Paths[PathIndex]).make_preferred(), Source.Offset, ChunkSize); if (!RawSource) { throw std::runtime_error(fmt::format("Failed fetching chunk {}", ChunkHash)); @@ -1418,7 +1642,7 @@ namespace { FilteredGeneratedBytesPerSecond.Update(GenerateBlocksStats.GeneratedBlockByteCount.load()); FilteredUploadedBytesPerSecond.Update(UploadStats.BlocksBytes.load()); - std::string Details = fmt::format("Generated {}/{} ({}, {}B/s) and uploaded {}/{} ({}, {}bits/s) blocks", + std::string Details = fmt::format("Generated {}/{} ({}, {}B/s). Uploaded {}/{} ({}, {}bits/s)", GenerateBlocksStats.GeneratedBlockCount.load(), NewBlockCount, NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), @@ -1470,10 +1694,11 @@ namespace { ParallellWork Work(AbortFlag); - std::atomic UploadedBlockSize = 0; - std::atomic UploadedBlockCount = 0; - std::atomic UploadedChunkSize = 0; - std::atomic UploadedChunkCount = 0; + std::atomic UploadedBlockSize = 0; + std::atomic UploadedBlockCount = 0; + std::atomic UploadedRawChunkSize = 0; + std::atomic UploadedCompressedChunkSize = 0; + std::atomic UploadedChunkCount = 0; tsl::robin_map ChunkIndexToLooseChunkOrderIndex; ChunkIndexToLooseChunkOrderIndex.reserve(LooseChunkIndexes.size()); @@ -1485,8 +1710,8 @@ namespace { std::vector BlockIndexes; std::vector LooseChunkOrderIndexes; - uint64_t TotalChunksSize = 0; - uint64_t TotalBlocksSize = 0; + uint64_t TotalLooseChunksSize = 0; + uint64_t TotalBlocksSize = 0; for (const IoHash& RawHash : RawHashes) { if (auto It = NewBlocks.BlockHashToBlockIndex.find(RawHash); It != NewBlocks.BlockHashToBlockIndex.end()) @@ -1501,11 +1726,11 @@ namespace { LooseOrderIndexIt != ChunkIndexToLooseChunkOrderIndex.end()) { LooseChunkOrderIndexes.push_back(LooseOrderIndexIt->second); - TotalChunksSize += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + TotalLooseChunksSize += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; } } } - uint64_t TotalSize = TotalChunksSize + TotalBlocksSize; + uint64_t TotalRawSize = TotalLooseChunksSize + TotalBlocksSize; const size_t UploadBlockCount = BlockIndexes.size(); const uint32_t UploadChunkCount = gsl::narrow(LooseChunkOrderIndexes.size()); @@ -1548,10 +1773,10 @@ namespace { Work.DefaultErrorFunction()); }; - auto AsyncUploadLooseChunk = [&](const IoHash& RawHash, CompositeBuffer&& Payload) { + auto AsyncUploadLooseChunk = [&](const IoHash& RawHash, uint64_t RawSize, CompositeBuffer&& Payload) { Work.ScheduleWork( UploadChunkPool, - [&, RawHash, Payload = CompositeBuffer(std::move(Payload))](std::atomic&) { + [&, RawHash, RawSize, Payload = CompositeBuffer(std::move(Payload))](std::atomic&) { if (!AbortFlag) { const uint64_t PayloadSize = Payload.GetSize(); @@ -1571,9 +1796,9 @@ namespace { PartPayload.SetContentType(ZenContentType::kBinary); return PartPayload; }, - [&](uint64_t SentBytes, bool IsComplete) { + [&, RawSize](uint64_t SentBytes, bool IsComplete) { UploadStats.ChunksBytes += SentBytes; - UploadedChunkSize += SentBytes; + UploadedCompressedChunkSize += SentBytes; if (IsComplete) { UploadStats.ChunkCount++; @@ -1582,6 +1807,7 @@ namespace { { FilteredUploadedBytesPerSecond.Stop(); } + UploadedRawChunkSize += RawSize; } }); for (auto& WorkPart : MultipartWork) @@ -1604,7 +1830,8 @@ namespace { ZEN_CONSOLE_VERBOSE("Uploaded chunk {} ({})", RawHash, NiceBytes(PayloadSize)); UploadStats.ChunksBytes += Payload.GetSize(); UploadStats.ChunkCount++; - UploadedChunkSize += Payload.GetSize(); + UploadedCompressedChunkSize += Payload.GetSize(); + UploadedRawChunkSize += RawSize; UploadedChunkCount++; if (UploadedChunkCount == UploadChunkCount) { @@ -1696,6 +1923,7 @@ namespace { std::atomic CompressedLooseChunkCount = 0; std::atomic CompressedLooseChunkByteCount = 0; + std::atomic RawLooseChunkByteCount = 0; // Start compression of any non-precompressed loose chunks and schedule upload for (const uint32_t CompressLooseChunkOrderIndex : CompressLooseChunkOrderIndexes) @@ -1712,18 +1940,20 @@ namespace { Content.ChunkedContent.ChunkHashes[ChunkIndex], NiceBytes(Content.ChunkedContent.ChunkRawSizes[ChunkIndex]), NiceBytes(Payload.GetSize())); - UploadStats.ReadFromDiskBytes += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + const uint64_t ChunkRawSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + UploadStats.ReadFromDiskBytes += ChunkRawSize; LooseChunksStats.CompressedChunkBytes += Payload.GetSize(); LooseChunksStats.CompressedChunkCount++; CompressedLooseChunkByteCount += Payload.GetSize(); CompressedLooseChunkCount++; + RawLooseChunkByteCount += ChunkRawSize; if (CompressedLooseChunkCount == CompressLooseChunkOrderIndexes.size()) { FilteredCompressedBytesPerSecond.Stop(); } if (!AbortFlag) { - AsyncUploadLooseChunk(Content.ChunkedContent.ChunkHashes[ChunkIndex], std::move(Payload)); + AsyncUploadLooseChunk(Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkRawSize, std::move(Payload)); } } }, @@ -1734,22 +1964,31 @@ namespace { ZEN_UNUSED(IsAborted, PendingWork); FilteredCompressedBytesPerSecond.Update(CompressedLooseChunkByteCount.load()); FilteredGenerateBlockBytesPerSecond.Update(GeneratedBlockByteCount.load()); - FilteredUploadedBytesPerSecond.Update(UploadedChunkSize.load() + UploadedBlockSize.load()); - uint64_t UploadedSize = UploadedChunkSize.load() + UploadedBlockSize.load(); + FilteredUploadedBytesPerSecond.Update(UploadedCompressedChunkSize.load() + UploadedBlockSize.load()); + uint64_t UploadedRawSize = UploadedRawChunkSize.load() + UploadedBlockSize.load(); + uint64_t UploadedCompressedSize = UploadedCompressedChunkSize.load() + UploadedBlockSize.load(); + + std::string Details = fmt::format( + "Compressed {}/{} ({}/{}) chunks. " + "Uploaded {}/{} ({}/{}) blobs " + "({} {}bits/s)", + CompressedLooseChunkCount.load(), + CompressLooseChunkOrderIndexes.size(), + NiceBytes(RawLooseChunkByteCount), + NiceBytes(TotalLooseChunksSize), + + UploadedBlockCount.load() + UploadedChunkCount.load(), + UploadBlockCount + UploadChunkCount, + NiceBytes(UploadedRawSize), + NiceBytes(TotalRawSize), + + NiceBytes(UploadedCompressedSize), + NiceNum(FilteredUploadedBytesPerSecond.GetCurrent())); + ProgressBar.UpdateState({.Task = "Uploading blobs ", - .Details = fmt::format("Compressed {}/{} chunks. " - "Uploaded {}/{} blobs ({}/{} {}bits/s)", - CompressedLooseChunkCount.load(), - CompressLooseChunkOrderIndexes.size(), - - UploadedBlockCount.load() + UploadedChunkCount.load(), - UploadBlockCount + UploadChunkCount, - - NiceBytes(UploadedChunkSize.load() + UploadedBlockSize.load()), - NiceBytes(TotalSize), - NiceNum(FilteredUploadedBytesPerSecond.GetCurrent())), - .TotalCount = gsl::narrow(TotalSize), - .RemainingCount = gsl::narrow(TotalSize - UploadedSize)}, + .Details = Details, + .TotalCount = gsl::narrow(TotalRawSize), + .RemainingCount = gsl::narrow(TotalRawSize - UploadedRawSize)}, false); }); @@ -1930,7 +2169,8 @@ namespace { bool AllowMultiparts, const CbObject& MetaData, bool CreateBuild, - bool IgnoreExistingBlocks) + bool IgnoreExistingBlocks, + bool PostUploadVerify) { Stopwatch ProcessTimer; @@ -2121,15 +2361,16 @@ namespace { UsePlainProgress ? 5000 : 200, [&](bool, std::ptrdiff_t) { FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + Content.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(TotalRawSize), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); ProgressBar.UpdateState({.Task = "Scanning files ", - .Details = fmt::format("{}/{} ({}/{}, {}B/s) files, {} ({}) chunks found", - ChunkingStats.FilesProcessed.load(), - Content.Paths.size(), - NiceBytes(ChunkingStats.BytesHashed.load()), - NiceBytes(TotalRawSize), - NiceNum(FilteredBytesHashed.GetCurrent()), - ChunkingStats.UniqueChunksFound.load(), - NiceBytes(ChunkingStats.UniqueBytesFound.load())), + .Details = Details, .TotalCount = TotalRawSize, .RemainingCount = TotalRawSize - ChunkingStats.BytesHashed.load()}, false); @@ -2422,19 +2663,18 @@ namespace { std::vector OutLooseChunkHashes; std::vector OutLooseChunkRawSizes; std::vector OutBlockRawHashes; - - ReadBuildContentFromCompactBinary(PartManifestWriter.Save(), - VerifyFolderContent.Platform, - VerifyFolderContent.Paths, - VerifyFolderContent.RawHashes, - VerifyFolderContent.RawSizes, - VerifyFolderContent.Attributes, - VerifyFolderContent.ChunkedContent.SequenceRawHashes, - VerifyFolderContent.ChunkedContent.ChunkCounts, - OutAbsoluteChunkOrders, - OutLooseChunkHashes, - OutLooseChunkRawSizes, - OutBlockRawHashes); + 4 ReadBuildContentFromCompactBinary(PartManifestWriter.Save(), + VerifyFolderContent.Platform, + VerifyFolderContent.Paths, + VerifyFolderContent.RawHashes, + VerifyFolderContent.RawSizes, + VerifyFolderContent.Attributes, + VerifyFolderContent.ChunkedContent.SequenceRawHashes, + VerifyFolderContent.ChunkedContent.ChunkCounts, + OutAbsoluteChunkOrders, + OutLooseChunkHashes, + OutLooseChunkRawSizes, + OutBlockRawHashes); ZEN_ASSERT(OutBlockRawHashes == AllChunkBlockHashes); for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++) @@ -2483,7 +2723,7 @@ namespace { Stopwatch PutBuildPartResultTimer; std::pair> PutBuildPartResult = Storage.PutBuildPart(BuildId, BuildPartId, BuildPartName, PartManifest); - ZEN_CONSOLE("PutBuildPart took {}, payload size {}. {} attachments are missing.", + ZEN_CONSOLE("PutBuildPart took {}, payload size {}. {} attachments are needed.", NiceTimeSpanMs(PutBuildPartResultTimer.GetElapsedTimeMs()), NiceBytes(PartManifest.GetSize()), PutBuildPartResult.second.size()); @@ -2504,7 +2744,7 @@ namespace { ZEN_CONSOLE( "Generated {} ({} {}B/s) and uploaded {} ({}) blocks. " "Compressed {} ({} {}B/s) and uploaded {} ({}) chunks. " - "Transferred {} ({}B/s) in {}", + "Transferred {} ({}bits/s) in {}", TempGenerateBlocksStats.GeneratedBlockCount.load(), NiceBytes(TempGenerateBlocksStats.GeneratedBlockByteCount.load()), NiceNum(GetBytesPerSecond(TempGenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, @@ -2571,7 +2811,7 @@ namespace { UploadAttachments(PutBuildPartResult.second); } - while (true) + while (!AbortFlag) { Stopwatch FinalizeBuildPartTimer; std::vector Needs = Storage.FinalizeBuildPart(BuildId, BuildPartId, PartHash); @@ -2586,14 +2826,14 @@ namespace { UploadAttachments(Needs); } - if (CreateBuild) + if (CreateBuild && !AbortFlag) { Stopwatch FinalizeBuildTimer; Storage.FinalizeBuild(BuildId); ZEN_CONSOLE("FinalizeBuild took {}", NiceTimeSpanMs(FinalizeBuildTimer.GetElapsedTimeMs())); } - if (!NewBlocks.BlockDescriptions.empty()) + if (!NewBlocks.BlockDescriptions.empty() && !AbortFlag) { uint64_t UploadBlockMetadataCount = 0; std::vector BlockHashes; @@ -2619,13 +2859,11 @@ namespace { UploadStats.ElapsedWallTimeUS += ElapsedUS; ZEN_CONSOLE("Uploaded metadata for {} blocks in {}", UploadBlockMetadataCount, NiceTimeSpanMs(ElapsedUS / 1000)); } + } - std::vector VerifyBlockDescriptions = Storage.GetBlockMetadata(BuildId, BlockHashes); - if (VerifyBlockDescriptions.size() != BlockHashes.size()) - { - throw std::runtime_error(fmt::format("Uploaded blocks could not all be found, {} blocks are missing", - BlockHashes.size() - VerifyBlockDescriptions.size())); - } + if (PostUploadVerify && !AbortFlag) + { + ValidateBuildPart(Storage, BuildId, BuildPartId, BuildPartName); } const double DeltaByteCountPercent = @@ -2786,7 +3024,7 @@ namespace { NiceTimeSpanMs(ProcessTimer.GetElapsedTimeMs())); } - void VerifyFolder(const ChunkedFolderContent& Content, const std::filesystem::path& Path) + void VerifyFolder(const ChunkedFolderContent& Content, const std::filesystem::path& Path, bool VerifyFileHash) { ProgressBar ProgressBar(UsePlainProgress); std::atomic FilesVerified(0); @@ -2879,18 +3117,18 @@ namespace { }); FilesFailed++; } - else if (SizeOnDisk > 0) + else if (SizeOnDisk > 0 && VerifyFileHash) { const IoHash& ExpectedRawHash = Content.RawHashes[PathIndex]; IoBuffer Buffer = IoBufferBuilder::MakeFromFile(TargetPath); IoHash RawHash = IoHash::HashBuffer(Buffer); if (RawHash != ExpectedRawHash) { - uint64_t FileOffset = 0; - const uint32_t SequenceRawHashesIndex = Lookup.RawHashToSequenceRawHashIndex.at(ExpectedRawHash); - const uint32_t OrderOffset = Lookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashesIndex]; + uint64_t FileOffset = 0; + const uint32_t SequenceIndex = Lookup.RawHashToSequenceIndex.at(ExpectedRawHash); + const uint32_t OrderOffset = Lookup.SequenceIndexChunkOrderOffset[SequenceIndex]; for (uint32_t OrderIndex = OrderOffset; - OrderIndex < OrderOffset + Content.ChunkedContent.ChunkCounts[SequenceRawHashesIndex]; + OrderIndex < OrderOffset + Content.ChunkedContent.ChunkCounts[SequenceIndex]; OrderIndex++) { uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; @@ -2933,12 +3171,13 @@ namespace { Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { ZEN_UNUSED(IsAborted, PendingWork); + std::string Details = fmt::format("Verified {}/{} ({}). Failed files: {}", + FilesVerified.load(), + PathCount, + NiceBytes(ReadBytes.load()), + FilesFailed.load()); ProgressBar.UpdateState({.Task = "Verifying files ", - .Details = fmt::format("Verified {} files out of {}. Verfied: {}. Failed files: {}", - FilesVerified.load(), - PathCount, - NiceBytes(ReadBytes.load()), - FilesFailed.load()), + .Details = Details, .TotalCount = gsl::narrow(PathCount), .RemainingCount = gsl::narrow(PathCount - FilesVerified.load())}, false); @@ -3018,19 +3257,19 @@ namespace { std::unique_ptr OpenFileWriter; }; - std::vector GetRemainingChunkTargets( - const std::vector& RemotePathIndexWantsCopyFromCacheFlags, - const ChunkedContentLookup& Lookup, - uint32_t ChunkIndex) + std::vector GetRemainingChunkTargets( + std::span> SequenceIndexChunksLeftToWriteCounters, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex) { - std::span ChunkSources = GetChunkLocations(Lookup, ChunkIndex); - std::vector ChunkTargetPtrs; + std::span ChunkSources = GetChunkSequenceLocations(Lookup, ChunkIndex); + std::vector ChunkTargetPtrs; if (!ChunkSources.empty()) { ChunkTargetPtrs.reserve(ChunkSources.size()); - for (const ChunkedContentLookup::ChunkLocation& Source : ChunkSources) + for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources) { - if (!RemotePathIndexWantsCopyFromCacheFlags[Source.PathIndex]) + if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0) { ChunkTargetPtrs.push_back(&Source); } @@ -3039,20 +3278,20 @@ namespace { return ChunkTargetPtrs; }; - bool WriteBlockToDisk(const std::filesystem::path& CacheFolderPath, - const ChunkedFolderContent& Content, - const std::vector& RemotePathIndexWantsCopyFromCacheFlags, - const CompositeBuffer& DecompressedBlockBuffer, - const ChunkedContentLookup& Lookup, - std::atomic* RemoteChunkIndexNeedsCopyFromSourceFlags, - uint32_t& OutChunksComplete, - uint64_t& OutBytesWritten) + bool WriteBlockToDisk(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + std::span> SequenceIndexChunksLeftToWriteCounters, + const CompositeBuffer& DecompressedBlockBuffer, + const ChunkedContentLookup& Lookup, + std::atomic* RemoteChunkIndexNeedsCopyFromSourceFlags, + uint32_t& OutChunksComplete, + uint64_t& OutBytesWritten) { std::vector ChunkBuffers; struct WriteOpData { - const ChunkedContentLookup::ChunkLocation* Target; - size_t ChunkBufferIndex; + const ChunkedContentLookup::ChunkSequenceLocation* Target; + size_t ChunkBufferIndex; }; std::vector WriteOps; @@ -3063,9 +3302,9 @@ namespace { [&](CompressedBuffer&& Chunk, const IoHash& ChunkHash) { if (auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); It != Lookup.ChunkHashToChunkIndex.end()) { - const uint32_t ChunkIndex = It->second; - std::vector ChunkTargetPtrs = - GetRemainingChunkTargets(RemotePathIndexWantsCopyFromCacheFlags, Lookup, ChunkIndex); + const uint32_t ChunkIndex = It->second; + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, Lookup, ChunkIndex); if (!ChunkTargetPtrs.empty()) { @@ -3078,8 +3317,8 @@ namespace { throw std::runtime_error(fmt::format("Decompression of build blob {} failed", ChunkHash)); } ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed)); - ZEN_ASSERT(Decompressed.GetSize() == Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); - for (const ChunkedContentLookup::ChunkLocation* Target : ChunkTargetPtrs) + ZEN_ASSERT(Decompressed.GetSize() == RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs) { WriteOps.push_back(WriteOpData{.Target = Target, .ChunkBufferIndex = ChunkBuffers.size()}); } @@ -3095,11 +3334,11 @@ namespace { if (!AbortFlag) { std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOpData& Lhs, const WriteOpData& Rhs) { - if (Lhs.Target->PathIndex < Rhs.Target->PathIndex) + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) { return true; } - if (Lhs.Target->PathIndex > Rhs.Target->PathIndex) + if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) { return false; } @@ -3114,23 +3353,50 @@ namespace { { break; } - const CompositeBuffer& Chunk = ChunkBuffers[WriteOp.ChunkBufferIndex]; - const uint32_t PathIndex = WriteOp.Target->PathIndex; - const uint64_t ChunkSize = Chunk.GetSize(); - const uint64_t FileOffset = WriteOp.Target->Offset; - ZEN_ASSERT(FileOffset + ChunkSize <= Content.RawSizes[PathIndex]); + const CompositeBuffer& Chunk = ChunkBuffers[WriteOp.ChunkBufferIndex]; + const uint32_t SequenceIndex = WriteOp.Target->SequenceIndex; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() <= + RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() > 0); + const uint64_t ChunkSize = Chunk.GetSize(); + const uint64_t FileOffset = WriteOp.Target->Offset; + const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; + ZEN_ASSERT(FileOffset + ChunkSize <= RemoteContent.RawSizes[PathIndex]); OpenFileCache.WriteToFile( - PathIndex, - [&CacheFolderPath, &Content](uint32_t TargetIndex) { - return (CacheFolderPath / Content.RawHashes[TargetIndex].ToHexString()).make_preferred(); + SequenceIndex, + [&CacheFolderPath, &RemoteContent](uint32_t SequenceIndex) { + return GetTempChunkedSequenceFileName(CacheFolderPath, + RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); }, Chunk, FileOffset, - Content.RawSizes[PathIndex]); + RemoteContent.RawSizes[PathIndex]); OutBytesWritten += ChunkSize; } } + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open (WriteFileCache) + for (const WriteOpData& WriteOp : WriteOps) + { + const uint32_t RemoteSequenceIndex = WriteOp.Target->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) + { + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + const IoHash VerifyChunkHash = IoHash::HashBuffer( + IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error(fmt::format("Written hunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + SequenceRawHash)); + } + std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), + GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); + } + } + } OutChunksComplete += gsl::narrow(ChunkBuffers.size()); } } @@ -3171,50 +3437,52 @@ namespace { return Decompressed; } - void WriteChunkToDisk(const std::filesystem::path& CacheFolderPath, - const ChunkedFolderContent& Content, - std::span ChunkTargets, - const CompositeBuffer& ChunkData, - WriteFileCache& OpenFileCache, - uint64_t& OutBytesWritten) + void WriteChunkToDisk(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::span ChunkTargets, + const CompositeBuffer& ChunkData, + WriteFileCache& OpenFileCache, + uint64_t& OutBytesWritten) { - for (const ChunkedContentLookup::ChunkLocation* TargetPtr : ChunkTargets) + for (const ChunkedContentLookup::ChunkSequenceLocation* TargetPtr : ChunkTargets) { - const auto& Target = *TargetPtr; - const uint64_t FileOffset = Target.Offset; + const auto& Target = *TargetPtr; + const uint64_t FileOffset = Target.Offset; + const uint32_t SequenceIndex = Target.SequenceIndex; + const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; OpenFileCache.WriteToFile( - Target.PathIndex, - [&CacheFolderPath, &Content](uint32_t TargetIndex) { - return (CacheFolderPath / Content.RawHashes[TargetIndex].ToHexString()).make_preferred(); - // return (Path / Content.Paths[TargetIndex]).make_preferred(); + SequenceIndex, + [&CacheFolderPath, &Content](uint32_t SequenceIndex) { + return GetTempChunkedSequenceFileName(CacheFolderPath, Content.ChunkedContent.SequenceRawHashes[SequenceIndex]); }, ChunkData, FileOffset, - Content.RawSizes[Target.PathIndex]); + Content.RawSizes[PathIndex]); OutBytesWritten += ChunkData.GetSize(); } } - void DownloadLargeBlob(BuildStorage& Storage, - const std::filesystem::path& TempFolderPath, - const std::filesystem::path& CacheFolderPath, - const ChunkedFolderContent& RemoteContent, - const ChunkedContentLookup& RemoteLookup, - const Oid& BuildId, - const IoHash& ChunkHash, - const std::uint64_t PreferredMultipartChunkSize, - const std::vector& ChunkTargetPtrs, - ParallellWork& Work, - WorkerThreadPool& WritePool, - WorkerThreadPool& NetworkPool, - std::atomic& BytesWritten, - std::atomic& WriteToDiskBytes, - std::atomic& BytesDownloaded, - std::atomic& LooseChunksBytes, - std::atomic& DownloadedChunks, - std::atomic& ChunksComplete, - std::atomic& MultipartAttachmentCount) + void DownloadLargeBlob(BuildStorage& Storage, + const std::filesystem::path& TempFolderPath, + const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& RemoteLookup, + const Oid& BuildId, + const IoHash& ChunkHash, + const std::uint64_t PreferredMultipartChunkSize, + const std::vector& ChunkTargetPtrs, + std::span> SequenceIndexChunksLeftToWriteCounters, + ParallellWork& Work, + WorkerThreadPool& WritePool, + WorkerThreadPool& NetworkPool, + std::atomic& WriteToDiskBytes, + std::atomic& BytesDownloaded, + std::atomic& LooseChunksBytes, + std::atomic& DownloadedChunks, + std::atomic& ChunksComplete, + std::atomic& MultipartAttachmentCount) { struct WorkloadData { @@ -3242,11 +3510,11 @@ namespace { ChunkHash, &BytesDownloaded, &LooseChunksBytes, - &BytesWritten, &WriteToDiskBytes, &DownloadedChunks, &ChunksComplete, - ChunkTargetPtrs = std::vector( + SequenceIndexChunksLeftToWriteCounters, + ChunkTargetPtrs = std::vector( ChunkTargetPtrs)](uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining) { BytesDownloaded += Chunk.GetSize(); LooseChunksBytes += Chunk.GetSize(); @@ -3268,8 +3536,8 @@ namespace { Offset, BytesRemaining, &ChunksComplete, - &BytesWritten, &WriteToDiskBytes, + SequenceIndexChunksLeftToWriteCounters, ChunkTargetPtrs](std::atomic&) { if (!AbortFlag) { @@ -3289,7 +3557,9 @@ namespace { uint64_t TotalBytesWritten = 0; - uint32_t ChunkIndex = RemoteLookup.ChunkHashToChunkIndex.at(ChunkHash); + auto ChunkHashToChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(ChunkHashToChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); + uint32_t ChunkIndex = ChunkHashToChunkIndexIt->second; SharedBuffer Chunk = Decompress(CompositeBuffer(std::move(CompressedPart)), ChunkHash, @@ -3304,14 +3574,38 @@ namespace { WriteChunkToDisk(CacheFolderPath, RemoteContent, + RemoteLookup, ChunkTargetPtrs, CompositeBuffer(Chunk), OpenFileCache, TotalBytesWritten); ChunksComplete++; - BytesWritten += TotalBytesWritten; WriteToDiskBytes += TotalBytesWritten; } + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open (WriteFileCache) + for (const ChunkedContentLookup::ChunkSequenceLocation* Location : ChunkTargetPtrs) + { + const uint32_t RemoteSequenceIndex = Location->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) + { + const IoHash& SequenceRawHash = + RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( + GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Written chunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + SequenceRawHash)); + } + std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), + GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); + } + } + } } }, Work.DefaultErrorFunction()); @@ -3368,45 +3662,37 @@ namespace { const std::filesystem::path CacheFolderPath = Path / ZenTempCacheFolderName; - tsl::robin_map RawHashToLocalPathIndex; + Stopwatch CacheMappingTimer; - { - Stopwatch CacheTimer; + uint64_t CacheMappedBytesForReuse = 0; + std::vector> SequenceIndexChunksLeftToWriteCounters(RemoteContent.ChunkedContent.SequenceRawHashes.size()); + // std::vector RemoteSequenceIndexIsCachedFlags(RemoteContent.ChunkedContent.SequenceRawHashes.size(), false); + std::vector RemoteChunkIndexIsCachedFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); + // Guard if he same chunks is in multiple blocks (can happen due to block reuse, cache reuse blocks writes directly) + std::vector> RemoteChunkIndexNeedsCopyFromSourceFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); - for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) + // Pick up all whole files we can use from current local state + for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < RemoteContent.ChunkedContent.SequenceRawHashes.size(); + RemoteSequenceIndex++) + { + const IoHash& RemoteSequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + if (auto It = LocalLookup.RawHashToSequenceIndex.find(RemoteSequenceRawHash); It != LocalLookup.RawHashToSequenceIndex.end()) { - ZEN_ASSERT_SLOW(std::filesystem::exists(Path / LocalContent.Paths[LocalPathIndex])); - - if (LocalContent.RawSizes[LocalPathIndex] > 0) - { - const uint32_t SequenceRawHashIndex = - LocalLookup.RawHashToSequenceRawHashIndex.at(LocalContent.RawHashes[LocalPathIndex]); - uint32_t ChunkCount = LocalContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - if (ChunkCount > 0) - { - const IoHash LocalRawHash = LocalContent.RawHashes[LocalPathIndex]; - if (!RawHashToLocalPathIndex.contains(LocalRawHash)) - { - RawHashToLocalPathIndex.insert_or_assign(LocalRawHash, LocalPathIndex); - } - } - } + SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = 0; + const uint32_t RemotePathIndex = GetFirstPathIndexForRawHash(RemoteLookup, RemoteSequenceRawHash); + CacheMappedBytesForReuse += RemoteContent.RawSizes[RemotePathIndex]; + } + else + { + SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]; } } - Stopwatch CacheMappingTimer; - - std::atomic BytesWritten = 0; - uint64_t CacheMappedBytesForReuse = 0; - - std::vector RemotePathIndexWantsCopyFromCacheFlags(RemoteContent.Paths.size(), false); - std::vector RemoteChunkIndexWantsCopyFromCacheFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); - // Guard if he same chunks is in multiple blocks (can happen due to block reuse, cache reuse blocks writes directly) - std::vector> RemoteChunkIndexNeedsCopyFromSourceFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); + // Pick up all chunks in current local state struct CacheCopyData { - uint32_t LocalPathIndex; - std::vector TargetChunkLocationPtrs; + uint32_t LocalSequenceIndex; + std::vector TargetChunkLocationPtrs; struct ChunkTarget { uint32_t TargetChunkLocationCount; @@ -3418,43 +3704,30 @@ namespace { tsl::robin_map RawHashToCacheCopyDataIndex; std::vector CacheCopyDatas; - uint32_t ChunkCountToWrite = 0; - // Pick up all whole files we can use from current local state - for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) + for (uint32_t LocalSequenceIndex = 0; LocalSequenceIndex < LocalContent.ChunkedContent.SequenceRawHashes.size(); + LocalSequenceIndex++) { - const IoHash& RemoteRawHash = RemoteContent.RawHashes[RemotePathIndex]; - if (auto It = RawHashToLocalPathIndex.find(RemoteRawHash); It != RawHashToLocalPathIndex.end()) - { - RemotePathIndexWantsCopyFromCacheFlags[RemotePathIndex] = true; - CacheMappedBytesForReuse += RemoteContent.RawSizes[RemotePathIndex]; - } - } - - // Pick up all chunks in current local state - for (auto& CachedLocalFile : RawHashToLocalPathIndex) - { - const IoHash& LocalFileRawHash = CachedLocalFile.first; - const uint32_t LocalPathIndex = CachedLocalFile.second; - const uint32_t LocalSequenceRawHashIndex = LocalLookup.RawHashToSequenceRawHashIndex.at(LocalFileRawHash); - const uint32_t LocalOrderOffset = LocalLookup.SequenceRawHashIndexChunkOrderOffset[LocalSequenceRawHashIndex]; + const IoHash& LocalSequenceRawHash = LocalContent.ChunkedContent.SequenceRawHashes[LocalSequenceIndex]; + const uint32_t LocalOrderOffset = LocalLookup.SequenceIndexChunkOrderOffset[LocalSequenceIndex]; { uint64_t SourceOffset = 0; - const uint32_t LocalChunkCount = LocalContent.ChunkedContent.ChunkCounts[LocalSequenceRawHashIndex]; + const uint32_t LocalChunkCount = LocalContent.ChunkedContent.ChunkCounts[LocalSequenceIndex]; for (uint32_t LocalOrderIndex = 0; LocalOrderIndex < LocalChunkCount; LocalOrderIndex++) { const uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[LocalOrderOffset + LocalOrderIndex]; const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; const uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; + if (auto RemoteChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(LocalChunkHash); RemoteChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) { const uint32_t RemoteChunkIndex = RemoteChunkIt->second; - if (!RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex]) + if (!RemoteChunkIndexIsCachedFlags[RemoteChunkIndex]) { - std::vector ChunkTargetPtrs = - GetRemainingChunkTargets(RemotePathIndexWantsCopyFromCacheFlags, RemoteLookup, RemoteChunkIndex); + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); if (!ChunkTargetPtrs.empty()) { @@ -3462,7 +3735,7 @@ namespace { .TargetChunkLocationCount = gsl::narrow(ChunkTargetPtrs.size()), .ChunkRawSize = LocalChunkRawSize, .CacheFileOffset = SourceOffset}; - if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(LocalFileRawHash); + if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(LocalSequenceRawHash); CopySourceIt != RawHashToCacheCopyDataIndex.end()) { CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; @@ -3473,14 +3746,14 @@ namespace { } else { - RawHashToCacheCopyDataIndex.insert_or_assign(LocalFileRawHash, CacheCopyDatas.size()); + RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size()); CacheCopyDatas.push_back( - CacheCopyData{.LocalPathIndex = LocalPathIndex, + CacheCopyData{.LocalSequenceIndex = LocalSequenceIndex, .TargetChunkLocationPtrs = ChunkTargetPtrs, .ChunkTargets = std::vector{Target}}); } CacheMappedBytesForReuse += LocalChunkRawSize; - RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex] = true; + RemoteChunkIndexIsCachedFlags[RemoteChunkIndex] = true; } } } @@ -3489,16 +3762,24 @@ namespace { } } + if (CacheMappedBytesForReuse > 0) + { + ZEN_CONSOLE("Mapped {} cached data for reuse in {}", + NiceBytes(CacheMappedBytesForReuse), + NiceTimeSpanMs(CacheMappingTimer.GetElapsedTimeMs())); + } + + uint32_t ChunkCountToWrite = 0; for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) { - if (RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex]) + if (RemoteChunkIndexIsCachedFlags[RemoteChunkIndex]) { ChunkCountToWrite++; } else { - std::vector ChunkTargetPtrs = - GetRemainingChunkTargets(RemotePathIndexWantsCopyFromCacheFlags, RemoteLookup, RemoteChunkIndex); + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); if (!ChunkTargetPtrs.empty()) { RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex] = true; @@ -3506,12 +3787,13 @@ namespace { } } } + std::atomic ChunkCountWritten = 0; - ZEN_CONSOLE("Mapped {} cached data for reuse in {}", - NiceBytes(CacheMappedBytesForReuse), - NiceTimeSpanMs(CacheMappingTimer.GetElapsedTimeMs())); { + FilteredRate FilteredDownloadedBytesPerSecond; + FilteredRate FilteredWrittenBytesPerSecond; + WorkerThreadPool& NetworkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // WorkerThreadPool& WritePool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // @@ -3520,110 +3802,6 @@ namespace { std::atomic BytesDownloaded = 0; - for (size_t CopyDataIndex = 0; CopyDataIndex < CacheCopyDatas.size(); CopyDataIndex++) - { - if (AbortFlag) - { - break; - } - - Work.ScheduleWork( - WritePool, // GetSyncWorkerPool(),// - [&, CopyDataIndex](std::atomic&) { - if (!AbortFlag) - { - const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; - const std::filesystem::path LocalFilePath = - (Path / LocalContent.Paths[CopyData.LocalPathIndex]).make_preferred(); - if (!CopyData.TargetChunkLocationPtrs.empty()) - { - uint64_t CacheLocalFileBytesRead = 0; - - size_t TargetStart = 0; - const std::span AllTargets( - CopyData.TargetChunkLocationPtrs); - - struct WriteOp - { - const ChunkedContentLookup::ChunkLocation* Target; - uint64_t CacheFileOffset; - uint64_t ChunkSize; - }; - - std::vector WriteOps; - WriteOps.reserve(AllTargets.size()); - - for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) - { - std::span TargetRange = - AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); - for (const ChunkedContentLookup::ChunkLocation* Target : TargetRange) - { - WriteOps.push_back(WriteOp{.Target = Target, - .CacheFileOffset = ChunkTarget.CacheFileOffset, - .ChunkSize = ChunkTarget.ChunkRawSize}); - } - TargetStart += ChunkTarget.TargetChunkLocationCount; - } - - std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { - if (Lhs.Target->PathIndex < Rhs.Target->PathIndex) - { - return true; - } - else if (Lhs.Target->PathIndex > Rhs.Target->PathIndex) - { - return false; - } - if (Lhs.Target->Offset < Rhs.Target->Offset) - { - return true; - } - return false; - }); - - { - BufferedOpenFile SourceFile(LocalFilePath); - WriteFileCache OpenFileCache; - for (const WriteOp& Op : WriteOps) - { - if (AbortFlag) - { - break; - } - const uint32_t RemotePathIndex = Op.Target->PathIndex; - const uint64_t ChunkSize = Op.ChunkSize; - CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ChunkSize); - - ZEN_ASSERT(Op.Target->Offset + ChunkSource.GetSize() <= RemoteContent.RawSizes[RemotePathIndex]); - - OpenFileCache.WriteToFile( - RemotePathIndex, - [&CacheFolderPath, &RemoteContent](uint32_t TargetIndex) { - return (CacheFolderPath / RemoteContent.RawHashes[TargetIndex].ToHexString()) - .make_preferred(); - }, - ChunkSource, - Op.Target->Offset, - RemoteContent.RawSizes[RemotePathIndex]); - BytesWritten += ChunkSize; - WriteToDiskBytes += ChunkSize; - CacheLocalFileBytesRead += ChunkSize; // TODO: This should be the sum of unique chunk sizes? - } - } - if (!AbortFlag) - { - ChunkCountWritten += gsl::narrow(CopyData.ChunkTargets.size()); - ZEN_DEBUG("Copied {} from {}", - NiceBytes(CacheLocalFileBytesRead), - LocalContent.Paths[CopyData.LocalPathIndex]); - } - } - } - }, - Work.DefaultErrorFunction()); - } - for (const IoHash ChunkHash : LooseChunkHashes) { if (AbortFlag) @@ -3631,8 +3809,10 @@ namespace { break; } - uint32_t RemoteChunkIndex = RemoteLookup.ChunkHashToChunkIndex.at(ChunkHash); - if (RemoteChunkIndexWantsCopyFromCacheFlags[RemoteChunkIndex]) + auto RemoteChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(RemoteChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); + const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second; + if (RemoteChunkIndexIsCachedFlags[RemoteChunkIndex]) { ZEN_DEBUG("Skipping chunk {} due to cache reuse", ChunkHash); continue; @@ -3640,8 +3820,8 @@ namespace { bool NeedsCopy = true; if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false)) { - std::vector ChunkTargetPtrs = - GetRemainingChunkTargets(RemotePathIndexWantsCopyFromCacheFlags, RemoteLookup, RemoteChunkIndex); + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); if (ChunkTargetPtrs.empty()) { @@ -3654,6 +3834,7 @@ namespace { [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) { if (!AbortFlag) { + FilteredDownloadedBytesPerSecond.Start(); if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) { DownloadLargeBlob(Storage, @@ -3665,10 +3846,10 @@ namespace { ChunkHash, PreferredMultipartChunkSize, ChunkTargetPtrs, + SequenceIndexChunksLeftToWriteCounters, Work, WritePool, NetworkPool, - BytesWritten, WriteToDiskBytes, BytesDownloaded, LooseChunksBytes, @@ -3698,6 +3879,7 @@ namespace { std::atomic&) { if (!AbortFlag) { + FilteredWrittenBytesPerSecond.Start(); uint64_t TotalBytesWritten = 0; SharedBuffer Chunk = Decompress(CompressedPart, @@ -3708,14 +3890,45 @@ namespace { WriteFileCache OpenFileCache; WriteChunkToDisk(CacheFolderPath, RemoteContent, + RemoteLookup, ChunkTargetPtrs, CompositeBuffer(Chunk), OpenFileCache, TotalBytesWritten); } - ChunkCountWritten++; - BytesWritten += TotalBytesWritten; - WriteToDiskBytes += TotalBytesWritten; + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open + // (WriteFileCache) + for (const ChunkedContentLookup::ChunkSequenceLocation* Location : + ChunkTargetPtrs) + { + const uint32_t RemoteSequenceIndex = Location->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub( + 1) == 1) + { + const IoHash& SequenceRawHash = + RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + const IoHash VerifyChunkHash = + IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( + GetTempChunkedSequenceFileName(CacheFolderPath, + SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error(fmt::format( + "Written hunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + SequenceRawHash)); + } + std::filesystem::rename( + GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), + GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); + } + } + + ChunkCountWritten++; + WriteToDiskBytes += TotalBytesWritten; + } } }, Work.DefaultErrorFunction()); @@ -3728,6 +3941,139 @@ namespace { } } + for (size_t CopyDataIndex = 0; CopyDataIndex < CacheCopyDatas.size(); CopyDataIndex++) + { + if (AbortFlag) + { + break; + } + + Work.ScheduleWork( + WritePool, // GetSyncWorkerPool(),// + [&, CopyDataIndex](std::atomic&) { + if (!AbortFlag) + { + FilteredWrittenBytesPerSecond.Start(); + const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; + const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.LocalSequenceIndex]; + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + if (!CopyData.TargetChunkLocationPtrs.empty()) + { + uint64_t CacheLocalFileBytesRead = 0; + + size_t TargetStart = 0; + const std::span AllTargets( + CopyData.TargetChunkLocationPtrs); + + struct WriteOp + { + const ChunkedContentLookup::ChunkSequenceLocation* Target; + uint64_t CacheFileOffset; + uint64_t ChunkSize; + }; + + std::vector WriteOps; + WriteOps.reserve(AllTargets.size()); + + for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) + { + std::span TargetRange = + AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) + { + WriteOps.push_back(WriteOp{.Target = Target, + .CacheFileOffset = ChunkTarget.CacheFileOffset, + .ChunkSize = ChunkTarget.ChunkRawSize}); + } + TargetStart += ChunkTarget.TargetChunkLocationCount; + } + + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; + } + else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { + return false; + } + if (Lhs.Target->Offset < Rhs.Target->Offset) + { + return true; + } + return false; + }); + + if (!AbortFlag) + { + BufferedOpenFile SourceFile(LocalFilePath); + WriteFileCache OpenFileCache; + for (const WriteOp& Op : WriteOps) + { + if (AbortFlag) + { + break; + } + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() <= + RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() > 0); + const uint32_t RemotePathIndex = RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; + const uint64_t ChunkSize = Op.ChunkSize; + CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ChunkSize); + + ZEN_ASSERT(Op.Target->Offset + ChunkSource.GetSize() <= RemoteContent.RawSizes[RemotePathIndex]); + + OpenFileCache.WriteToFile( + RemoteSequenceIndex, + [&CacheFolderPath, &RemoteContent](uint32_t SequenceIndex) { + return GetTempChunkedSequenceFileName( + CacheFolderPath, + RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + }, + ChunkSource, + Op.Target->Offset, + RemoteContent.RawSizes[RemotePathIndex]); + WriteToDiskBytes += ChunkSize; + CacheLocalFileBytesRead += ChunkSize; // TODO: This should be the sum of unique chunk sizes? + } + } + if (!AbortFlag) + { + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open (WriteFileCache) + for (const WriteOp& Op : WriteOps) + { + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) + { + const IoHash& SequenceRawHash = + RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( + GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Written hunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + SequenceRawHash)); + } + std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), + GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); + } + } + } + + ChunkCountWritten += gsl::narrow(CopyData.ChunkTargets.size()); + ZEN_DEBUG("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), LocalContent.Paths[LocalPathIndex]); + } + } + } + }, + Work.DefaultErrorFunction()); + } + size_t BlockCount = BlockDescriptions.size(); std::atomic BlocksComplete = 0; @@ -3762,6 +4108,7 @@ namespace { [&, BlockIndex](std::atomic&) { if (!AbortFlag) { + FilteredDownloadedBytesPerSecond.Start(); IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescriptions[BlockIndex].BlockHash); if (!BlockBuffer) { @@ -3781,6 +4128,7 @@ namespace { [&, BlockIndex, BlockBuffer = std::move(Payload)](std::atomic&) { if (!AbortFlag) { + FilteredWrittenBytesPerSecond.Start(); IoHash BlockRawHash; uint64_t BlockRawSize; CompressedBuffer CompressedBlockBuffer = @@ -3812,14 +4160,13 @@ namespace { uint32_t ChunksReadFromBlock = 0; if (WriteBlockToDisk(CacheFolderPath, RemoteContent, - RemotePathIndexWantsCopyFromCacheFlags, + SequenceIndexChunksLeftToWriteCounters, DecompressedBlockBuffer, RemoteLookup, RemoteChunkIndexNeedsCopyFromSourceFlags.data(), ChunksReadFromBlock, BytesWrittenToDisk)) { - BytesWritten += BytesWrittenToDisk; WriteToDiskBytes += BytesWrittenToDisk; ChunkCountWritten += ChunksReadFromBlock; } @@ -3851,20 +4198,27 @@ namespace { Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { ZEN_UNUSED(IsAborted, PendingWork); ZEN_ASSERT(ChunkCountToWrite >= ChunkCountWritten.load()); - WriteProgressBar.UpdateState( - {.Task = "Writing chunks ", - .Details = fmt::format("Written {} chunks out of {}. {} ouf of {} blocks complete. Downloaded: {}. Written: {}", - ChunkCountWritten.load(), - ChunkCountToWrite, - BlocksComplete.load(), - BlocksNeededCount, - NiceBytes(BytesDownloaded.load()), - NiceBytes(BytesWritten.load())), - .TotalCount = gsl::narrow(ChunkCountToWrite), - .RemainingCount = gsl::narrow(ChunkCountToWrite - ChunkCountWritten.load())}, - false); + FilteredWrittenBytesPerSecond.Update(WriteToDiskBytes.load()); + FilteredDownloadedBytesPerSecond.Update(BytesDownloaded.load()); + std::string Details = fmt::format("{}/{} chunks. {}/{} blocks. {} {}bits/s downloaded. {} {}B/s written", + ChunkCountWritten.load(), + ChunkCountToWrite, + BlocksComplete.load(), + BlocksNeededCount, + NiceBytes(BytesDownloaded.load()), + NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8), + NiceBytes(WriteToDiskBytes.load()), + NiceNum(FilteredWrittenBytesPerSecond.GetCurrent())); + WriteProgressBar.UpdateState({.Task = "Writing chunks ", + .Details = Details, + .TotalCount = gsl::narrow(ChunkCountToWrite), + .RemainingCount = gsl::narrow(ChunkCountToWrite - ChunkCountWritten.load())}, + false); }); + FilteredWrittenBytesPerSecond.Stop(); + FilteredDownloadedBytesPerSecond.Stop(); + if (AbortFlag) { return; @@ -3873,6 +4227,11 @@ namespace { WriteProgressBar.Finish(); } + for (const auto& SequenceIndexChunksLeftToWriteCounter : SequenceIndexChunksLeftToWriteCounters) + { + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounter.load() == 0); + } + std::vector> Targets; Targets.reserve(RemoteContent.Paths.size()); for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) @@ -3884,13 +4243,13 @@ namespace { }); // Move all files we will reuse to cache folder - for (auto It : RawHashToLocalPathIndex) + for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) { - const IoHash& RawHash = It.first; - if (RemoteLookup.RawHashToSequenceRawHashIndex.contains(RawHash)) + const IoHash& RawHash = LocalContent.RawHashes[LocalPathIndex]; + if (RemoteLookup.RawHashToSequenceIndex.contains(RawHash)) { - const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[It.second]).make_preferred(); - const std::filesystem::path CacheFilePath = (CacheFolderPath / RawHash.ToHexString()).make_preferred(); + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash); ZEN_ASSERT_SLOW(std::filesystem::exists(LocalFilePath)); SetFileReadOnly(LocalFilePath, false); std::filesystem::rename(LocalFilePath, CacheFilePath); @@ -3989,7 +4348,7 @@ namespace { } else { - const std::filesystem::path CacheFilePath = (CacheFolderPath / RawHash.ToHexString()).make_preferred(); + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash); ZEN_ASSERT_SLOW(std::filesystem::exists(CacheFilePath)); CreateDirectories(FirstTargetFilePath.parent_path()); if (std::filesystem::exists(FirstTargetFilePath)) @@ -4045,12 +4404,12 @@ namespace { Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { ZEN_UNUSED(IsAborted, PendingWork); - RebuildProgressBar.UpdateState( - {.Task = "Rebuilding state ", - .Details = fmt::format("Written {} files out of {}", TargetsComplete.load(), Targets.size()), - .TotalCount = gsl::narrow(Targets.size()), - .RemainingCount = gsl::narrow(Targets.size() - TargetsComplete.load())}, - false); + std::string Details = fmt::format("{}/{} files", TargetsComplete.load(), Targets.size()); + RebuildProgressBar.UpdateState({.Task = "Rebuilding state ", + .Details = Details, + .TotalCount = gsl::narrow(Targets.size()), + .RemainingCount = gsl::narrow(Targets.size() - TargetsComplete.load())}, + false); }); if (AbortFlag) @@ -4487,20 +4846,19 @@ namespace { UsePlainProgress ? 5000 : 200, [&](bool, std::ptrdiff_t) { FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); - - ProgressBar.UpdateState( - {.Task = "Scanning files ", - .Details = fmt::format("{}/{} ({}/{}, {}B/s) files, {} ({}) chunks found", - ChunkingStats.FilesProcessed.load(), - UpdatedContent.Paths.size(), - NiceBytes(ChunkingStats.BytesHashed.load()), - NiceBytes(ByteCountToScan), - NiceNum(FilteredBytesHashed.GetCurrent()), - ChunkingStats.UniqueChunksFound.load(), - NiceBytes(ChunkingStats.UniqueBytesFound.load())), - .TotalCount = ByteCountToScan, - .RemainingCount = ByteCountToScan - ChunkingStats.BytesHashed.load()}, - false); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + UpdatedContent.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(ByteCountToScan), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = ByteCountToScan, + .RemainingCount = ByteCountToScan - ChunkingStats.BytesHashed.load()}, + false); }, AbortFlag); if (AbortFlag) @@ -4567,15 +4925,16 @@ namespace { UsePlainProgress ? 5000 : 200, [&](bool, std::ptrdiff_t) { FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + CurrentLocalFolderContent.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + ByteCountToScan, + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); ProgressBar.UpdateState({.Task = "Scanning files ", - .Details = fmt::format("{}/{} ({}/{}, {}B/s) files, {} ({}) chunks found", - ChunkingStats.FilesProcessed.load(), - CurrentLocalFolderContent.Paths.size(), - NiceBytes(ChunkingStats.BytesHashed.load()), - ByteCountToScan, - NiceNum(FilteredBytesHashed.GetCurrent()), - ChunkingStats.UniqueChunksFound.load(), - NiceBytes(ChunkingStats.UniqueBytesFound.load())), + .Details = Details, .TotalCount = ByteCountToScan, .RemainingCount = (ByteCountToScan - ChunkingStats.BytesHashed.load())}, false); @@ -4599,7 +4958,8 @@ namespace { std::span BuildPartNames, const std::filesystem::path& Path, bool AllowMultiparts, - bool WipeTargetFolder) + bool WipeTargetFolder, + bool PostDownloadVerify) { Stopwatch DownloadTimer; @@ -4610,8 +4970,10 @@ namespace { std::filesystem::remove(ZenTempFolder); }); CreateDirectories(Path / ZenTempBlockFolderName); - CreateDirectories(Path / ZenTempChunkFolderName); - CreateDirectories(Path / ZenTempCacheFolderName); + CreateDirectories(Path / ZenTempChunkFolderName); // TODO: Don't clear this - pick up files -> chunks to use + CreateDirectories(Path / + ZenTempCacheFolderName); // TODO: Don't clear this - pick up files and use as sequences (non .tmp extension) and + // delete .tmp (maybe?) - chunk them? How do we know the file is worth chunking? std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; @@ -4724,7 +5086,7 @@ namespace { if (!AbortFlag) { - VerifyFolder(RemoteContent, Path); + VerifyFolder(RemoteContent, Path, PostDownloadVerify); Stopwatch WriteStateTimer; CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderState); @@ -5078,6 +5440,8 @@ BuildsCommand::BuildsCommand() "Path to a text file with one line of [TAB] per file to include.", cxxopts::value(m_ManifestPath), ""); + m_UploadOptions + .add_option("", "", "verify", "Enable post upload verify of all uploaded data", cxxopts::value(m_PostUploadVerify), ""); m_UploadOptions.parse_positional({"local-path", "build-id"}); m_UploadOptions.positional_help("local-path build-id"); @@ -5111,6 +5475,8 @@ BuildsCommand::BuildsCommand() "Allow large attachments to be transfered using multipart protocol. Defaults to true.", cxxopts::value(m_AllowMultiparts), ""); + m_DownloadOptions + .add_option("", "", "verify", "Enable post download verify of all tracked files", cxxopts::value(m_PostDownloadVerify), ""); m_DownloadOptions.parse_positional({"local-path", "build-id", "build-part-name"}); m_DownloadOptions.positional_help("local-path build-id build-part-name"); @@ -5503,7 +5869,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_AllowMultiparts, MetaData, m_CreateBuild, - m_Clean); + m_Clean, + m_PostUploadVerify); if (false) { @@ -5593,7 +5960,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); } - DownloadFolder(*Storage, BuildId, BuildPartIds, m_BuildPartNames, m_Path, m_AllowMultiparts, m_Clean); + DownloadFolder(*Storage, BuildId, BuildPartIds, m_BuildPartNames, m_Path, m_AllowMultiparts, m_Clean, m_PostDownloadVerify); if (false) { @@ -5727,8 +6094,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_BlockReuseMinPercentLimit, m_AllowMultiparts, MetaData, - m_CreateBuild, - m_Clean); + true, + false, + true); if (AbortFlag) { ZEN_CONSOLE("Upload failed."); @@ -5737,7 +6105,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) const std::filesystem::path DownloadPath = m_Path.parent_path() / (m_BuildPartName + "_download"); ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}'", BuildId, BuildPartId, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, true); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, true, true); if (AbortFlag) { ZEN_CONSOLE("Download failed."); @@ -5749,7 +6117,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildPartId, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false, true); if (AbortFlag) { ZEN_CONSOLE("Re-download failed. (identical target)"); @@ -5851,7 +6219,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildPartId, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false, true); if (AbortFlag) { ZEN_CONSOLE("Re-download failed. (scrambled target)"); @@ -5880,7 +6248,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_AllowMultiparts, MetaData2, true, - false); + false, + true); if (AbortFlag) { ZEN_CONSOLE("Upload of scrambled failed."); @@ -5888,7 +6257,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (original)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false, true); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -5896,7 +6265,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false); + DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false, true); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -5904,7 +6273,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false); + DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false, true); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -6032,64 +6401,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); } - Oid BuildPartId = Oid::TryFromHexString(m_BuildPartId); - CbObject Build = Storage->GetBuild(BuildId); - if (!m_BuildPartName.empty()) - { - BuildPartId = Build["parts"sv].AsObjectView()[m_BuildPartName].AsObjectId(); - if (BuildPartId == Oid::Zero) - { - throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", m_BuildId, m_BuildPartName)); - } - } - CbObject BuildPart = Storage->GetBuildPart(BuildId, BuildPartId); - ZEN_CONSOLE("Validating build part {}/{} ({})", BuildId, BuildPartId, NiceBytes(BuildPart.GetSize())); - std::vector ChunkAttachments; - for (CbFieldView LooseFileView : BuildPart["chunkAttachments"sv].AsObjectView()["rawHashes"sv]) - { - ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment()); - } - std::vector BlockAttachments; - for (CbFieldView BlocksView : BuildPart["blockAttachments"sv].AsObjectView()["rawHashes"sv]) - { - BlockAttachments.push_back(BlocksView.AsBinaryAttachment()); - } + Oid BuildPartId = Oid::TryFromHexString(m_BuildPartId); - for (const IoHash& ChunkAttachment : ChunkAttachments) - { - uint64_t CompressedSize; - uint64_t DecompressedSize; - try - { - ValidateBlob(*Storage, BuildId, ChunkAttachment, CompressedSize, DecompressedSize); - ZEN_CONSOLE("Chunk attachment {} ({} -> {}) is valid", - ChunkAttachment, - NiceBytes(CompressedSize), - NiceBytes(DecompressedSize)); - } - catch (const std::exception& Ex) - { - ZEN_CONSOLE("Failed validating chunk attachment {}: {}", ChunkAttachment, Ex.what()); - } - } - - for (const IoHash& BlockAttachment : BlockAttachments) - { - uint64_t CompressedSize; - uint64_t DecompressedSize; - try - { - ValidateChunkBlock(*Storage, BuildId, BlockAttachment, CompressedSize, DecompressedSize); - ZEN_CONSOLE("Block attachment {} ({} -> {}) is valid", - BlockAttachment, - NiceBytes(CompressedSize), - NiceBytes(DecompressedSize)); - } - catch (const std::exception& Ex) - { - ZEN_CONSOLE("Failed validating block attachment {}: {}", BlockAttachment, Ex.what()); - } - } + ValidateBuildPart(*Storage, BuildId, BuildPartId, m_BuildPartName); return AbortFlag ? 13 : 0; } diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h index fa223943b..c54fb4db1 100644 --- a/src/zen/cmds/builds_cmd.h +++ b/src/zen/cmds/builds_cmd.h @@ -78,10 +78,12 @@ private: std::filesystem::path m_Path; cxxopts::Options m_UploadOptions{"upload", "Upload a folder"}; + bool m_PostUploadVerify = false; cxxopts::Options m_DownloadOptions{"download", "Download a folder"}; std::vector m_BuildPartNames; std::vector m_BuildPartIds; + bool m_PostDownloadVerify = false; cxxopts::Options m_DiffOptions{"diff", "Compare two local folders"}; std::filesystem::path m_DiffPath; diff --git a/src/zenhttp/httpclient.cpp b/src/zenhttp/httpclient.cpp index bb15a6fce..7f7e70fef 100644 --- a/src/zenhttp/httpclient.cpp +++ b/src/zenhttp/httpclient.cpp @@ -414,6 +414,11 @@ ValidatePayload(cpr::Response& Response, std::unique_ptrsecond == "application/x-ue-comp") diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index 6dc2a20d8..1552ea823 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -599,10 +599,10 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceRawHashIndex.at(RawHash); - const uint32_t OrderIndexOffset = OverlayLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - ChunkingStatistics Stats; + const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + ChunkingStatistics Stats; std::span OriginalChunkOrder = std::span(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); AddCunkSequence(Stats, @@ -667,9 +667,9 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span { RawHashToSequenceRawHashIndex.insert( {RawHash, gsl::narrow(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceRawHashIndex.at(RawHash); - const uint32_t OrderIndexOffset = BaseLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = BaseLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; ChunkingStatistics Stats; std::span OriginalChunkOrder = std::span(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); @@ -777,46 +777,40 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { struct ChunkLocationReference { - uint32_t ChunkIndex; - ChunkedContentLookup::ChunkLocation Location; + uint32_t ChunkIndex; + ChunkedContentLookup::ChunkSequenceLocation Location; }; ChunkedContentLookup Result; { const uint32_t SequenceRawHashesCount = gsl::narrow(Content.ChunkedContent.SequenceRawHashes.size()); - Result.RawHashToSequenceRawHashIndex.reserve(SequenceRawHashesCount); - Result.SequenceRawHashIndexChunkOrderOffset.reserve(SequenceRawHashesCount); + Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount); + Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount); uint32_t OrderOffset = 0; for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceRawHashIndex++) { - Result.RawHashToSequenceRawHashIndex.insert( - {Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); - Result.SequenceRawHashIndexChunkOrderOffset.push_back(OrderOffset); + Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); + Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset); OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; } } std::vector Locations; Locations.reserve(Content.ChunkedContent.ChunkOrders.size()); - for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + for (uint32_t SequenceIndex = 0; SequenceIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceIndex++) { - if (Content.RawSizes[PathIndex] > 0) + const uint32_t OrderOffset = Result.SequenceIndexChunkOrderOffset[SequenceIndex]; + const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceIndex]; + uint64_t LocationOffset = 0; + for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) { - const IoHash& RawHash = Content.RawHashes[PathIndex]; - uint32_t SequenceRawHashIndex = Result.RawHashToSequenceRawHashIndex.at(RawHash); - const uint32_t OrderOffset = Result.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - uint64_t LocationOffset = 0; - for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) - { - uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; + uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; - Locations.push_back(ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkLocation{PathIndex, LocationOffset}}); + Locations.push_back( + ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkSequenceLocation{SequenceIndex, LocationOffset}}); - LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; - } - ZEN_ASSERT(LocationOffset == Content.RawSizes[PathIndex]); + LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; } } @@ -829,18 +823,18 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { return false; } - if (Lhs.Location.PathIndex < Rhs.Location.PathIndex) + if (Lhs.Location.SequenceIndex < Rhs.Location.SequenceIndex) { return true; } - if (Lhs.Location.PathIndex > Rhs.Location.PathIndex) + if (Lhs.Location.SequenceIndex > Rhs.Location.SequenceIndex) { return false; } return Lhs.Location.Offset < Rhs.Location.Offset; }); - Result.ChunkLocations.reserve(Locations.size()); + Result.ChunkSequenceLocations.reserve(Locations.size()); const uint32_t ChunkCount = gsl::narrow(Content.ChunkedContent.ChunkHashes.size()); Result.ChunkHashToChunkIndex.reserve(ChunkCount); size_t RangeOffset = 0; @@ -850,14 +844,30 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) uint32_t Count = 0; while (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex) { - Result.ChunkLocations.push_back(Locations[RangeOffset + Count].Location); + Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location); Count++; } - Result.ChunkLocationOffset.push_back(RangeOffset); - Result.ChunkLocationCounts.push_back(Count); + Result.ChunkSequenceLocationOffset.push_back(RangeOffset); + Result.ChunkSequenceLocationCounts.push_back(Count); RangeOffset += Count; } + Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1); + for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + { + if (Content.RawSizes[PathIndex] > 0) + { + const IoHash& RawHash = Content.RawHashes[PathIndex]; + auto SequenceIndexIt = Result.RawHashToSequenceIndex.find(RawHash); + ZEN_ASSERT(SequenceIndexIt != Result.RawHashToSequenceIndex.end()); + const uint32_t SequenceIndex = SequenceIndexIt->second; + if (Result.SequenceIndexFirstPathIndex[SequenceIndex] == (uint32_t)-1) + { + Result.SequenceIndexFirstPathIndex[SequenceIndex] = PathIndex; + } + } + } + return Result; } diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h index 15c687462..309341550 100644 --- a/src/zenutil/include/zenutil/chunkedcontent.h +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -122,32 +122,59 @@ ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, struct ChunkedContentLookup { - struct ChunkLocation + struct ChunkSequenceLocation { - uint32_t PathIndex; + uint32_t SequenceIndex; uint64_t Offset; }; tsl::robin_map ChunkHashToChunkIndex; - tsl::robin_map RawHashToSequenceRawHashIndex; - std::vector SequenceRawHashIndexChunkOrderOffset; - std::vector ChunkLocations; - std::vector ChunkLocationOffset; // ChunkLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex - std::vector ChunkLocationCounts; // ChunkLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex + tsl::robin_map RawHashToSequenceIndex; + std::vector SequenceIndexChunkOrderOffset; + std::vector ChunkSequenceLocations; + std::vector + ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex + std::vector ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex + std::vector SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash }; ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); inline std::pair -GetChunkLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +GetChunkSequenceLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) { - return std::make_pair(Lookup.ChunkLocationOffset[ChunkIndex], Lookup.ChunkLocationCounts[ChunkIndex]); + return std::make_pair(Lookup.ChunkSequenceLocationOffset[ChunkIndex], Lookup.ChunkSequenceLocationCounts[ChunkIndex]); } -inline std::span -GetChunkLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +inline std::span +GetChunkSequenceLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) { - std::pair Range = GetChunkLocationRange(Lookup, ChunkIndex); - return std::span(Lookup.ChunkLocations).subspan(Range.first, Range.second); + std::pair Range = GetChunkSequenceLocationRange(Lookup, ChunkIndex); + return std::span(Lookup.ChunkSequenceLocations).subspan(Range.first, Range.second); +} + +inline uint32_t +GetSequenceIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + return Lookup.RawHashToSequenceIndex.at(RawHash); +} + +inline uint32_t +GetChunkIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + return Lookup.RawHashToSequenceIndex.at(RawHash); +} + +inline uint32_t +GetFirstPathIndexForSeqeuenceIndex(const ChunkedContentLookup& Lookup, const uint32_t SequenceIndex) +{ + return Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; +} + +inline uint32_t +GetFirstPathIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + const uint32_t SequenceIndex = GetSequenceIndexForRawHash(Lookup, RawHash); + return GetFirstPathIndexForSeqeuenceIndex(Lookup, SequenceIndex); } namespace compactbinary_helpers { -- cgit v1.2.3 From 2232eb28256ec54beaf3dbe06f5176698c7245a0 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 4 Mar 2025 09:38:13 +0100 Subject: limit and validate responses before logging the text (#292) Improvement: When logging HTTP responses, the body is now sanity checked to ensure it is human readable, and the length of the output is capped to prevent inadvertent log bloat --- src/zencore/include/zencore/string.h | 3 ++ src/zencore/string.cpp | 14 ++++++++++ src/zenhttp/httpclient.cpp | 46 ++++++++++++++++++++++++++++++ src/zenhttp/include/zenhttp/formatters.h | 48 +++++++++++++++++++++++++++++++- 4 files changed, 110 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/zencore/include/zencore/string.h b/src/zencore/include/zencore/string.h index e2ef1c1a0..68129b691 100644 --- a/src/zencore/include/zencore/string.h +++ b/src/zencore/include/zencore/string.h @@ -522,6 +522,9 @@ public: ////////////////////////////////////////////////////////////////////////// +bool IsValidUtf8(const std::string_view& str); +std::string_view::const_iterator FindFirstInvalidUtf8Byte(const std::string_view& str); + void Utf8ToWide(const char8_t* str, WideStringBuilderBase& out); void Utf8ToWide(const std::u8string_view& wstr, WideStringBuilderBase& out); void Utf8ToWide(const std::string_view& wstr, WideStringBuilderBase& out); diff --git a/src/zencore/string.cpp b/src/zencore/string.cpp index 242d41abe..a0d8c927f 100644 --- a/src/zencore/string.cpp +++ b/src/zencore/string.cpp @@ -99,6 +99,20 @@ FilepathFindExtension(const std::string_view& Path, const char* ExtensionToMatch ////////////////////////////////////////////////////////////////////////// +bool +IsValidUtf8(const std::string_view& str) +{ + return utf8::is_valid(begin(str), end(str)); +} + +std::string_view::const_iterator +FindFirstInvalidUtf8Byte(const std::string_view& str) +{ + return utf8::find_invalid(begin(str), end(str)); +} + +////////////////////////////////////////////////////////////////////////// + void Utf8ToWide(const char8_t* Str8, WideStringBuilderBase& OutString) { diff --git a/src/zenhttp/httpclient.cpp b/src/zenhttp/httpclient.cpp index 7f7e70fef..e4c6d243d 100644 --- a/src/zenhttp/httpclient.cpp +++ b/src/zenhttp/httpclient.cpp @@ -1422,6 +1422,52 @@ HttpClient::Response::ThrowError(std::string_view ErrorPrefix) #if ZEN_WITH_TESTS +TEST_CASE("responseformat") +{ + using namespace std::literals; + + SUBCASE("identity") + { + BodyLogFormatter _{"abcd"}; + CHECK_EQ(_.GetText(), "abcd"sv); + } + + SUBCASE("very long") + { + std::string_view LongView = + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; + + BodyLogFormatter _{LongView}; + + CHECK(_.GetText().size() < LongView.size()); + CHECK(_.GetText().starts_with("[truncated"sv)); + } + + SUBCASE("invalid text") + { + std::string_view BadText = "totobaba\xff\xfe"; + + BodyLogFormatter _{BadText}; + + CHECK_EQ(_.GetText(), "totobaba"); + } +} + TEST_CASE("httpclient") { using namespace std::literals; diff --git a/src/zenhttp/include/zenhttp/formatters.h b/src/zenhttp/include/zenhttp/formatters.h index 538136238..0fa5dc6da 100644 --- a/src/zenhttp/include/zenhttp/formatters.h +++ b/src/zenhttp/include/zenhttp/formatters.h @@ -13,6 +13,50 @@ ZEN_THIRD_PARTY_INCLUDES_START #include ZEN_THIRD_PARTY_INCLUDES_END +namespace zen { + +struct BodyLogFormatter +{ +private: + std::string_view ResponseText; + zen::ExtendableStringBuilder<128> ModifiedResponse; + +public: + explicit BodyLogFormatter(std::string_view InResponseText) : ResponseText(InResponseText) + { + using namespace std::literals; + + const int TextSizeLimit = 1024; + + // Trim invalid UTF8 + + auto InvalidIt = zen::FindFirstInvalidUtf8Byte(ResponseText); + + if (InvalidIt != end(ResponseText)) + { + ResponseText = ResponseText.substr(0, InvalidIt - begin(ResponseText)); + } + + if (ResponseText.empty()) + { + ResponseText = ""sv; + } + + if (ResponseText.size() > TextSizeLimit) + { + const auto TruncatedString = "[truncated response] "sv; + ModifiedResponse.Append(TruncatedString); + ModifiedResponse.Append(ResponseText.data(), TextSizeLimit - TruncatedString.size()); + + ResponseText = ModifiedResponse; + } + } + + inline std::string_view GetText() const { return ResponseText; } +}; + +} // namespace zen + template<> struct fmt::formatter { @@ -57,6 +101,8 @@ struct fmt::formatter } else { + zen::BodyLogFormatter Body(Response.text); + return fmt::format_to(Ctx.out(), "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}s, Reponse: '{}', Reason: '{}'", Response.url.str(), @@ -64,7 +110,7 @@ struct fmt::formatter Response.uploaded_bytes, Response.downloaded_bytes, Response.elapsed, - Response.text, + Body.GetText(), Response.reason); } } -- cgit v1.2.3 From 7cb1de5a966aa23e0c098d2bbf9009d0f82a6477 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 4 Mar 2025 10:22:53 +0100 Subject: stream decompress (#293) * clean up latency parameters and slow down rate updates * add DecompressToStream --- src/zen/cmds/builds_cmd.cpp | 198 +++++++++++++++++++++++---------- src/zencore/compress.cpp | 186 +++++++++++++++++++++++++++++++ src/zencore/include/zencore/compress.h | 10 ++ 3 files changed, 337 insertions(+), 57 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 219d01240..a9852151f 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -78,15 +78,19 @@ namespace { const ChunksBlockParameters DefaultChunksBlockParams{.MaxBlockSize = 32u * 1024u * 1024u, .MaxChunkEmbedSize = DefaultChunkedParams.MaxSize}; - const std::string ZenFolderName = ".zen"; - const std::string ZenStateFilePath = fmt::format("{}/current_state.cbo", ZenFolderName); - const std::string ZenStateFileJsonPath = fmt::format("{}/current_state.json", ZenFolderName); - const std::string ZenTempFolderName = fmt::format("{}/tmp", ZenFolderName); - const std::string ZenTempCacheFolderName = fmt::format("{}/cache", ZenTempFolderName); - const std::string ZenTempStorageFolderName = fmt::format("{}/storage", ZenTempFolderName); - const std::string ZenTempBlockFolderName = fmt::format("{}/blocks", ZenTempFolderName); - const std::string ZenTempChunkFolderName = fmt::format("{}/chunks", ZenTempFolderName); - const std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt"; + + const double DefaultLatency = 0; // .0010; + const double DefaultDelayPerKBSec = 0; // 0.00005; + + const std::string ZenFolderName = ".zen"; + const std::string ZenStateFilePath = fmt::format("{}/current_state.cbo", ZenFolderName); + const std::string ZenStateFileJsonPath = fmt::format("{}/current_state.json", ZenFolderName); + const std::string ZenTempFolderName = fmt::format("{}/tmp", ZenFolderName); + const std::string ZenTempCacheFolderName = fmt::format("{}/cache", ZenTempFolderName); + const std::string ZenTempStorageFolderName = fmt::format("{}/storage", ZenTempFolderName); + const std::string ZenTempBlockFolderName = fmt::format("{}/blocks", ZenTempFolderName); + const std::string ZenTempChunkFolderName = fmt::format("{}/chunks", ZenTempFolderName); + const std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt"; const std::string UnsyncFolderName = ".unsync"; @@ -271,7 +275,7 @@ namespace { } uint64_t TimeUS = Timer.GetElapsedTimeUs(); uint64_t TimeDeltaUS = TimeUS - LastTimeUS; - if (TimeDeltaUS >= 1000000) + if (TimeDeltaUS >= 2000000) { uint64_t Delta = Count - LastCount; uint64_t PerSecond = (Delta * 1000000) / TimeDeltaUS; @@ -3464,6 +3468,61 @@ namespace { } } + bool CanStreamDecompress(const ChunkedFolderContent& RemoteContent, + const std::vector Locations) + { + if (Locations.size() == 1) + { + const uint32_t FirstSequenceIndex = Locations[0]->SequenceIndex; + if (Locations[0]->Offset == 0 && RemoteContent.ChunkedContent.ChunkCounts[FirstSequenceIndex] == 1) + { + return true; + } + } + return false; + } + + void StreamDecompress(const std::filesystem::path& CacheFolderPath, + const IoHash& SequenceRawHash, + CompositeBuffer&& CompressedPart, + std::atomic& WriteToDiskBytes) + { + const std::filesystem::path TempChunkSequenceFileName = GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash); + TemporaryFile DecompressedTemp; + std::error_code Ec; + DecompressedTemp.CreateTemporary(CacheFolderPath, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed creating temporary file for decompressing large blob {}. Reason: {}", SequenceRawHash, Ec.message())); + } + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompressedPart, RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", SequenceRawHash)); + } + if (RawHash != SequenceRawHash) + { + throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, SequenceRawHash)); + } + bool CouldDecompress = Compressed.DecompressToStream(0, (uint64_t)-1, [&](uint64_t Offset, const CompositeBuffer& RangeBuffer) { + DecompressedTemp.Write(RangeBuffer, Offset); + WriteToDiskBytes += RangeBuffer.GetSize(); + }); + if (!CouldDecompress) + { + throw std::runtime_error(fmt::format("Failed to decompress large blob {}", SequenceRawHash)); + } + DecompressedTemp.MoveTemporaryIntoPlace(TempChunkSequenceFileName, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed moving temporary file for decompressing large blob {}. Reason: {}", SequenceRawHash, Ec.message())); + } + } + void DownloadLargeBlob(BuildStorage& Storage, const std::filesystem::path& TempFolderPath, const std::filesystem::path& CacheFolderPath, @@ -3527,7 +3586,7 @@ namespace { DownloadedChunks++; Work.ScheduleWork( - WritePool, + WritePool, // GetSyncWorkerPool(),// [&CacheFolderPath, &RemoteContent, &RemoteLookup, @@ -3555,33 +3614,44 @@ namespace { } CompressedPart.SetDeleteOnClose(true); - uint64_t TotalBytesWritten = 0; - auto ChunkHashToChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); ZEN_ASSERT(ChunkHashToChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); - uint32_t ChunkIndex = ChunkHashToChunkIndexIt->second; - - SharedBuffer Chunk = Decompress(CompositeBuffer(std::move(CompressedPart)), - ChunkHash, - RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + if (CanStreamDecompress(RemoteContent, ChunkTargetPtrs)) + { + const IoHash& SequenceRawHash = + RemoteContent.ChunkedContent.SequenceRawHashes[ChunkTargetPtrs.front()->SequenceIndex]; + StreamDecompress(CacheFolderPath, + SequenceRawHash, + CompositeBuffer(std::move(CompressedPart)), + WriteToDiskBytes); + } + else + { + const uint32_t ChunkIndex = ChunkHashToChunkIndexIt->second; + SharedBuffer Chunk = Decompress(CompositeBuffer(std::move(CompressedPart)), + ChunkHash, + RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); - // ZEN_ASSERT_SLOW(ChunkHash == - // IoHash::HashBuffer(Chunk.AsIoBuffer())); + // ZEN_ASSERT_SLOW(ChunkHash == + // IoHash::HashBuffer(Chunk.AsIoBuffer())); - if (!AbortFlag) - { - WriteFileCache OpenFileCache; - - WriteChunkToDisk(CacheFolderPath, - RemoteContent, - RemoteLookup, - ChunkTargetPtrs, - CompositeBuffer(Chunk), - OpenFileCache, - TotalBytesWritten); - ChunksComplete++; - WriteToDiskBytes += TotalBytesWritten; + if (!AbortFlag) + { + WriteFileCache OpenFileCache; + + uint64_t TotalBytesWritten = 0; + WriteChunkToDisk(CacheFolderPath, + RemoteContent, + RemoteLookup, + ChunkTargetPtrs, + CompositeBuffer(Chunk), + OpenFileCache, + TotalBytesWritten); + ChunksComplete++; + WriteToDiskBytes += TotalBytesWritten; + } } + if (!AbortFlag) { // Write tracking, updating this must be done without any files open (WriteFileCache) @@ -3619,7 +3689,7 @@ namespace { for (auto& WorkItem : WorkItems) { Work.ScheduleWork( - NetworkPool, + NetworkPool, // GetSyncWorkerPool(),// [WorkItem = std::move(WorkItem)](std::atomic&) { if (!AbortFlag) { @@ -3830,7 +3900,7 @@ namespace { else { Work.ScheduleWork( - NetworkPool, + NetworkPool, // GetSyncWorkerPool(),// [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) { if (!AbortFlag) { @@ -3880,21 +3950,38 @@ namespace { if (!AbortFlag) { FilteredWrittenBytesPerSecond.Start(); - uint64_t TotalBytesWritten = 0; - SharedBuffer Chunk = - Decompress(CompressedPart, - ChunkHash, - RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex]); + if (CanStreamDecompress(RemoteContent, ChunkTargetPtrs)) { - WriteFileCache OpenFileCache; - WriteChunkToDisk(CacheFolderPath, - RemoteContent, - RemoteLookup, - ChunkTargetPtrs, - CompositeBuffer(Chunk), - OpenFileCache, - TotalBytesWritten); + const IoHash& SequenceRawHash = + RemoteContent.ChunkedContent + .SequenceRawHashes[ChunkTargetPtrs.front()->SequenceIndex]; + StreamDecompress(CacheFolderPath, + SequenceRawHash, + CompositeBuffer(CompressedPart), + WriteToDiskBytes); + ChunkCountWritten++; + } + else + { + uint64_t TotalBytesWritten = 0; + SharedBuffer Chunk = + Decompress(CompressedPart, + ChunkHash, + RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex]); + + { + WriteFileCache OpenFileCache; + WriteChunkToDisk(CacheFolderPath, + RemoteContent, + RemoteLookup, + ChunkTargetPtrs, + CompositeBuffer(Chunk), + OpenFileCache, + TotalBytesWritten); + ChunkCountWritten++; + WriteToDiskBytes += TotalBytesWritten; + } } if (!AbortFlag) { @@ -3925,9 +4012,6 @@ namespace { GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); } } - - ChunkCountWritten++; - WriteToDiskBytes += TotalBytesWritten; } } }, @@ -5707,7 +5791,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) else if (!m_StoragePath.empty()) { ZEN_CONSOLE("Querying builds in folder '{}'.", m_StoragePath); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); } else { @@ -5819,7 +5903,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_StoragePath, GeneratedBuildId ? "Generated " : "", BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, m_WriteMetadataAsJson); // , .0015, 0.00004 + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, m_WriteMetadataAsJson, DefaultLatency, DefaultDelayPerKBSec); StorageName = fmt::format("Disk {}", m_StoragePath.stem()); } else @@ -5952,7 +6036,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) else if (!m_StoragePath.empty()) { ZEN_CONSOLE("Downloading '{}' to '{}' from folder {}. BuildId '{}'", BuildId, m_Path, m_StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); StorageName = fmt::format("Disk {}", m_StoragePath.stem()); } else @@ -6056,7 +6140,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_Path, m_StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); StorageName = fmt::format("Disk {}", m_StoragePath.stem()); } else @@ -6326,7 +6410,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) else if (!m_StoragePath.empty()) { ZEN_CONSOLE("Using folder {}. BuildId '{}'", m_StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); StorageName = fmt::format("Disk {}", m_StoragePath.stem()); } else @@ -6394,7 +6478,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) else if (!m_StoragePath.empty()) { ZEN_CONSOLE("Using folder {}. BuildId '{}'", m_StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false); // , .0015, 0.00004 + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); StorageName = fmt::format("Disk {}", m_StoragePath.stem()); } else diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp index 0e2ce2b54..f13f8b9ca 100644 --- a/src/zencore/compress.cpp +++ b/src/zencore/compress.cpp @@ -185,6 +185,12 @@ public: const MemoryView HeaderView, uint64_t RawOffset, uint64_t RawSize) const = 0; + + virtual bool DecompressToStream(const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function&& Callback) const = 0; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -263,6 +269,22 @@ public: } [[nodiscard]] uint64_t GetHeaderSize(const BufferHeader&) const final { return sizeof(BufferHeader); } + + virtual bool DecompressToStream(const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function&& Callback) const final + { + if (Header.Method == CompressionMethod::None && Header.TotalCompressedSize == CompressedData.GetSize() && + Header.TotalCompressedSize == Header.TotalRawSize + sizeof(BufferHeader) && RawOffset < Header.TotalRawSize && + (RawOffset + RawSize) <= Header.TotalRawSize) + { + Callback(0, CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize)); + return true; + } + return false; + } }; ////////////////////////////////////////////////////////////////////////// @@ -447,6 +469,12 @@ public: MutableMemoryView RawView, uint64_t RawOffset) const final; + virtual bool DecompressToStream(const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function&& Callback) const final; + protected: virtual bool DecompressBlock(MutableMemoryView RawData, MemoryView CompressedData) const = 0; }; @@ -568,6 +596,143 @@ BlockDecoder::DecompressToComposite(const BufferHeader& Header, const CompositeB return CompositeBuffer(std::move(Segments)); } +bool +BlockDecoder::DecompressToStream(const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function&& Callback) const +{ + if (Header.TotalCompressedSize != CompressedData.GetSize()) + { + return false; + } + + const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; + + UniqueBuffer BlockSizeBuffer; + MemoryView BlockSizeView = CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer); + std::span CompressedBlockSizes(reinterpret_cast(BlockSizeView.GetData()), Header.BlockCount); + + UniqueBuffer CompressedBlockCopy; + + const size_t FirstBlockIndex = uint64_t(RawOffset / BlockSize); + const size_t LastBlockIndex = uint64_t((RawOffset + RawSize - 1) / BlockSize); + const uint64_t LastBlockSize = BlockSize - ((Header.BlockCount * BlockSize) - Header.TotalRawSize); + uint64_t OffsetInFirstBlock = RawOffset % BlockSize; + uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t); + uint64_t RemainingRawSize = RawSize; + + for (size_t BlockIndex = 0; BlockIndex < FirstBlockIndex; BlockIndex++) + { + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + CompressedOffset += CompressedBlockSize; + } + + UniqueBuffer RawDataBuffer; + + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((CompressedData.GetSegments().size() == 1) && CompressedData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) + { + ZEN_ASSERT(FileRef.FileHandle != nullptr); + BasicFile Source; + Source.Attach(FileRef.FileHandle); + + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + { + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; + + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? zen::Min(RawSize, UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); + + if (CompressedBlockCopy.GetSize() < CompressedBlockSize) + { + CompressedBlockCopy = UniqueBuffer::Alloc(CompressedBlockSize); + } + Source.Read(CompressedBlockCopy.GetData(), CompressedBlockSize, FileRef.FileChunkOffset + CompressedOffset); + + MemoryView CompressedBlock = CompressedBlockCopy.GetView().Left(CompressedBlockSize); + + if (IsCompressed) + { + if (RawDataBuffer.IsNull()) + { + RawDataBuffer = UniqueBuffer::Alloc(zen::Min(RawSize, UncompressedBlockSize)); + } + else + { + ZEN_ASSERT(RawDataBuffer.GetSize() >= UncompressedBlockSize); + } + MutableMemoryView UncompressedBlock = RawDataBuffer.GetMutableView().Left(UncompressedBlockSize); + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + Source.Detach(); + return false; + } + Callback(BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress))); + } + else + { + Callback(BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer( + IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress))); + } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + } + Source.Detach(); + } + else + { + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + { + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; + + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? zen::Min(RawSize, UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); + + MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); + + if (IsCompressed) + { + if (RawDataBuffer.IsNull()) + { + RawDataBuffer = UniqueBuffer::Alloc(zen::Min(RawSize, UncompressedBlockSize)); + } + else + { + ZEN_ASSERT(RawDataBuffer.GetSize() >= UncompressedBlockSize); + } + MutableMemoryView UncompressedBlock = RawDataBuffer.GetMutableView().Left(UncompressedBlockSize); + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + return false; + } + Callback(BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress))); + } + else + { + Callback(BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer( + IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress))); + } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + } + } + return true; +} + bool BlockDecoder::TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, @@ -1643,6 +1808,27 @@ CompressedBuffer::DecompressToComposite() const return CompositeBuffer(); } +bool +CompressedBuffer::DecompressToStream(uint64_t RawOffset, + uint64_t RawSize, + std::function&& Callback) const +{ + using namespace detail; + if (CompressedData) + { + const BufferHeader Header = BufferHeader::Read(CompressedData); + if (Header.Magic == BufferHeader::ExpectedMagic) + { + if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) + { + const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset; + return Decoder->DecompressToStream(Header, CompressedData, RawOffset, TotalRawSize, std::move(Callback)); + } + } + } + return false; +} + bool CompressedBuffer::TryGetCompressParameters(OodleCompressor& OutCompressor, OodleCompressionLevel& OutCompressionLevel, diff --git a/src/zencore/include/zencore/compress.h b/src/zencore/include/zencore/compress.h index 5e761ceef..c056d7561 100644 --- a/src/zencore/include/zencore/compress.h +++ b/src/zencore/include/zencore/compress.h @@ -196,6 +196,16 @@ public: */ [[nodiscard]] ZENCORE_API CompositeBuffer DecompressToComposite() const; + /** + * Decompress into and call callback for ranges of decompressed data. + * The buffer in the callback will be overwritten when the callback returns. + * + * @return True if the buffer is valid and can be decompressed. + */ + [[nodiscard]] ZENCORE_API bool DecompressToStream(uint64_t RawOffset, + uint64_t RawSize, + std::function&& Callback) const; + /** A null compressed buffer. */ static const CompressedBuffer Null; -- cgit v1.2.3 From 5867d04117645cfd8e558ad6d7888e929ca6e816 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 4 Mar 2025 12:43:05 +0100 Subject: do direct update of stats numbers (#294) --- src/zen/cmds/builds_cmd.cpp | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index a9852151f..132f5db86 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -3288,8 +3288,8 @@ namespace { const CompositeBuffer& DecompressedBlockBuffer, const ChunkedContentLookup& Lookup, std::atomic* RemoteChunkIndexNeedsCopyFromSourceFlags, - uint32_t& OutChunksComplete, - uint64_t& OutBytesWritten) + std::atomic& OutChunksComplete, + std::atomic& OutBytesWritten) { std::vector ChunkBuffers; struct WriteOpData @@ -3447,7 +3447,7 @@ namespace { std::span ChunkTargets, const CompositeBuffer& ChunkData, WriteFileCache& OpenFileCache, - uint64_t& OutBytesWritten) + std::atomic& OutBytesWritten) { for (const ChunkedContentLookup::ChunkSequenceLocation* TargetPtr : ChunkTargets) { @@ -3639,16 +3639,14 @@ namespace { { WriteFileCache OpenFileCache; - uint64_t TotalBytesWritten = 0; WriteChunkToDisk(CacheFolderPath, RemoteContent, RemoteLookup, ChunkTargetPtrs, CompositeBuffer(Chunk), OpenFileCache, - TotalBytesWritten); + WriteToDiskBytes); ChunksComplete++; - WriteToDiskBytes += TotalBytesWritten; } } @@ -3964,7 +3962,6 @@ namespace { } else { - uint64_t TotalBytesWritten = 0; SharedBuffer Chunk = Decompress(CompressedPart, ChunkHash, @@ -3978,9 +3975,8 @@ namespace { ChunkTargetPtrs, CompositeBuffer(Chunk), OpenFileCache, - TotalBytesWritten); + WriteToDiskBytes); ChunkCountWritten++; - WriteToDiskBytes += TotalBytesWritten; } } if (!AbortFlag) @@ -4240,21 +4236,14 @@ namespace { ZEN_ASSERT_SLOW(BlockDescriptions[BlockIndex].BlockHash == IoHash::HashBuffer(DecompressedBlockBuffer)); - uint64_t BytesWrittenToDisk = 0; - uint32_t ChunksReadFromBlock = 0; - if (WriteBlockToDisk(CacheFolderPath, - RemoteContent, - SequenceIndexChunksLeftToWriteCounters, - DecompressedBlockBuffer, - RemoteLookup, - RemoteChunkIndexNeedsCopyFromSourceFlags.data(), - ChunksReadFromBlock, - BytesWrittenToDisk)) - { - WriteToDiskBytes += BytesWrittenToDisk; - ChunkCountWritten += ChunksReadFromBlock; - } - else + if (!WriteBlockToDisk(CacheFolderPath, + RemoteContent, + SequenceIndexChunksLeftToWriteCounters, + DecompressedBlockBuffer, + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags.data(), + ChunkCountWritten, + WriteToDiskBytes)) { throw std::runtime_error( fmt::format("Block {} is malformed", BlockDescriptions[BlockIndex].BlockHash)); -- cgit v1.2.3 From 4b981aed0281ac995c326e3d03dc482353f66405 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 5 Mar 2025 11:27:54 +0100 Subject: streaming compress (#295) - Improvement: Validate hash of decompressed data inline with streaming decompression - Improvement: Do streaming compression of large blobs to improve memory and I/O performance --- src/zen/cmds/builds_cmd.cpp | 150 ++++++++++++++++++++++------- src/zencore/compress.cpp | 171 ++++++++++++++++++++++++++++++++- src/zencore/include/zencore/compress.h | 5 + 3 files changed, 289 insertions(+), 37 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 132f5db86..fb9da021d 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -1088,13 +1088,21 @@ namespace { throw std::runtime_error( fmt::format("Blob {} ({} bytes) compressed header has a mismatching raw hash {}", BlobHash, Payload.GetSize(), RawHash)); } - SharedBuffer Decompressed = Compressed.Decompress(); - if (!Decompressed) + + IoHashStream Hash; + bool CouldDecompress = Compressed.DecompressToStream(0, RawSize, [&Hash](uint64_t, const CompositeBuffer& RangeBuffer) { + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + } + }); + + if (!CouldDecompress) { throw std::runtime_error( fmt::format("Blob {} ({} bytes) failed to decompress - header information mismatch", BlobHash, Payload.GetSize())); } - IoHash ValidateRawHash = IoHash::HashBuffer(Decompressed); + IoHash ValidateRawHash = Hash.GetHash(); if (ValidateRawHash != BlobHash) { throw std::runtime_error(fmt::format("Blob {} ({} bytes) decompressed hash {} does not match header information", @@ -1102,14 +1110,26 @@ namespace { Payload.GetSize(), ValidateRawHash)); } - CompositeBuffer DecompressedComposite = Compressed.DecompressToComposite(); - if (!DecompressedComposite) + OodleCompressor Compressor; + OodleCompressionLevel CompressionLevel; + uint64_t BlockSize; + if (!Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize)) { - throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to decompress to composite", BlobHash, Payload.GetSize())); + throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to get compression details", BlobHash, Payload.GetSize())); } OutCompressedSize = Payload.GetSize(); OutDecompressedSize = RawSize; - return DecompressedComposite; + if (CompressionLevel == OodleCompressionLevel::None) + { + // Only decompress to composite if we need it for block verification + CompositeBuffer DecompressedComposite = Compressed.DecompressToComposite(); + if (!DecompressedComposite) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to decompress to composite", BlobHash, Payload.GetSize())); + } + return DecompressedComposite; + } + return CompositeBuffer{}; } CompositeBuffer ValidateBlob(BuildStorage& Storage, @@ -1132,6 +1152,10 @@ namespace { uint64_t& OutDecompressedSize) { CompositeBuffer BlockBuffer = ValidateBlob(std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Chunk block blob {} is not compressed using 'None' compression level", BlobHash)); + } return GetChunkBlockDescription(BlockBuffer.Flatten(), BlobHash); } @@ -1461,6 +1485,7 @@ namespace { uint32_t ChunkIndex, const std::filesystem::path& TempFolderPath) { + ZEN_ASSERT(!TempFolderPath.empty()); const IoHash& ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; @@ -1476,21 +1501,53 @@ namespace { throw std::runtime_error(fmt::format("Fetched chunk {} has invalid size", ChunkHash)); } ZEN_ASSERT_SLOW(IoHash::HashBuffer(RawSource) == ChunkHash); + { + std::filesystem::path TempFilePath = (TempFolderPath / ChunkHash.ToHexString()).make_preferred(); + BasicFile CompressedFile; + std::error_code Ec; + CompressedFile.Open(TempFilePath, BasicFile::Mode::kTruncate, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed creating temporary file for compressing blob {}. Reason: {}", ChunkHash, Ec.message())); + } + + bool CouldCompress = CompressedBuffer::CompressToStream( + CompositeBuffer(SharedBuffer(RawSource)), + [&](uint64_t Offset, const CompositeBuffer& RangeBuffer) { CompressedFile.Write(RangeBuffer, Offset); }); + if (CouldCompress) + { + uint64_t CompressedSize = CompressedFile.FileSize(); + void* FileHandle = CompressedFile.Detach(); + IoBuffer TempPayload = IoBuffer(IoBuffer::File, + FileHandle, + 0, + CompressedSize, + /*IsWholeFile*/ true); + ZEN_ASSERT(TempPayload); + TempPayload.SetDeleteOnClose(true); + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(TempPayload), RawHash, RawSize); + ZEN_ASSERT(Compressed); + ZEN_ASSERT(RawHash == ChunkHash); + ZEN_ASSERT(RawSize == ChunkSize); + return Compressed.GetCompressed(); + } + CompressedFile.Close(); + std::filesystem::remove(TempFilePath, Ec); + ZEN_UNUSED(Ec); + } + + // Try regular compress - decompress may fail if compressed data is larger than non-compressed CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(RawSource))); if (!CompressedBlob) { - throw std::runtime_error(fmt::format("Failed decompressing chunk {}", ChunkHash)); - } - if (TempFolderPath.empty()) - { - return CompressedBlob.GetCompressed().MakeOwned(); - } - else - { - CompositeBuffer TempPayload = WriteToTempFileIfNeeded(CompressedBlob.GetCompressed(), TempFolderPath, ChunkHash); - return CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)).GetCompressed(); + throw std::runtime_error(fmt::format("Failed to compress large blob {}", ChunkHash)); } + CompositeBuffer TempPayload = WriteToTempFileIfNeeded(CompressedBlob.GetCompressed(), TempFolderPath, ChunkHash); + return CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)).GetCompressed(); } struct GeneratedBlocks @@ -1934,7 +1991,7 @@ namespace { { const uint32_t ChunkIndex = LooseChunkIndexes[CompressLooseChunkOrderIndex]; Work.ScheduleWork( - ReadChunkPool, + ReadChunkPool, // GetSyncWorkerPool(),// ReadChunkPool, [&, ChunkIndex](std::atomic&) { if (!AbortFlag) { @@ -3490,7 +3547,7 @@ namespace { const std::filesystem::path TempChunkSequenceFileName = GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash); TemporaryFile DecompressedTemp; std::error_code Ec; - DecompressedTemp.CreateTemporary(CacheFolderPath, Ec); + DecompressedTemp.CreateTemporary(TempChunkSequenceFileName.parent_path(), Ec); if (Ec) { throw std::runtime_error( @@ -3507,14 +3564,25 @@ namespace { { throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, SequenceRawHash)); } + IoHashStream Hash; bool CouldDecompress = Compressed.DecompressToStream(0, (uint64_t)-1, [&](uint64_t Offset, const CompositeBuffer& RangeBuffer) { DecompressedTemp.Write(RangeBuffer, Offset); + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + } WriteToDiskBytes += RangeBuffer.GetSize(); }); if (!CouldDecompress) { throw std::runtime_error(fmt::format("Failed to decompress large blob {}", SequenceRawHash)); } + const IoHash VerifyHash = Hash.GetHash(); + if (VerifyHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Decompressed blob payload hash {} does not match expected hash {}", VerifyHash, SequenceRawHash)); + } DecompressedTemp.MoveTemporaryIntoPlace(TempChunkSequenceFileName, Ec); if (Ec) { @@ -3616,6 +3684,7 @@ namespace { auto ChunkHashToChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); ZEN_ASSERT(ChunkHashToChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); + bool NeedHashVerify = true; if (CanStreamDecompress(RemoteContent, ChunkTargetPtrs)) { const IoHash& SequenceRawHash = @@ -3624,6 +3693,8 @@ namespace { SequenceRawHash, CompositeBuffer(std::move(CompressedPart)), WriteToDiskBytes); + NeedHashVerify = false; + ChunksComplete++; } else { @@ -3660,14 +3731,17 @@ namespace { { const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( - GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); - if (VerifyChunkHash != SequenceRawHash) + if (NeedHashVerify) { - throw std::runtime_error( - fmt::format("Written chunk sequence {} hash does not match expected hash {}", - VerifyChunkHash, - SequenceRawHash)); + const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( + GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != ChunkHash) + { + throw std::runtime_error( + fmt::format("Written chunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + ChunkHash)); + } } std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); @@ -3949,6 +4023,7 @@ namespace { { FilteredWrittenBytesPerSecond.Start(); + bool NeedHashVerify = true; if (CanStreamDecompress(RemoteContent, ChunkTargetPtrs)) { const IoHash& SequenceRawHash = @@ -3959,6 +4034,7 @@ namespace { CompositeBuffer(CompressedPart), WriteToDiskBytes); ChunkCountWritten++; + NeedHashVerify = false; } else { @@ -3992,16 +4068,20 @@ namespace { { const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - const IoHash VerifyChunkHash = - IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( - GetTempChunkedSequenceFileName(CacheFolderPath, - SequenceRawHash))); - if (VerifyChunkHash != SequenceRawHash) + if (NeedHashVerify) { - throw std::runtime_error(fmt::format( - "Written hunk sequence {} hash does not match expected hash {}", - VerifyChunkHash, - SequenceRawHash)); + const IoHash VerifyChunkHash = + IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( + GetTempChunkedSequenceFileName(CacheFolderPath, + SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Written hunk sequence {} hash does not match " + "expected hash {}", + VerifyChunkHash, + SequenceRawHash)); + } } std::filesystem::rename( GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp index f13f8b9ca..1844f6a63 100644 --- a/src/zencore/compress.cpp +++ b/src/zencore/compress.cpp @@ -158,6 +158,9 @@ class BaseEncoder { public: [[nodiscard]] virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize = DefaultBlockSize) const = 0; + [[nodiscard]] virtual bool CompressToStream(const CompositeBuffer& RawData, + std::function&& Callback, + uint64_t BlockSize = DefaultBlockSize) const = 0; }; class BaseDecoder @@ -198,11 +201,21 @@ public: class NoneEncoder final : public BaseEncoder { public: - [[nodiscard]] CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t /* BlockSize */) const final + [[nodiscard]] virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t /* BlockSize */) const final { UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), BLAKE3::HashBuffer(RawData)); return CompositeBuffer(HeaderData.MoveToShared(), RawData.MakeOwned()); } + + [[nodiscard]] virtual bool CompressToStream(const CompositeBuffer& RawData, + std::function&& Callback, + uint64_t /* BlockSize */) const final + { + UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), BLAKE3::HashBuffer(RawData)); + Callback(0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderData.GetData(), HeaderData.GetSize()))); + Callback(HeaderData.GetSize(), RawData); + return true; + } }; class NoneDecoder final : public BaseDecoder @@ -292,7 +305,10 @@ public: class BlockEncoder : public BaseEncoder { public: - CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize = DefaultBlockSize) const final; + virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize) const final; + virtual bool CompressToStream(const CompositeBuffer& RawData, + std::function&& Callback, + uint64_t BlockSize) const final; protected: virtual CompressionMethod GetMethod() const = 0; @@ -440,6 +456,133 @@ BlockEncoder::Compress(const CompositeBuffer& RawData, const uint64_t BlockSize) return CompositeBuffer(SharedBuffer::MakeView(CompositeView, CompressedData.MoveToShared())); } +bool +BlockEncoder::CompressToStream(const CompositeBuffer& RawData, + std::function&& Callback, + uint64_t BlockSize = DefaultBlockSize) const +{ + ZEN_ASSERT(IsPow2(BlockSize) && (BlockSize <= (1u << 31))); + + const uint64_t RawSize = RawData.GetSize(); + BLAKE3Stream RawHash; + + const uint64_t BlockCount = RoundUp(RawSize, BlockSize) / BlockSize; + ZEN_ASSERT(BlockCount <= ~uint32_t(0)); + + const uint64_t MetaSize = BlockCount * sizeof(uint32_t); + const uint64_t FullHeaderSize = sizeof(BufferHeader) + MetaSize; + + std::vector CompressedBlockSizes; + CompressedBlockSizes.reserve(BlockCount); + uint64_t CompressedSize = 0; + { + UniqueBuffer CompressedBlockBuffer = UniqueBuffer::Alloc(GetCompressedBlocksBound(1, BlockSize, Min(RawSize, BlockSize))); + + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((RawData.GetSegments().size() == 1) && RawData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) + { + ZEN_ASSERT(FileRef.FileHandle != nullptr); + UniqueBuffer RawBlockCopy = UniqueBuffer::Alloc(BlockSize); + BasicFile Source; + Source.Attach(FileRef.FileHandle); + for (uint64_t RawOffset = 0; RawOffset < RawSize;) + { + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + Source.Read(RawBlockCopy.GetData(), RawBlockSize, FileRef.FileChunkOffset + RawOffset); + const MemoryView RawBlock = RawBlockCopy.GetView().Left(RawBlockSize); + RawHash.Append(RawBlock); + MutableMemoryView CompressedBlock = CompressedBlockBuffer.GetMutableView(); + if (!CompressBlock(CompressedBlock, RawBlock)) + { + Source.Detach(); + return false; + } + + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + Callback(FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlockCopy.GetView().GetData(), RawBlockSize))); + CompressedBlockSize = RawBlockSize; + } + else + { + Callback(FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, CompressedBlock.GetData(), CompressedBlockSize))); + } + + CompressedBlockSizes.push_back(static_cast(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; + } + Source.Detach(); + } + else + { + UniqueBuffer RawBlockCopy; + CompositeBuffer::Iterator It = RawData.GetIterator(0); + + for (uint64_t RawOffset = 0; RawOffset < RawSize;) + { + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + const MemoryView RawBlock = RawData.ViewOrCopyRange(It, RawBlockSize, RawBlockCopy); + RawHash.Append(RawBlock); + + MutableMemoryView CompressedBlock = CompressedBlockBuffer.GetMutableView(); + if (!CompressBlock(CompressedBlock, RawBlock)) + { + return false; + } + + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + Callback(FullHeaderSize + CompressedSize, CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlock.GetData(), RawBlockSize))); + CompressedBlockSize = RawBlockSize; + } + else + { + Callback(FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, CompressedBlock.GetData(), CompressedBlockSize))); + } + + CompressedBlockSizes.push_back(static_cast(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; + } + } + } + + // Return failure if the compressed data is larger than the raw data. + if (RawSize <= MetaSize + CompressedSize) + { + return false; + } + + // Write the header and calculate the CRC-32. + for (uint32_t& Size : CompressedBlockSizes) + { + Size = ByteSwap(Size); + } + UniqueBuffer HeaderBuffer = UniqueBuffer::Alloc(sizeof(BufferHeader) + MetaSize); + + BufferHeader Header; + Header.Method = GetMethod(); + Header.Compressor = GetCompressor(); + Header.CompressionLevel = GetCompressionLevel(); + Header.BlockSizeExponent = static_cast(zen::FloorLog2_64(BlockSize)); + Header.BlockCount = static_cast(BlockCount); + Header.TotalRawSize = RawSize; + Header.TotalCompressedSize = sizeof(BufferHeader) + MetaSize + CompressedSize; + Header.RawHash = RawHash.GetHash(); + + HeaderBuffer.GetMutableView().Mid(sizeof(BufferHeader), MetaSize).CopyFrom(MakeMemoryView(CompressedBlockSizes)); + Header.Write(HeaderBuffer.GetMutableView()); + + Callback(0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderBuffer.GetData(), HeaderBuffer.GetSize()))); + return true; +} + class BlockDecoder : public BaseDecoder { public: @@ -1615,6 +1758,30 @@ CompressedBuffer::Compress(const SharedBuffer& RawData, return Compress(CompositeBuffer(RawData), Compressor, CompressionLevel, BlockSize); } +bool +CompressedBuffer::CompressToStream(const CompositeBuffer& RawData, + std::function&& Callback, + OodleCompressor Compressor, + OodleCompressionLevel CompressionLevel, + uint64_t BlockSize) +{ + using namespace detail; + + if (BlockSize == 0) + { + BlockSize = DefaultBlockSize; + } + + if (CompressionLevel == OodleCompressionLevel::None) + { + return NoneEncoder().CompressToStream(RawData, std::move(Callback), BlockSize); + } + else + { + return OodleEncoder(Compressor, CompressionLevel).CompressToStream(RawData, std::move(Callback), BlockSize); + } +} + CompressedBuffer CompressedBuffer::FromCompressed(const CompositeBuffer& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) { diff --git a/src/zencore/include/zencore/compress.h b/src/zencore/include/zencore/compress.h index c056d7561..3969e9dbd 100644 --- a/src/zencore/include/zencore/compress.h +++ b/src/zencore/include/zencore/compress.h @@ -74,6 +74,11 @@ public: OodleCompressor Compressor = OodleCompressor::Mermaid, OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, uint64_t BlockSize = 0); + [[nodiscard]] ZENCORE_API static bool CompressToStream(const CompositeBuffer& RawData, + std::function&& Callback, + OodleCompressor Compressor = OodleCompressor::Mermaid, + OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, + uint64_t BlockSize = 0); /** * Construct from a compressed buffer previously created by Compress(). -- cgit v1.2.3 From 7b1c99f53da3a08b844cc7d7ce99530758e34be2 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 5 Mar 2025 12:25:51 +0100 Subject: Add trace support for zen CLI command (#296) - This change adds support for `--trace`, `--tracehost` and `--tracefile` command arguments to enable and control tracing to Insights - It also adds profiling scopes primarily to build download command related code --- src/zen/cmds/builds_cmd.cpp | 83 +++++++++++++++++++++++++++-- src/zen/zen.cpp | 54 +++++++++++++++++++ src/zenutil/chunkedcontent.cpp | 79 +++++++++++++++------------ src/zenutil/jupiter/jupiterbuildstorage.cpp | 31 +++++++++++ 4 files changed, 210 insertions(+), 37 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index fb9da021d..eb0650c4d 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -180,6 +181,8 @@ namespace { void CleanDirectory(const std::filesystem::path& Path, std::span ExcludeDirectories) { + ZEN_TRACE_CPU("CleanDirectory"); + DirectoryContent LocalDirectoryContent; GetDirectoryContent(Path, DirectoryContentFlags::IncludeDirs | DirectoryContentFlags::IncludeFiles, LocalDirectoryContent); for (const std::filesystem::path& LocalFilePath : LocalDirectoryContent.Files) @@ -352,6 +355,8 @@ namespace { std::function&& IsAcceptedFile, ChunkingController& ChunkController) { + ZEN_TRACE_CPU("ScanAndChunkFolder"); + FolderContent Content = GetFolderContent( GetFolderContentStats, Path, @@ -506,6 +511,8 @@ namespace { const std::span& LooseChunkIndexes, const std::span& BlockDescriptions) { + ZEN_TRACE_CPU("CalculateAbsoluteChunkOrders"); + #if EXTRA_VERIFY std::vector TmpAbsoluteChunkHashes; TmpAbsoluteChunkHashes.reserve(LocalChunkHashes.size()); @@ -572,6 +579,8 @@ namespace { std::vector& OutLocalChunkRawSizes, std::vector& OutLocalChunkOrders) { + ZEN_TRACE_CPU("CalculateLocalChunkOrders"); + std::vector AbsoluteChunkHashes; std::vector AbsoluteChunkRawSizes; AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), LooseChunkHashes.begin(), LooseChunkHashes.end()); @@ -955,6 +964,8 @@ namespace { const uint64_t BlockSize = 256u * 1024u; CompositeBuffer GetRange(uint64_t Offset, uint64_t Size) { + ZEN_TRACE_CPU("BufferedOpenFile::GetRange"); + ZEN_ASSERT((CacheBlockIndex == (uint64_t)-1) || Cache); auto _ = MakeGuard([&]() { ZEN_ASSERT((CacheBlockIndex == (uint64_t)-1) || Cache); }); @@ -1022,6 +1033,8 @@ namespace { CompositeBuffer GetRange(uint32_t SequenceIndex, uint64_t Offset, uint64_t Size) { + ZEN_TRACE_CPU("ReadFileCache::GetRange"); + auto CacheIt = std::find_if(m_OpenFiles.begin(), m_OpenFiles.end(), [SequenceIndex](const auto& Lhs) { return Lhs.first == SequenceIndex; }); @@ -1069,6 +1082,8 @@ namespace { CompositeBuffer ValidateBlob(IoBuffer&& Payload, const IoHash& BlobHash, uint64_t& OutCompressedSize, uint64_t& OutDecompressedSize) { + ZEN_TRACE_CPU("ValidateBlob"); + if (Payload.GetContentType() != ZenContentType::kCompressedBinary) { throw std::runtime_error(fmt::format("Blob {} ({} bytes) has unexpected content type '{}'", @@ -3087,6 +3102,8 @@ namespace { void VerifyFolder(const ChunkedFolderContent& Content, const std::filesystem::path& Path, bool VerifyFileHash) { + ZEN_TRACE_CPU("VerifyFolder"); + ProgressBar ProgressBar(UsePlainProgress); std::atomic FilesVerified(0); std::atomic FilesFailed(0); @@ -3133,6 +3150,8 @@ namespace { [&, PathIndex](std::atomic&) { if (!AbortFlag) { + ZEN_TRACE_CPU("VerifyFile_work"); + // TODO: Convert ScheduleWork body to function const std::filesystem::path TargetPath = (Path / Content.Paths[PathIndex]).make_preferred(); @@ -3348,6 +3367,8 @@ namespace { std::atomic& OutChunksComplete, std::atomic& OutBytesWritten) { + ZEN_TRACE_CPU("WriteBlockToDisk"); + std::vector ChunkBuffers; struct WriteOpData { @@ -3438,6 +3459,8 @@ namespace { } if (!AbortFlag) { + ZEN_TRACE_CPU("WriteBlockToDisk_VerifyHash"); + // Write tracking, updating this must be done without any files open (WriteFileCache) for (const WriteOpData& WriteOp : WriteOps) { @@ -3449,7 +3472,7 @@ namespace { IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); if (VerifyChunkHash != SequenceRawHash) { - throw std::runtime_error(fmt::format("Written hunk sequence {} hash does not match expected hash {}", + throw std::runtime_error(fmt::format("Written chunk sequence {} hash does not match expected hash {}", VerifyChunkHash, SequenceRawHash)); } @@ -3468,6 +3491,8 @@ namespace { SharedBuffer Decompress(const CompositeBuffer& CompressedChunk, const IoHash& ChunkHash, const uint64_t ChunkRawSize) { + ZEN_TRACE_CPU("Decompress"); + IoHash RawHash; uint64_t RawSize; CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompressedChunk, RawHash, RawSize); @@ -3506,6 +3531,8 @@ namespace { WriteFileCache& OpenFileCache, std::atomic& OutBytesWritten) { + ZEN_TRACE_CPU("WriteChunkToDisk"); + for (const ChunkedContentLookup::ChunkSequenceLocation* TargetPtr : ChunkTargets) { const auto& Target = *TargetPtr; @@ -3611,6 +3638,8 @@ namespace { std::atomic& ChunksComplete, std::atomic& MultipartAttachmentCount) { + ZEN_TRACE_CPU("DownloadLargeBlob"); + struct WorkloadData { TemporaryFile TempFile; @@ -3666,6 +3695,8 @@ namespace { &WriteToDiskBytes, SequenceIndexChunksLeftToWriteCounters, ChunkTargetPtrs](std::atomic&) { + ZEN_TRACE_CPU("DownloadLargeBlob_Work"); + if (!AbortFlag) { uint64_t CompressedSize = Workload->TempFile.FileSize(); @@ -3729,6 +3760,8 @@ namespace { const uint32_t RemoteSequenceIndex = Location->SequenceIndex; if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) { + ZEN_TRACE_CPU("VerifyChunkHash"); + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; if (NeedHashVerify) @@ -3743,6 +3776,8 @@ namespace { ChunkHash)); } } + + ZEN_TRACE_CPU("VerifyChunkHashes_rename"); std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); } @@ -3784,6 +3819,8 @@ namespace { bool WipeTargetFolder, FolderContent& OutLocalFolderState) { + ZEN_TRACE_CPU("UpdateFolder"); + ZEN_UNUSED(WipeTargetFolder); std::atomic DownloadedBlocks = 0; std::atomic BlockBytes = 0; @@ -3933,6 +3970,8 @@ namespace { std::atomic ChunkCountWritten = 0; { + ZEN_TRACE_CPU("HandleChunks"); + FilteredRate FilteredDownloadedBytesPerSecond; FilteredRate FilteredWrittenBytesPerSecond; @@ -3974,6 +4013,8 @@ namespace { Work.ScheduleWork( NetworkPool, // GetSyncWorkerPool(),// [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) { + ZEN_TRACE_CPU("UpdateFolder_LooseChunk"); + if (!AbortFlag) { FilteredDownloadedBytesPerSecond.Start(); @@ -4019,6 +4060,8 @@ namespace { WritePool, [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs, CompressedPart = std::move(Payload)]( std::atomic&) { + ZEN_TRACE_CPU("UpdateFolder_WriteBlob"); + if (!AbortFlag) { FilteredWrittenBytesPerSecond.Start(); @@ -4066,6 +4109,8 @@ namespace { if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub( 1) == 1) { + ZEN_TRACE_CPU("UpdateFolder_VerifyHash"); + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; if (NeedHashVerify) @@ -4077,12 +4122,14 @@ namespace { if (VerifyChunkHash != SequenceRawHash) { throw std::runtime_error( - fmt::format("Written hunk sequence {} hash does not match " + fmt::format("Written chunk sequence {} hash does not match " "expected hash {}", VerifyChunkHash, SequenceRawHash)); } } + + ZEN_TRACE_CPU("UpdateFolder_rename"); std::filesystem::rename( GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); @@ -4113,6 +4160,8 @@ namespace { [&, CopyDataIndex](std::atomic&) { if (!AbortFlag) { + ZEN_TRACE_CPU("UpdateFolder_Copy"); + FilteredWrittenBytesPerSecond.Start(); const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.LocalSequenceIndex]; @@ -4205,6 +4254,8 @@ namespace { // Write tracking, updating this must be done without any files open (WriteFileCache) for (const WriteOp& Op : WriteOps) { + ZEN_TRACE_CPU("UpdateFolder_Copy_VerifyHash"); + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) { @@ -4215,10 +4266,12 @@ namespace { if (VerifyChunkHash != SequenceRawHash) { throw std::runtime_error( - fmt::format("Written hunk sequence {} hash does not match expected hash {}", + fmt::format("Written chunk sequence {} hash does not match expected hash {}", VerifyChunkHash, SequenceRawHash)); } + + ZEN_TRACE_CPU("UpdateFolder_Copy_rename"); std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); } @@ -4268,6 +4321,8 @@ namespace { [&, BlockIndex](std::atomic&) { if (!AbortFlag) { + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_Read"); + FilteredDownloadedBytesPerSecond.Start(); IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescriptions[BlockIndex].BlockHash); if (!BlockBuffer) @@ -4288,6 +4343,8 @@ namespace { [&, BlockIndex, BlockBuffer = std::move(Payload)](std::atomic&) { if (!AbortFlag) { + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_Write"); + FilteredWrittenBytesPerSecond.Start(); IoHash BlockRawHash; uint64_t BlockRawSize; @@ -4348,6 +4405,8 @@ namespace { } } + ZEN_TRACE_CPU("HandleChunks_Wait"); + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { ZEN_UNUSED(IsAborted, PendingWork); ZEN_ASSERT(ChunkCountToWrite >= ChunkCountWritten.load()); @@ -4411,12 +4470,16 @@ namespace { if (WipeTargetFolder) { + ZEN_TRACE_CPU("UpdateFolder_WipeTarget"); + // Clean target folder ZEN_CONSOLE("Wiping {}", Path); CleanDirectory(Path, DefaultExcludeFolders); } else { + ZEN_TRACE_CPU("UpdateFolder_RemoveUnused"); + // Remove unused tracked files tsl::robin_map RemotePathToRemoteIndex; RemotePathToRemoteIndex.reserve(RemoteContent.Paths.size()); @@ -4468,6 +4531,8 @@ namespace { break; } + ZEN_TRACE_CPU("UpdateFolder_FinalizeTree"); + size_t TargetCount = 1; const IoHash& RawHash = Targets[TargetOffset].first; while (Targets[TargetOffset + TargetCount].first == RawHash) @@ -4480,6 +4545,8 @@ namespace { [&, BaseTargetOffset = TargetOffset, TargetCount](std::atomic&) { if (!AbortFlag) { + ZEN_TRACE_CPU("FinalizeTree_Work"); + size_t TargetOffset = BaseTargetOffset; const IoHash& RawHash = Targets[TargetOffset].first; const uint32_t FirstTargetPathIndex = Targets[TargetOffset].second; @@ -4501,6 +4568,8 @@ namespace { } else { + ZEN_TRACE_CPU("FinalizeTree_MoveIntoPlace"); + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash); ZEN_ASSERT_SLOW(std::filesystem::exists(CacheFilePath)); CreateDirectories(FirstTargetFilePath.parent_path()); @@ -4522,6 +4591,8 @@ namespace { TargetsComplete++; while (TargetOffset < (BaseTargetOffset + TargetCount)) { + ZEN_TRACE_CPU("FinalizeTree_Copy"); + ZEN_ASSERT(Targets[TargetOffset].first == RawHash); ZEN_ASSERT_SLOW(std::filesystem::exists(FirstTargetFilePath)); const uint32_t ExtraTargetPathIndex = Targets[TargetOffset].second; @@ -4555,6 +4626,8 @@ namespace { TargetOffset += TargetCount; } + ZEN_TRACE_CPU("FinalizeTree_Wait"); + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { ZEN_UNUSED(IsAborted, PendingWork); std::string Details = fmt::format("{}/{} files", TargetsComplete.load(), Targets.size()); @@ -4668,6 +4741,8 @@ namespace { std::vector& OutBlockDescriptions, std::vector& OutLooseChunkHashes) { + ZEN_TRACE_CPU("GetRemoteContent"); + Stopwatch GetBuildPartTimer; CbObject BuildPartManifest = Storage.GetBuildPart(BuildId, BuildParts[0].first); ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. Payload size: {}", @@ -5114,6 +5189,8 @@ namespace { bool WipeTargetFolder, bool PostDownloadVerify) { + ZEN_TRACE_CPU("DownloadFolder"); + Stopwatch DownloadTimer; const std::filesystem::path ZenTempFolder = Path / ZenTempFolderName; diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index bee1fd676..713a0d66d 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -31,9 +31,16 @@ #include #include #include +#include #include #include +#include +#include +#include +#include +#include + #if ZEN_WITH_TESTS # define ZEN_TEST_WITH_RUNNER 1 # include @@ -603,6 +610,29 @@ main(int argc, char** argv) Options.add_options()("help", "Show command line help"); Options.add_options()("c, command", "Sub command", cxxopts::value(SubCommand)); +#if ZEN_WITH_TRACE + std::string TraceChannels; + std::string TraceHost; + std::string TraceFile; + + Options.add_option("ue-trace", + "", + "trace", + "Specify which trace channels should be enabled", + cxxopts::value(TraceChannels)->default_value(""), + ""); + + Options.add_option("ue-trace", + "", + "tracehost", + "Hostname to send the trace to", + cxxopts::value(TraceHost)->default_value(""), + ""); + + Options + .add_option("ue-trace", "", "tracefile", "Path to write a trace to", cxxopts::value(TraceFile)->default_value(""), ""); +#endif // ZEN_WITH_TRACE + Options.parse_positional({"command"}); const bool IsNullInvoke = (argc == 1); // If no arguments are passed we want to print usage information @@ -653,6 +683,30 @@ main(int argc, char** argv) logging::SetLogLevel(logging::level::Trace); } +#if ZEN_WITH_TRACE + if (TraceHost.size()) + { + TraceStart("zen", TraceHost.c_str(), TraceType::Network); + } + else if (TraceFile.size()) + { + TraceStart("zen", TraceFile.c_str(), TraceType::File); + } + else + { + TraceInit("zen"); + } +#endif // ZEN_WITH_TRACE + +#if ZEN_WITH_MEMTRACK + FMalloc* TraceMalloc = MemoryTrace_Create(GMalloc); + if (TraceMalloc != GMalloc) + { + GMalloc = TraceMalloc; + MemoryTrace_Initialize(); + } +#endif + for (const CommandInfo& CmdInfo : Commands) { if (StrCaseCompare(SubCommand.c_str(), CmdInfo.CmdName) == 0) diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index 1552ea823..4ca89d996 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -23,13 +24,13 @@ namespace zen { using namespace std::literals; namespace { - void AddCunkSequence(ChunkingStatistics& Stats, - ChunkedContentData& InOutChunkedContent, - tsl::robin_map& ChunkHashToChunkIndex, - const IoHash& RawHash, - std::span ChunkSequence, - std::span ChunkHashes, - std::span ChunkRawSizes) + void AddChunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map& ChunkHashToChunkIndex, + const IoHash& RawHash, + std::span ChunkSequence, + std::span ChunkHashes, + std::span ChunkRawSizes) { ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size()); InOutChunkedContent.ChunkCounts.push_back(gsl::narrow(ChunkSequence.size())); @@ -58,11 +59,11 @@ namespace { Stats.UniqueSequencesFound++; } - void AddCunkSequence(ChunkingStatistics& Stats, - ChunkedContentData& InOutChunkedContent, - tsl::robin_map& ChunkHashToChunkIndex, - const IoHash& RawHash, - const uint64_t RawSize) + void AddChunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map& ChunkHashToChunkIndex, + const IoHash& RawHash, + const uint64_t RawSize) { InOutChunkedContent.ChunkCounts.push_back(1); @@ -120,13 +121,13 @@ namespace { { ChunkSizes.push_back(Source.Size); } - AddCunkSequence(Stats, - OutChunkedContent.ChunkedContent, - ChunkHashToChunkIndex, - Chunked.Info.RawHash, - Chunked.Info.ChunkSequence, - Chunked.Info.ChunkHashes, - ChunkSizes); + AddChunkSequence(Stats, + OutChunkedContent.ChunkedContent, + ChunkHashToChunkIndex, + Chunked.Info.RawHash, + Chunked.Info.ChunkSequence, + Chunked.Info.ChunkHashes, + ChunkSizes); Stats.UniqueSequencesFound++; } }); @@ -143,7 +144,7 @@ namespace { { RawHashToSequenceRawHashIndex.insert( {Hash, gsl::narrow(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); - AddCunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); + AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); Stats.UniqueSequencesFound++; } }); @@ -360,6 +361,8 @@ GetFolderContent(GetFolderContentStatistics& Stats, std::function&& UpdateCallback, std::atomic& AbortFlag) { + ZEN_TRACE_CPU("GetFolderContent"); + Stopwatch Timer; auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); @@ -528,6 +531,8 @@ LoadChunkedFolderContentToCompactBinary(CbObjectView Input) ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span Overlays) { + ZEN_TRACE_CPU("MergeChunkedFolderContents"); + ZEN_ASSERT(!Overlays.empty()); ChunkedFolderContent Result; @@ -605,13 +610,13 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span OriginalChunkOrder = std::span(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); - AddCunkSequence(Stats, - Result.ChunkedContent, - ChunkHashToChunkIndex, - RawHash, - OriginalChunkOrder, - OverlayContent.ChunkedContent.ChunkHashes, - OverlayContent.ChunkedContent.ChunkRawSizes); + AddChunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + OverlayContent.ChunkedContent.ChunkHashes, + OverlayContent.ChunkedContent.ChunkRawSizes); Stats.UniqueSequencesFound++; } } @@ -636,6 +641,8 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span DeletedPaths) { + ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); + ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); ChunkedFolderContent Result = {.Platform = BaseContent.Platform}; if (DeletedPaths.size() < BaseContent.Paths.size()) @@ -673,13 +680,13 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span ChunkingStatistics Stats; std::span OriginalChunkOrder = std::span(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); - AddCunkSequence(Stats, - Result.ChunkedContent, - ChunkHashToChunkIndex, - RawHash, - OriginalChunkOrder, - BaseContent.ChunkedContent.ChunkHashes, - BaseContent.ChunkedContent.ChunkRawSizes); + AddChunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + BaseContent.ChunkedContent.ChunkHashes, + BaseContent.ChunkedContent.ChunkRawSizes); Stats.UniqueSequencesFound++; } } @@ -699,6 +706,8 @@ ChunkFolderContent(ChunkingStatistics& Stats, std::function&& UpdateCallback, std::atomic& AbortFlag) { + ZEN_TRACE_CPU("ChunkFolderContent"); + Stopwatch Timer; auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); @@ -775,6 +784,8 @@ ChunkFolderContent(ChunkingStatistics& Stats, ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content) { + ZEN_TRACE_CPU("BuildChunkedContentLookup"); + struct ChunkLocationReference { uint32_t ChunkIndex; diff --git a/src/zenutil/jupiter/jupiterbuildstorage.cpp b/src/zenutil/jupiter/jupiterbuildstorage.cpp index 481e9146f..309885b05 100644 --- a/src/zenutil/jupiter/jupiterbuildstorage.cpp +++ b/src/zenutil/jupiter/jupiterbuildstorage.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include ZEN_THIRD_PARTY_INCLUDES_START @@ -36,6 +37,8 @@ public: virtual CbObject ListBuilds(CbObject Query) override { + ZEN_TRACE_CPU("Jupiter::ListBuilds"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); IoBuffer Payload = Query.GetBuffer().AsIoBuffer(); @@ -51,6 +54,8 @@ public: virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override { + ZEN_TRACE_CPU("Jupiter::PutBuild"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); @@ -66,6 +71,8 @@ public: virtual CbObject GetBuild(const Oid& BuildId) override { + ZEN_TRACE_CPU("Jupiter::GetBuild"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult GetBuildResult = m_Session.GetBuild(m_Namespace, m_Bucket, BuildId); @@ -79,6 +86,8 @@ public: virtual void FinalizeBuild(const Oid& BuildId) override { + ZEN_TRACE_CPU("Jupiter::FinalizeBuild"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult FinalizeBuildResult = m_Session.FinalizeBuild(m_Namespace, m_Bucket, BuildId); @@ -95,6 +104,8 @@ public: std::string_view PartName, const CbObject& MetaData) override { + ZEN_TRACE_CPU("Jupiter::PutBuildPart"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); @@ -110,6 +121,8 @@ public: virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override { + ZEN_TRACE_CPU("Jupiter::GetBuildPart"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult GetBuildPartResult = m_Session.GetBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId); @@ -126,6 +139,8 @@ public: virtual std::vector FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override { + ZEN_TRACE_CPU("Jupiter::FinalizeBuildPart"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); FinalizeBuildPartResult FinalizePartResult = m_Session.FinalizeBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId, PartHash); @@ -143,6 +158,8 @@ public: ZenContentType ContentType, const CompositeBuffer& Payload) override { + ZEN_TRACE_CPU("Jupiter::PutBuildBlob"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult PutBlobResult = m_Session.PutBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, ContentType, Payload); @@ -160,6 +177,8 @@ public: std::function&& Transmitter, std::function&& OnSentBytes) override { + ZEN_TRACE_CPU("Jupiter::PutLargeBuildBlob"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); std::vector> WorkItems; @@ -200,6 +219,8 @@ public: virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) override { + ZEN_TRACE_CPU("Jupiter::GetBuildBlob"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult GetBuildBlobResult = m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath); @@ -218,6 +239,8 @@ public: uint64_t ChunkSize, std::function&& Receiver) override { + ZEN_TRACE_CPU("Jupiter::GetLargeBuildBlob"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); std::vector> WorkItems; @@ -249,6 +272,8 @@ public: virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override { + ZEN_TRACE_CPU("Jupiter::PutBlockMetadata"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); @@ -264,6 +289,8 @@ public: virtual std::vector FindBlocks(const Oid& BuildId) override { + ZEN_TRACE_CPU("Jupiter::FindBlocks"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult FindResult = m_Session.FindBlocks(m_Namespace, m_Bucket, BuildId); @@ -277,6 +304,8 @@ public: virtual std::vector GetBlockMetadata(const Oid& BuildId, std::span BlockHashes) override { + ZEN_TRACE_CPU("Jupiter::GetBlockMetadata"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); CbObjectWriter Request; @@ -365,6 +394,8 @@ CreateJupiterBuildStorage(LoggerRef InLog, std::string_view Bucket, const std::filesystem::path& TempFolderPath) { + ZEN_TRACE_CPU("CreateJupiterBuildStorage"); + return std::make_unique(InLog, InHttpClient, Stats, Namespace, Bucket, TempFolderPath); } -- cgit v1.2.3 From 920120bbcec9f91df3336f62970b3e010a4fa6c2 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Thu, 6 Mar 2025 16:18:32 +0100 Subject: reduced memory churn using fixed_xxx containers (#236) * Added EASTL to help with eliminating memory allocations * Applied EASTL to eliminate memory allocations, primarily by using `fixed_vector` et al to use stack allocations / inline struct allocations Reduces memory events in traces by close to a factor of 10 in test scenario (starting editor for project F) --- src/zencore/compactbinary.cpp | 10 +- src/zencore/compactbinarybuilder.cpp | 11 +- src/zencore/compactbinarypackage.cpp | 14 ++- src/zencore/include/zencore/compactbinarybuilder.h | 8 +- src/zencore/include/zencore/compactbinarypackage.h | 15 ++- src/zencore/include/zencore/compositebuffer.h | 38 ++++++- src/zencore/include/zencore/eastlutil.h | 20 ++++ src/zencore/include/zencore/memory/newdelete.h | 26 +++++ src/zencore/xmake.lua | 1 + src/zenhttp/httpserver.cpp | 6 +- src/zenhttp/include/zenhttp/httpserver.h | 8 +- src/zenhttp/packageformat.cpp | 24 +++- src/zenhttp/servers/httpsys.cpp | 18 +-- src/zenserver/objectstore/objectstore.cpp | 27 +++-- src/zenserver/objectstore/objectstore.h | 4 +- src/zenserver/projectstore/httpprojectstore.cpp | 32 +++--- src/zenserver/projectstore/projectstore.cpp | 45 +++++--- src/zenserver/workspaces/httpworkspaces.cpp | 12 +- src/zenstore/cache/cachedisklayer.cpp | 121 ++++++++++++--------- src/zenstore/cache/cacherpc.cpp | 73 +++++++------ src/zenstore/cache/structuredcachestore.cpp | 8 +- .../include/zenstore/cache/cachedisklayer.h | 38 ++++--- src/zenstore/include/zenstore/cache/cacheshared.h | 4 + .../include/zenstore/cache/structuredcachestore.h | 6 +- src/zenstore/xmake.lua | 1 + src/zenutil/include/zenutil/cache/cachekey.h | 6 + 26 files changed, 377 insertions(+), 199 deletions(-) create mode 100644 src/zencore/include/zencore/eastlutil.h (limited to 'src') diff --git a/src/zencore/compactbinary.cpp b/src/zencore/compactbinary.cpp index adccaba70..b43cc18f1 100644 --- a/src/zencore/compactbinary.cpp +++ b/src/zencore/compactbinary.cpp @@ -15,6 +15,8 @@ #include #include +#include + #include #include @@ -1376,9 +1378,9 @@ TryMeasureCompactBinary(MemoryView View, CbFieldType& OutType, uint64_t& OutSize CbField LoadCompactBinary(BinaryReader& Ar, BufferAllocator Allocator) { - std::vector HeaderBytes; - CbFieldType FieldType; - uint64_t FieldSize = 1; + eastl::fixed_vector HeaderBytes; + CbFieldType FieldType; + uint64_t FieldSize = 1; for (const int64_t StartPos = Ar.CurrentOffset(); FieldSize > 0;) { @@ -1393,7 +1395,7 @@ LoadCompactBinary(BinaryReader& Ar, BufferAllocator Allocator) HeaderBytes.resize(ReadOffset + ReadSize); Ar.Read(HeaderBytes.data() + ReadOffset, ReadSize); - if (TryMeasureCompactBinary(MakeMemoryView(HeaderBytes), FieldType, FieldSize)) + if (TryMeasureCompactBinary(MakeMemoryView(HeaderBytes.data(), HeaderBytes.size()), FieldType, FieldSize)) { if (FieldSize <= uint64_t(Ar.Size() - StartPos)) { diff --git a/src/zencore/compactbinarybuilder.cpp b/src/zencore/compactbinarybuilder.cpp index a60de023d..63c0b9c5c 100644 --- a/src/zencore/compactbinarybuilder.cpp +++ b/src/zencore/compactbinarybuilder.cpp @@ -15,23 +15,21 @@ namespace zen { -template uint64_t -AddUninitialized(std::vector& Vector, uint64_t Count) +AddUninitialized(CbWriter::CbWriterData_t& Vector, uint64_t Count) { const uint64_t Offset = Vector.size(); Vector.resize(Offset + Count); return Offset; } -template uint64_t -Append(std::vector& Vector, const T* Data, uint64_t Count) +Append(CbWriter::CbWriterData_t& Vector, const uint8_t* Data, uint64_t Count) { const uint64_t Offset = Vector.size(); Vector.resize(Offset + Count); - memcpy(Vector.data() + Offset, Data, sizeof(T) * Count); + memcpy(Vector.data() + Offset, Data, sizeof(uint8_t) * Count); return Offset; } @@ -76,7 +74,7 @@ IsUniformType(const CbFieldType Type) /** Append the payload from the compact binary value to the array and return its type. */ static inline CbFieldType -AppendCompactBinary(const CbFieldView& Value, std::vector& OutData) +AppendCompactBinary(const CbFieldView& Value, CbWriter::CbWriterData_t& OutData) { struct FCopy : public CbFieldView { @@ -93,7 +91,6 @@ AppendCompactBinary(const CbFieldView& Value, std::vector& OutData) CbWriter::CbWriter() { - States.reserve(4); States.emplace_back(); } diff --git a/src/zencore/compactbinarypackage.cpp b/src/zencore/compactbinarypackage.cpp index 7de161845..ffe64f2e9 100644 --- a/src/zencore/compactbinarypackage.cpp +++ b/src/zencore/compactbinarypackage.cpp @@ -3,10 +3,13 @@ #include "zencore/compactbinarypackage.h" #include #include +#include #include #include #include +#include + namespace zen { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -340,6 +343,12 @@ CbPackage::SetObject(CbObject InObject, const IoHash* InObjectHash, AttachmentRe } } +void +CbPackage::ReserveAttachments(size_t Count) +{ + Attachments.reserve(Count); +} + void CbPackage::AddAttachment(const CbAttachment& Attachment, AttachmentResolver* Resolver) { @@ -374,17 +383,18 @@ CbPackage::AddAttachments(std::span InAttachments) { ZEN_ASSERT(!Attachment.IsNull()); } + // Assume we have no duplicates! Attachments.insert(Attachments.end(), InAttachments.begin(), InAttachments.end()); std::sort(Attachments.begin(), Attachments.end()); - ZEN_ASSERT_SLOW(std::unique(Attachments.begin(), Attachments.end()) == Attachments.end()); + ZEN_ASSERT_SLOW(eastl::unique(Attachments.begin(), Attachments.end()) == Attachments.end()); } int32_t CbPackage::RemoveAttachment(const IoHash& Hash) { return gsl::narrow_cast( - std::erase_if(Attachments, [&Hash](const CbAttachment& Attachment) -> bool { return Attachment.GetHash() == Hash; })); + erase_if(Attachments, [&Hash](const CbAttachment& Attachment) -> bool { return Attachment.GetHash() == Hash; })); } bool diff --git a/src/zencore/include/zencore/compactbinarybuilder.h b/src/zencore/include/zencore/compactbinarybuilder.h index 1c625cacc..f11717453 100644 --- a/src/zencore/include/zencore/compactbinarybuilder.h +++ b/src/zencore/include/zencore/compactbinarybuilder.h @@ -18,6 +18,8 @@ #include #include +#include + #include namespace zen { @@ -367,6 +369,8 @@ public: /** Private flags that are public to work with ENUM_CLASS_FLAGS. */ enum class StateFlags : uint8_t; + typedef eastl::fixed_vector CbWriterData_t; + protected: /** Reserve the specified size up front until the format is optimized. */ ZENCORE_API explicit CbWriter(int64_t InitialSize); @@ -409,8 +413,8 @@ private: // provided externally, such as on the stack. That format will store the offsets that require // object or array sizes to be inserted and field types to be removed, and will perform those // operations only when saving to a buffer. - std::vector Data; - std::vector States; + eastl::fixed_vector States; + CbWriterData_t Data; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/zencore/include/zencore/compactbinarypackage.h b/src/zencore/include/zencore/compactbinarypackage.h index 12fcc41b7..9ec12cb0f 100644 --- a/src/zencore/include/zencore/compactbinarypackage.h +++ b/src/zencore/include/zencore/compactbinarypackage.h @@ -12,6 +12,8 @@ #include #include +#include + #ifdef GetObject # error "windows.h pollution" # undef GetObject @@ -265,7 +267,10 @@ public: } /** Returns the attachments in this package. */ - inline std::span GetAttachments() const { return Attachments; } + inline std::span GetAttachments() const + { + return std::span(begin(Attachments), end(Attachments)); + } /** * Find an attachment by its hash. @@ -286,6 +291,8 @@ public: void AddAttachments(std::span Attachments); + void ReserveAttachments(size_t Count); + /** * Remove an attachment by hash. * @@ -324,9 +331,9 @@ private: void GatherAttachments(const CbObject& Object, AttachmentResolver Resolver); /** Attachments ordered by their hash. */ - std::vector Attachments; - CbObject Object; - IoHash ObjectHash; + eastl::fixed_vector Attachments; + CbObject Object; + IoHash ObjectHash; }; namespace legacy { diff --git a/src/zencore/include/zencore/compositebuffer.h b/src/zencore/include/zencore/compositebuffer.h index b435c5e74..1e1611de9 100644 --- a/src/zencore/include/zencore/compositebuffer.h +++ b/src/zencore/include/zencore/compositebuffer.h @@ -2,6 +2,7 @@ #pragma once +#include #include #include @@ -9,6 +10,8 @@ #include #include +#include + namespace zen { /** @@ -35,7 +38,7 @@ public: { m_Segments.reserve((GetBufferCount(std::forward(Buffers)) + ...)); (AppendBuffers(std::forward(Buffers)), ...); - std::erase_if(m_Segments, [](const SharedBuffer& It) { return It.IsNull(); }); + erase_if(m_Segments, [](const SharedBuffer& It) { return It.IsNull(); }); } } @@ -46,7 +49,10 @@ public: [[nodiscard]] ZENCORE_API uint64_t GetSize() const; /** Returns the segments that the buffer is composed from. */ - [[nodiscard]] inline std::span GetSegments() const { return std::span{m_Segments}; } + [[nodiscard]] inline std::span GetSegments() const + { + return std::span{begin(m_Segments), end(m_Segments)}; + } /** Returns true if the composite buffer is not null. */ [[nodiscard]] inline explicit operator bool() const { return !IsNull(); } @@ -120,6 +126,8 @@ public: static const CompositeBuffer Null; private: + typedef eastl::fixed_vector SharedBufferVector_t; + static inline size_t GetBufferCount(const CompositeBuffer& Buffer) { return Buffer.m_Segments.size(); } inline void AppendBuffers(const CompositeBuffer& Buffer) { @@ -134,12 +142,25 @@ private: inline void AppendBuffers(SharedBuffer&& Buffer) { m_Segments.push_back(std::move(Buffer)); } inline void AppendBuffers(IoBuffer&& Buffer) { m_Segments.push_back(SharedBuffer(std::move(Buffer))); } + static inline size_t GetBufferCount(std::span&& Container) { return Container.size(); } + inline void AppendBuffers(std::span&& Container) + { + m_Segments.reserve(m_Segments.size() + Container.size()); + for (IoBuffer& Buffer : Container) + { + m_Segments.emplace_back(SharedBuffer(std::move(Buffer))); + } + } + static inline size_t GetBufferCount(std::vector&& Container) { return Container.size(); } static inline size_t GetBufferCount(std::vector&& Container) { return Container.size(); } inline void AppendBuffers(std::vector&& Container) { m_Segments.reserve(m_Segments.size() + Container.size()); - m_Segments.insert(m_Segments.end(), std::make_move_iterator(Container.begin()), std::make_move_iterator(Container.end())); + for (SharedBuffer& Buffer : Container) + { + m_Segments.emplace_back(std::move(Buffer)); + } } inline void AppendBuffers(std::vector&& Container) { @@ -150,8 +171,17 @@ private: } } + inline void AppendBuffers(SharedBufferVector_t&& Container) + { + m_Segments.reserve(m_Segments.size() + Container.size()); + for (SharedBuffer& Buffer : Container) + { + m_Segments.emplace_back(std::move(Buffer)); + } + } + private: - std::vector m_Segments; + SharedBufferVector_t m_Segments; }; void compositebuffer_forcelink(); // internal diff --git a/src/zencore/include/zencore/eastlutil.h b/src/zencore/include/zencore/eastlutil.h new file mode 100644 index 000000000..642321dae --- /dev/null +++ b/src/zencore/include/zencore/eastlutil.h @@ -0,0 +1,20 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include + +namespace zen { + +size_t +erase_if(auto& _Cont, auto Predicate) +{ + auto _First = _Cont.begin(); + const auto _Last = _Cont.end(); + const auto _Old_size = _Cont.size(); + _First = std::remove_if(_First, _Last, Predicate); + _Cont.erase(_First, _Last); + return _Old_size - _Cont.size(); +} + +} // namespace zen diff --git a/src/zencore/include/zencore/memory/newdelete.h b/src/zencore/include/zencore/memory/newdelete.h index d22c8604f..059f1d5ea 100644 --- a/src/zencore/include/zencore/memory/newdelete.h +++ b/src/zencore/include/zencore/memory/newdelete.h @@ -153,3 +153,29 @@ operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexce return zen_new_aligned_nothrow(n, static_cast(al)); } #endif + +// EASTL operator new + +void* +operator new[](size_t size, const char* pName, int flags, unsigned debugFlags, const char* file, int line) +{ + ZEN_UNUSED(pName, flags, debugFlags, file, line); + return zen_new(size); +} + +void* +operator new[](size_t size, + size_t alignment, + size_t alignmentOffset, + const char* pName, + int flags, + unsigned debugFlags, + const char* file, + int line) +{ + ZEN_UNUSED(alignmentOffset, pName, flags, debugFlags, file, line); + + ZEN_ASSERT_SLOW(alignmentOffset == 0); // currently not supported + + return zen_new_aligned(size, alignment); +} diff --git a/src/zencore/xmake.lua b/src/zencore/xmake.lua index 2efa3fdb8..b8b14084c 100644 --- a/src/zencore/xmake.lua +++ b/src/zencore/xmake.lua @@ -55,6 +55,7 @@ target('zencore') add_packages( "vcpkg::doctest", + "vcpkg::eastl", "vcpkg::fmt", "vcpkg::gsl-lite", "vcpkg::lz4", diff --git a/src/zenhttp/httpserver.cpp b/src/zenhttp/httpserver.cpp index 1fbe22628..27a09f339 100644 --- a/src/zenhttp/httpserver.cpp +++ b/src/zenhttp/httpserver.cpp @@ -31,6 +31,8 @@ #include #include +#include + namespace zen { using namespace std::literals; @@ -529,7 +531,7 @@ HttpServerRequest::WriteResponse(HttpResponseCode ResponseCode, HttpContentType { std::span Segments = Payload.GetSegments(); - std::vector Buffers; + eastl::fixed_vector Buffers; Buffers.reserve(Segments.size()); for (auto& Segment : Segments) @@ -537,7 +539,7 @@ HttpServerRequest::WriteResponse(HttpResponseCode ResponseCode, HttpContentType Buffers.push_back(Segment.AsIoBuffer()); } - WriteResponse(ResponseCode, ContentType, Buffers); + WriteResponse(ResponseCode, ContentType, std::span(begin(Buffers), end(Buffers))); } std::string diff --git a/src/zenhttp/include/zenhttp/httpserver.h b/src/zenhttp/include/zenhttp/httpserver.h index 7b87cb84b..217455dba 100644 --- a/src/zenhttp/include/zenhttp/httpserver.h +++ b/src/zenhttp/include/zenhttp/httpserver.h @@ -208,7 +208,7 @@ class HttpRouterRequest public: HttpRouterRequest(HttpServerRequest& Request) : m_HttpRequest(Request) {} - ZENCORE_API std::string GetCapture(uint32_t Index) const; + std::string_view GetCapture(uint32_t Index) const; inline HttpServerRequest& ServerRequest() { return m_HttpRequest; } private: @@ -220,12 +220,14 @@ private: friend class HttpRequestRouter; }; -inline std::string +inline std::string_view HttpRouterRequest::GetCapture(uint32_t Index) const { ZEN_ASSERT(Index < m_Match.size()); - return m_Match[Index]; + const auto& Match = m_Match[Index]; + + return std::string_view(&*Match.first, Match.second - Match.first); } /** HTTP request router helper diff --git a/src/zenhttp/packageformat.cpp b/src/zenhttp/packageformat.cpp index 676fc73fd..ae80851e4 100644 --- a/src/zenhttp/packageformat.cpp +++ b/src/zenhttp/packageformat.cpp @@ -19,6 +19,8 @@ #include #include +#include + #if ZEN_PLATFORM_WINDOWS # include #endif @@ -31,6 +33,10 @@ namespace zen { const std::string_view HandlePrefix(":?#:"); +typedef eastl::fixed_vector IoBufferVec_t; + +IoBufferVec_t FormatPackageMessageInternal(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle); + std::vector FormatPackageMessage(const CbPackage& Data, void* TargetProcessHandle) { @@ -42,10 +48,18 @@ FormatPackageMessageBuffer(const CbPackage& Data, void* TargetProcessHandle) return FormatPackageMessageBuffer(Data, FormatFlags::kDefault, TargetProcessHandle); } +std::vector +FormatPackageMessage(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) +{ + auto Vec = FormatPackageMessageInternal(Data, Flags, TargetProcessHandle); + return std::vector(begin(Vec), end(Vec)); +} + CompositeBuffer FormatPackageMessageBuffer(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) { - return CompositeBuffer(FormatPackageMessage(Data, Flags, TargetProcessHandle)); + auto Vec = FormatPackageMessageInternal(Data, Flags, TargetProcessHandle); + return CompositeBuffer(std::span{begin(Vec), end(Vec)}); } static void @@ -54,7 +68,7 @@ MarshalLocal(CbAttachmentEntry*& AttachmentInfo, CbAttachmentReferenceHeader& LocalRef, const IoHash& AttachmentHash, bool IsCompressed, - std::vector& ResponseBuffers) + IoBufferVec_t& ResponseBuffers) { IoBuffer RefBuffer(sizeof(CbAttachmentReferenceHeader) + Path8.size()); @@ -146,8 +160,8 @@ IsLocalRef(tsl::robin_map& FileNameMap, return true; }; -std::vector -FormatPackageMessage(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) +IoBufferVec_t +FormatPackageMessageInternal(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) { ZEN_TRACE_CPU("FormatPackageMessage"); @@ -177,7 +191,7 @@ FormatPackageMessage(const CbPackage& Data, FormatFlags Flags, void* TargetProce #endif // ZEN_PLATFORM_WINDOWS const std::span& Attachments = Data.GetAttachments(); - std::vector ResponseBuffers; + IoBufferVec_t ResponseBuffers; ResponseBuffers.reserve(2 + Attachments.size()); // TODO: may want to use an additional fudge factor here to avoid growing since each // attachment is likely to consist of several buffers diff --git a/src/zenhttp/servers/httpsys.cpp b/src/zenhttp/servers/httpsys.cpp index 87128c0c9..3bdcdf098 100644 --- a/src/zenhttp/servers/httpsys.cpp +++ b/src/zenhttp/servers/httpsys.cpp @@ -16,6 +16,8 @@ #include #include +#include + #if ZEN_WITH_HTTPSYS # define _WINSOCKAPI_ # include @@ -381,14 +383,14 @@ public: void SuppressResponseBody(); // typically used for HEAD requests private: - std::vector m_HttpDataChunks; - uint64_t m_TotalDataSize = 0; // Sum of all chunk sizes - uint16_t m_ResponseCode = 0; - uint32_t m_NextDataChunkOffset = 0; // Cursor used for very large chunk lists - uint32_t m_RemainingChunkCount = 0; // Backlog for multi-call sends - bool m_IsInitialResponse = true; - HttpContentType m_ContentType = HttpContentType::kBinary; - std::vector m_DataBuffers; + eastl::fixed_vector m_HttpDataChunks; + uint64_t m_TotalDataSize = 0; // Sum of all chunk sizes + uint16_t m_ResponseCode = 0; + uint32_t m_NextDataChunkOffset = 0; // Cursor used for very large chunk lists + uint32_t m_RemainingChunkCount = 0; // Backlog for multi-call sends + bool m_IsInitialResponse = true; + HttpContentType m_ContentType = HttpContentType::kBinary; + eastl::fixed_vector m_DataBuffers; void InitializeForPayload(uint16_t ResponseCode, std::span Blobs); }; diff --git a/src/zenserver/objectstore/objectstore.cpp b/src/zenserver/objectstore/objectstore.cpp index 5d96de225..e757ef84e 100644 --- a/src/zenserver/objectstore/objectstore.cpp +++ b/src/zenserver/objectstore/objectstore.cpp @@ -269,9 +269,9 @@ HttpObjectStoreService::Inititalize() m_Router.RegisterRoute( "bucket/{path}", [this](zen::HttpRouterRequest& Request) { - const std::string Path = Request.GetCapture(1); - const auto Sep = Path.find_last_of('.'); - const bool IsObject = Sep != std::string::npos && Path.size() - Sep > 0; + const std::string_view Path = Request.GetCapture(1); + const auto Sep = Path.find_last_of('.'); + const bool IsObject = Sep != std::string::npos && Path.size() - Sep > 0; if (IsObject) { @@ -337,18 +337,18 @@ HttpObjectStoreService::CreateBucket(zen::HttpRouterRequest& Request) } void -HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::string& Path) +HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::string_view Path) { namespace fs = std::filesystem; - const auto Sep = Path.find_first_of('/'); - const std::string BucketName = Sep == std::string::npos ? Path : Path.substr(0, Sep); + const auto Sep = Path.find_first_of('/'); + const std::string BucketName{Sep == std::string::npos ? Path : Path.substr(0, Sep)}; if (BucketName.empty()) { return Request.ServerRequest().WriteResponse(HttpResponseCode::BadRequest); } - std::string BucketPrefix = Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1); + std::string BucketPrefix{Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1)}; if (BucketPrefix.empty()) { const auto QueryParms = Request.ServerRequest().GetQueryParams(); @@ -450,14 +450,13 @@ HttpObjectStoreService::DeleteBucket(zen::HttpRouterRequest& Request) } void -HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::string& Path) +HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::string_view Path) { namespace fs = std::filesystem; - const auto Sep = Path.find_first_of('/'); - const std::string BucketName = Sep == std::string::npos ? Path : Path.substr(0, Sep); - const std::string BucketPrefix = - Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1); + const auto Sep = Path.find_first_of('/'); + const std::string BucketName{Sep == std::string::npos ? Path : Path.substr(0, Sep)}; + const std::string BucketPrefix{Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1)}; const fs::path BucketDir = GetBucketDirectory(BucketName); @@ -554,8 +553,8 @@ HttpObjectStoreService::PutObject(zen::HttpRouterRequest& Request) { namespace fs = std::filesystem; - const std::string& BucketName = Request.GetCapture(1); - const fs::path BucketDir = GetBucketDirectory(BucketName); + const std::string_view BucketName = Request.GetCapture(1); + const fs::path BucketDir = GetBucketDirectory(BucketName); if (BucketDir.empty()) { diff --git a/src/zenserver/objectstore/objectstore.h b/src/zenserver/objectstore/objectstore.h index c905ceab3..dae979c4c 100644 --- a/src/zenserver/objectstore/objectstore.h +++ b/src/zenserver/objectstore/objectstore.h @@ -36,9 +36,9 @@ private: void Inititalize(); std::filesystem::path GetBucketDirectory(std::string_view BucketName); void CreateBucket(zen::HttpRouterRequest& Request); - void ListBucket(zen::HttpRouterRequest& Request, const std::string& Path); + void ListBucket(zen::HttpRouterRequest& Request, const std::string_view Path); void DeleteBucket(zen::HttpRouterRequest& Request); - void GetObject(zen::HttpRouterRequest& Request, const std::string& Path); + void GetObject(zen::HttpRouterRequest& Request, const std::string_view Path); void PutObject(zen::HttpRouterRequest& Request); ObjectStoreConfig m_Cfg; diff --git a/src/zenserver/projectstore/httpprojectstore.cpp b/src/zenserver/projectstore/httpprojectstore.cpp index 0b8e5f13b..47748dd90 100644 --- a/src/zenserver/projectstore/httpprojectstore.cpp +++ b/src/zenserver/projectstore/httpprojectstore.cpp @@ -983,15 +983,19 @@ HttpProjectService::HandleOplogOpPrepRequest(HttpRouterRequest& Req) IoBuffer Payload = HttpReq.ReadPayload(); CbObject RequestObject = LoadCompactBinaryObject(Payload); - std::vector ChunkList; - CbArrayView HaveList = RequestObject["have"sv].AsArrayView(); - ChunkList.reserve(HaveList.Num()); - for (auto& Entry : HaveList) + std::vector NeedList; + { - ChunkList.push_back(Entry.AsHash()); - } + eastl::fixed_vector ChunkList; + CbArrayView HaveList = RequestObject["have"sv].AsArrayView(); + ChunkList.reserve(HaveList.Num()); + for (auto& Entry : HaveList) + { + ChunkList.push_back(Entry.AsHash()); + } - std::vector NeedList = FoundLog->CheckPendingChunkReferences(ChunkList, std::chrono::minutes(2)); + NeedList = FoundLog->CheckPendingChunkReferences(std::span(begin(ChunkList), end(ChunkList)), std::chrono::minutes(2)); + } CbObjectWriter Cbo(1 + 1 + 5 + NeedList.size() * (1 + sizeof(IoHash::Hash)) + 1); Cbo.BeginArray("need"); @@ -1151,7 +1155,7 @@ HttpProjectService::HandleOplogOpNewRequest(HttpRouterRequest& Req) return HttpReq.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "No oplog entry key specified"); } - std::vector ReferencedChunks; + eastl::fixed_vector ReferencedChunks; Core.IterateAttachments([&ReferencedChunks](CbFieldView View) { ReferencedChunks.push_back(View.AsAttachment()); }); // Write core to oplog @@ -1169,7 +1173,7 @@ HttpProjectService::HandleOplogOpNewRequest(HttpRouterRequest& Req) // Once we stored the op, we no longer need to retain any chunks this op references if (!ReferencedChunks.empty()) { - FoundLog->RemovePendingChunkReferences(ReferencedChunks); + FoundLog->RemovePendingChunkReferences(std::span(begin(ReferencedChunks), end(ReferencedChunks))); } m_ProjectStats.OpWriteCount++; @@ -1301,9 +1305,9 @@ HttpProjectService::HandleOpLogOpRequest(HttpRouterRequest& Req) HttpServerRequest& HttpReq = Req.ServerRequest(); - const std::string& ProjectId = Req.GetCapture(1); - const std::string& OplogId = Req.GetCapture(2); - const std::string& OpIdString = Req.GetCapture(3); + const std::string_view ProjectId = Req.GetCapture(1); + const std::string_view OplogId = Req.GetCapture(2); + const std::string_view OpIdString = Req.GetCapture(3); Ref Project = m_ProjectStore->OpenProject(ProjectId); if (!Project) @@ -1690,8 +1694,8 @@ HttpProjectService::HandleProjectRequest(HttpRouterRequest& Req) using namespace std::literals; - HttpServerRequest& HttpReq = Req.ServerRequest(); - const std::string ProjectId = Req.GetCapture(1); + HttpServerRequest& HttpReq = Req.ServerRequest(); + const std::string_view ProjectId = Req.GetCapture(1); switch (HttpReq.RequestVerb()) { diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index 53df12b14..86791e29a 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -423,9 +423,13 @@ ComputeOpKey(const CbObjectView& Op) { using namespace std::literals; - BinaryWriter KeyStream; + eastl::fixed_vector KeyData; - Op["key"sv].WriteToStream([&](const void* Data, size_t Size) { KeyStream.Write(Data, Size); }); + Op["key"sv].WriteToStream([&](const void* Data, size_t Size) { + auto Begin = reinterpret_cast(Data); + auto End = Begin + Size; + KeyData.insert(KeyData.end(), Begin, End); + }); XXH3_128 KeyHash128; @@ -434,15 +438,15 @@ ComputeOpKey(const CbObjectView& Op) // path but longer paths are evaluated properly. In the future all key lengths // should be evaluated using the proper path, this is a temporary workaround to // maintain compatibility with existing disk state. - if (KeyStream.GetSize() < 240) + if (KeyData.size() < 240) { XXH3_128Stream_deprecated KeyHasher; - KeyHasher.Append(KeyStream.Data(), KeyStream.Size()); + KeyHasher.Append(KeyData.data(), KeyData.size()); KeyHash128 = KeyHasher.GetHash(); } else { - KeyHash128 = XXH3_128::HashMemory(KeyStream.GetView()); + KeyHash128 = XXH3_128::HashMemory(KeyData.data(), KeyData.size()); } Oid KeyHash; @@ -2735,7 +2739,7 @@ ProjectStore::Oplog::CheckPendingChunkReferences(std::span ChunkHa MissingChunks.reserve(ChunkHashes.size()); for (const IoHash& FileHash : ChunkHashes) { - if (IoBuffer Payload = m_CidStore.FindChunkByCid(FileHash); !Payload) + if (!m_CidStore.ContainsChunk(FileHash)) { MissingChunks.push_back(FileHash); } @@ -3359,7 +3363,6 @@ ProjectStore::Project::OpenOplog(std::string_view OplogId, bool AllowCompact, bo ZEN_MEMSCOPE(GetProjectstoreTag()); ZEN_TRACE_CPU("Store::OpenOplog"); - std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); { RwLock::SharedLockScope ProjectLock(m_ProjectLock); @@ -3367,21 +3370,35 @@ ProjectStore::Project::OpenOplog(std::string_view OplogId, bool AllowCompact, bo if (OplogIt != m_Oplogs.end()) { - if (!VerifyPathOnDisk || Oplog::ExistsAt(OplogBasePath)) + bool ReOpen = false; + + if (VerifyPathOnDisk) { - return OplogIt->second.get(); + std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); + + if (!Oplog::ExistsAt(OplogBasePath)) + { + // Somebody deleted the oplog on disk behind our back + ProjectLock.ReleaseNow(); + std::filesystem::path DeletePath; + if (!RemoveOplog(OplogId, DeletePath)) + { + ZEN_WARN("Failed to clean up deleted oplog {}/{}", Identifier, OplogId, OplogBasePath); + } + + ReOpen = true; + } } - // Somebody deleted the oplog on disk behind our back - ProjectLock.ReleaseNow(); - std::filesystem::path DeletePath; - if (!RemoveOplog(OplogId, DeletePath)) + if (!ReOpen) { - ZEN_WARN("Failed to clean up deleted oplog {}/{}", Identifier, OplogId, OplogBasePath); + return OplogIt->second.get(); } } } + std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); + RwLock::ExclusiveLockScope Lock(m_ProjectLock); if (auto It = m_Oplogs.find(std::string{OplogId}); It != m_Oplogs.end()) { diff --git a/src/zenserver/workspaces/httpworkspaces.cpp b/src/zenserver/workspaces/httpworkspaces.cpp index 905ba5ab2..8a4b977ad 100644 --- a/src/zenserver/workspaces/httpworkspaces.cpp +++ b/src/zenserver/workspaces/httpworkspaces.cpp @@ -589,7 +589,7 @@ void HttpWorkspacesService::ShareAliasFilesRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -608,7 +608,7 @@ void HttpWorkspacesService::ShareAliasChunkInfoRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -635,7 +635,7 @@ void HttpWorkspacesService::ShareAliasBatchRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -654,7 +654,7 @@ void HttpWorkspacesService::ShareAliasEntriesRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -673,7 +673,7 @@ void HttpWorkspacesService::ShareAliasChunkRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -700,7 +700,7 @@ void HttpWorkspacesService::ShareAliasRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index 25f68330a..61552fafc 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -708,11 +708,11 @@ namespace zen { ZenCacheDiskLayer::CacheBucket::CacheBucket(GcManager& Gc, std::atomic_uint64_t& OuterCacheMemoryUsage, - std::string BucketName, + std::string_view BucketName, const BucketConfiguration& Config) : m_Gc(Gc) , m_OuterCacheMemoryUsage(OuterCacheMemoryUsage) -, m_BucketName(std::move(BucketName)) +, m_BucketName(BucketName) , m_Configuration(Config) , m_BucketId(Oid::Zero) { @@ -1329,7 +1329,7 @@ ZenCacheDiskLayer::CacheBucket::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle { - GetBatchHandle(std::vector& OutResults) : OutResults(OutResults) + GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults) { Keys.reserve(OutResults.capacity()); ResultIndexes.reserve(OutResults.capacity()); @@ -1340,11 +1340,11 @@ struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle std::vector Keys; std::vector ResultIndexes; - std::vector& OutResults; + ZenCacheValueVec_t& OutResults; }; ZenCacheDiskLayer::CacheBucket::GetBatchHandle* -ZenCacheDiskLayer::CacheBucket::BeginGetBatch(std::vector& OutResult) +ZenCacheDiskLayer::CacheBucket::BeginGetBatch(ZenCacheValueVec_t& OutResult) { ZEN_TRACE_CPU("Z$::Bucket::BeginGetBatch"); return new GetBatchHandle(OutResult); @@ -1364,13 +1364,13 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept if (!Batch->ResultIndexes.empty()) { - std::vector StandaloneDiskLocations; - std::vector StandaloneKeyIndexes; - std::vector MemCachedKeyIndexes; - std::vector InlineDiskLocations; - std::vector InlineBlockLocations; - std::vector InlineKeyIndexes; - std::vector FillRawHashAndRawSize(Batch->Keys.size(), false); + eastl::fixed_vector StandaloneDiskLocations; + eastl::fixed_vector StandaloneKeyIndexes; + eastl::fixed_vector MemCachedKeyIndexes; + eastl::fixed_vector InlineDiskLocations; + eastl::fixed_vector InlineBlockLocations; + eastl::fixed_vector InlineKeyIndexes; + eastl::fixed_vector FillRawHashAndRawSize(Batch->Keys.size(), false); { RwLock::SharedLockScope IndexLock(m_IndexLock); for (size_t KeyIndex = 0; KeyIndex < Batch->Keys.size(); KeyIndex++) @@ -1526,33 +1526,35 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept if (!InlineDiskLocations.empty()) { ZEN_TRACE_CPU("Z$::Bucket::EndGetBatch::ReadInline"); - m_BlockStore.IterateChunks(InlineBlockLocations, [&](uint32_t, std::span ChunkIndexes) -> bool { - // Only read into memory the IoBuffers we could potentially add to memcache - const uint64_t LargeChunkSizeLimit = Max(m_Configuration.MemCacheSizeThreshold, 1u * 1024u); - m_BlockStore.IterateBlock( - InlineBlockLocations, - ChunkIndexes, - [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, - const void* Data, - uint64_t Size) -> bool { - if (Data != nullptr) - { - FillOne(InlineDiskLocations[ChunkIndex], - InlineKeyIndexes[ChunkIndex], - IoBufferBuilder::MakeCloneFromMemory(Data, Size)); - } - return true; - }, - [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, - BlockStoreFile& File, - uint64_t Offset, - uint64_t Size) -> bool { - FillOne(InlineDiskLocations[ChunkIndex], InlineKeyIndexes[ChunkIndex], File.GetChunk(Offset, Size)); - return true; - }, - LargeChunkSizeLimit); - return true; - }); + m_BlockStore.IterateChunks( + std::span{begin(InlineBlockLocations), end(InlineBlockLocations)}, + [&](uint32_t, std::span ChunkIndexes) -> bool { + // Only read into memory the IoBuffers we could potentially add to memcache + const uint64_t LargeChunkSizeLimit = Max(m_Configuration.MemCacheSizeThreshold, 1u * 1024u); + m_BlockStore.IterateBlock( + std::span{begin(InlineBlockLocations), end(InlineBlockLocations)}, + ChunkIndexes, + [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, + const void* Data, + uint64_t Size) -> bool { + if (Data != nullptr) + { + FillOne(InlineDiskLocations[ChunkIndex], + InlineKeyIndexes[ChunkIndex], + IoBufferBuilder::MakeCloneFromMemory(Data, Size)); + } + return true; + }, + [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, + BlockStoreFile& File, + uint64_t Offset, + uint64_t Size) -> bool { + FillOne(InlineDiskLocations[ChunkIndex], InlineKeyIndexes[ChunkIndex], File.GetChunk(Offset, Size)); + return true; + }, + LargeChunkSizeLimit); + return true; + }); } if (!StandaloneDiskLocations.empty()) @@ -3581,15 +3583,29 @@ ZenCacheDiskLayer::~ZenCacheDiskLayer() } } +template +struct equal_to_2 : public eastl::binary_function +{ + constexpr bool operator()(const T& a, const U& b) const { return a == b; } + + template, eastl::remove_const_t>>> + constexpr bool operator()(const U& b, const T& a) const + { + return b == a; + } +}; + ZenCacheDiskLayer::CacheBucket* ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket) { ZEN_TRACE_CPU("Z$::GetOrCreateBucket"); - const auto BucketName = std::string(InBucket); { RwLock::SharedLockScope SharedLock(m_Lock); - if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end()) + if (auto It = m_Buckets.find_as(InBucket, std::hash(), equal_to_2()); + It != m_Buckets.end()) { return It->second.get(); } @@ -3597,31 +3613,32 @@ ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket) // We create the bucket without holding a lock since contructor calls GcManager::AddGcReferencer which takes an exclusive lock. // This can cause a deadlock, if GC is running we would block while holding ZenCacheDiskLayer::m_Lock - std::unique_ptr Bucket( - std::make_unique(m_Gc, m_TotalMemCachedSize, BucketName, m_Configuration.BucketConfig)); + std::unique_ptr Bucket(std::make_unique(m_Gc, m_TotalMemCachedSize, InBucket, m_Configuration.BucketConfig)); RwLock::ExclusiveLockScope Lock(m_Lock); - if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end()) + if (auto It = m_Buckets.find_as(InBucket, std::hash(), equal_to_2()); + It != m_Buckets.end()) { return It->second.get(); } std::filesystem::path BucketPath = m_RootDir; - BucketPath /= BucketName; + BucketPath /= InBucket; try { if (!Bucket->OpenOrCreate(BucketPath)) { - ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir); + ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", InBucket, m_RootDir); return nullptr; } } catch (const std::exception& Err) { - ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what()); + ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", InBucket, BucketPath, Err.what()); throw; } + std::string BucketName{InBucket}; CacheBucket* Result = Bucket.get(); m_Buckets.emplace(BucketName, std::move(Bucket)); if (m_CapturedBuckets) @@ -3720,7 +3737,7 @@ ZenCacheDiskLayer::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheDiskLayer::GetBatchHandle { - GetBatchHandle(std::vector& OutResults) : OutResults(OutResults) {} + GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults) {} struct BucketHandle { CacheBucket* Bucket; @@ -3780,13 +3797,13 @@ struct ZenCacheDiskLayer::GetBatchHandle return NewBucketHandle; } - RwLock Lock; - std::vector BucketHandles; - std::vector& OutResults; + RwLock Lock; + eastl::fixed_vector BucketHandles; + ZenCacheValueVec_t& OutResults; }; ZenCacheDiskLayer::GetBatchHandle* -ZenCacheDiskLayer::BeginGetBatch(std::vector& OutResults) +ZenCacheDiskLayer::BeginGetBatch(ZenCacheValueVec_t& OutResults) { return new GetBatchHandle(OutResults); } diff --git a/src/zenstore/cache/cacherpc.cpp b/src/zenstore/cache/cacherpc.cpp index cca51e63e..97e26a38d 100644 --- a/src/zenstore/cache/cacherpc.cpp +++ b/src/zenstore/cache/cacherpc.cpp @@ -20,6 +20,8 @@ #include +#include + ////////////////////////////////////////////////////////////////////////// namespace zen { @@ -89,7 +91,7 @@ GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key) return false; } IoHash Hash = HashField.AsHash(); - Key = CacheKey::Create(*Bucket, Hash); + Key = CacheKey::CreateValidated(std::move(*Bucket), Hash); return true; } @@ -305,7 +307,7 @@ CacheRpcHandler::HandleRpcPutCacheRecords(const CacheRequestContext& Context, co } DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; - std::vector Results; + eastl::fixed_vector Results; CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); for (CbFieldView RequestField : RequestsArray) @@ -481,16 +483,15 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb bool Exists = false; bool ReadFromUpstream = false; }; - struct RecordRequestData + struct RecordRequestData : public CacheKeyRequest { - CacheKeyRequest Upstream; - CbObjectView RecordObject; - IoBuffer RecordCacheValue; - CacheRecordPolicy DownstreamPolicy; - std::vector Values; - bool Complete = false; - const UpstreamEndpointInfo* Source = nullptr; - uint64_t ElapsedTimeUs; + CbObjectView RecordObject; + IoBuffer RecordCacheValue; + CacheRecordPolicy DownstreamPolicy; + eastl::fixed_vector Values; + bool Complete = false; + const UpstreamEndpointInfo* Source = nullptr; + uint64_t ElapsedTimeUs; }; std::string_view PolicyText = Params["DefaultPolicy"sv].AsString(); @@ -503,8 +504,8 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb const bool HasUpstream = m_UpstreamCache.IsActive(); - std::vector Requests; - std::vector UpstreamIndexes; + eastl::fixed_vector Requests; + eastl::fixed_vector UpstreamIndexes; auto ParseValues = [](RecordRequestData& Request) { CbArrayView ValuesArray = Request.RecordObject["Values"sv].AsArrayView(); @@ -535,7 +536,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb CbObjectView RequestObject = RequestField.AsObjectView(); CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); - CacheKey& Key = Request.Upstream.Key; + CacheKey& Key = Request.Key; if (!GetRpcRequestCacheKey(KeyObject, Key)) { return CbPackage{}; @@ -707,7 +708,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb for (size_t Index : UpstreamIndexes) { RecordRequestData& Request = Requests[Index]; - UpstreamRequests.push_back(&Request.Upstream); + UpstreamRequests.push_back(&Request); if (Request.Values.size()) { @@ -721,13 +722,13 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb UpstreamPolicy |= !Value.ReadFromUpstream ? CachePolicy::SkipData : CachePolicy::None; Builder.AddValuePolicy(Value.ValueId, UpstreamPolicy); } - Request.Upstream.Policy = Builder.Build(); + Request.Policy = Builder.Build(); } else { // We don't know which Values exist in the Record; ask the upstrem for all values that the client wants, // and convert the CacheRecordPolicy to an upstream policy - Request.Upstream.Policy = Request.DownstreamPolicy.ConvertToUpstream(); + Request.Policy = Request.DownstreamPolicy.ConvertToUpstream(); } } @@ -737,10 +738,9 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb return; } - RecordRequestData& Request = - *reinterpret_cast(reinterpret_cast(&Params.Request) - offsetof(RecordRequestData, Upstream)); + RecordRequestData& Request = *static_cast(&Params.Request); Request.ElapsedTimeUs += static_cast(Params.ElapsedSeconds * 1000000.0); - const CacheKey& Key = Request.Upstream.Key; + const CacheKey& Key = Request.Key; Stopwatch Timer; auto TimeGuard = MakeGuard([&Timer, &Request]() { Request.ElapsedTimeUs += Timer.GetElapsedTimeUs(); }); if (!Request.RecordObject) @@ -832,10 +832,12 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb CbPackage ResponsePackage; CbObjectWriter ResponseObject{2048}; + ResponsePackage.ReserveAttachments(Requests.size()); + ResponseObject.BeginArray("Result"sv); for (RecordRequestData& Request : Requests) { - const CacheKey& Key = Request.Upstream.Key; + const CacheKey& Key = Request.Key; if (Request.Complete || (Request.RecordObject && EnumHasAllFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::PartialRecord))) { @@ -910,11 +912,12 @@ CacheRpcHandler::HandleRpcPutCacheValues(const CacheRequestContext& Context, con const bool HasUpstream = m_UpstreamCache.IsActive(); CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); - std::vector BatchResults; - std::vector BatchResultIndexes; - std::vector Results; - std::vector UpstreamCacheKeys; - uint64_t RequestCount = RequestsArray.Num(); + std::vector BatchResults; + eastl::fixed_vector BatchResultIndexes; + eastl::fixed_vector Results; + eastl::fixed_vector UpstreamCacheKeys; + + uint64_t RequestCount = RequestsArray.Num(); { Results.reserve(RequestCount); std::unique_ptr Batch; @@ -1099,15 +1102,15 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO uint64_t RawSize = 0; CompressedBuffer Result; }; - std::vector Requests; + eastl::fixed_vector Requests; - std::vector RemoteRequestIndexes; + eastl::fixed_vector RemoteRequestIndexes; const bool HasUpstream = m_UpstreamCache.IsActive(); - CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); - std::vector CacheValues; - const uint64_t RequestCount = RequestsArray.Num(); + CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); + ZenCacheValueVec_t CacheValues; + const uint64_t RequestCount = RequestsArray.Num(); CacheValues.reserve(RequestCount); { std::unique_ptr Batch; @@ -1136,7 +1139,6 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO CacheKey& Key = Request.Key; CachePolicy Policy = Request.Policy; - ZenCacheValue CacheValue; if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { if (Batch) @@ -1276,6 +1278,9 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO ZEN_TRACE_CPU("Z$::RpcGetCacheValues::Response"); CbPackage RpcResponse; CbObjectWriter ResponseObject{1024}; + + RpcResponse.ReserveAttachments(Requests.size()); + ResponseObject.BeginArray("Result"sv); for (const RequestData& Request : Requests) { @@ -1642,7 +1647,7 @@ CacheRpcHandler::GetLocalCacheValues(const CacheRequestContext& Context, using namespace cache::detail; const bool HasUpstream = m_UpstreamCache.IsActive(); - std::vector Chunks; + ZenCacheValueVec_t Chunks; Chunks.reserve(ValueRequests.size()); { std::unique_ptr Batch; @@ -1796,6 +1801,8 @@ CacheRpcHandler::WriteGetCacheChunksResponse([[maybe_unused]] const CacheRequest CbPackage RpcResponse; CbObjectWriter Writer{1024}; + RpcResponse.ReserveAttachments(Requests.size()); + Writer.BeginArray("Result"sv); for (ChunkRequest& Request : Requests) { diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index 133cb42d7..7d277329e 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -178,13 +178,13 @@ ZenCacheNamespace::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheNamespace::GetBatchHandle { - GetBatchHandle(std::vector& OutResult) : Results(OutResult) {} - std::vector& Results; + GetBatchHandle(ZenCacheValueVec_t& OutResult) : Results(OutResult) {} + ZenCacheValueVec_t& Results; ZenCacheDiskLayer::GetBatchHandle* DiskLayerHandle = nullptr; }; ZenCacheNamespace::GetBatchHandle* -ZenCacheNamespace::BeginGetBatch(std::vector& OutResult) +ZenCacheNamespace::BeginGetBatch(ZenCacheValueVec_t& OutResult) { ZenCacheNamespace::GetBatchHandle* Handle = new ZenCacheNamespace::GetBatchHandle(OutResult); Handle->DiskLayerHandle = m_DiskLayer.BeginGetBatch(OutResult); @@ -580,7 +580,7 @@ ZenCacheStore::PutBatch::~PutBatch() } } -ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, std::vector& OutResult) +ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, ZenCacheValueVec_t& OutResult) : m_CacheStore(CacheStore) , Results(OutResult) { diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index b0b4f22cb..05400c784 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -12,8 +12,9 @@ ZEN_THIRD_PARTY_INCLUDES_START #include ZEN_THIRD_PARTY_INCLUDES_END +#include +#include #include -#include namespace zen { @@ -169,7 +170,7 @@ public: ~ZenCacheDiskLayer(); struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector& OutResult); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); void Get(std::string_view Bucket, const IoHash& HashKey, GetBatchHandle& BatchHandle); @@ -216,13 +217,16 @@ public: */ struct CacheBucket : public GcReferencer { - CacheBucket(GcManager& Gc, std::atomic_uint64_t& OuterCacheMemoryUsage, std::string BucketName, const BucketConfiguration& Config); + CacheBucket(GcManager& Gc, + std::atomic_uint64_t& OuterCacheMemoryUsage, + std::string_view BucketName, + const BucketConfiguration& Config); ~CacheBucket(); bool OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true); struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector& OutResult); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(const IoHash& HashKey, ZenCacheValue& OutValue); void Get(const IoHash& HashKey, GetBatchHandle& BatchHandle); @@ -486,18 +490,20 @@ private: bool StartAsyncMemCacheTrim(); void MemCacheTrim(); - GcManager& m_Gc; - JobQueue& m_JobQueue; - std::filesystem::path m_RootDir; - Configuration m_Configuration; - std::atomic_uint64_t m_TotalMemCachedSize{}; - std::atomic_bool m_IsMemCacheTrimming = false; - std::atomic m_NextAllowedTrimTick; - mutable RwLock m_Lock; - std::unordered_map> m_Buckets; - std::vector> m_DroppedBuckets; - uint32_t m_UpdateCaptureRefCounter = 0; - std::unique_ptr> m_CapturedBuckets; + typedef eastl::unordered_map, std::hash, std::equal_to> BucketMap_t; + + GcManager& m_Gc; + JobQueue& m_JobQueue; + std::filesystem::path m_RootDir; + Configuration m_Configuration; + std::atomic_uint64_t m_TotalMemCachedSize{}; + std::atomic_bool m_IsMemCacheTrimming = false; + std::atomic m_NextAllowedTrimTick; + mutable RwLock m_Lock; + BucketMap_t m_Buckets; + std::vector> m_DroppedBuckets; + uint32_t m_UpdateCaptureRefCounter = 0; + std::unique_ptr> m_CapturedBuckets; ZenCacheDiskLayer(const ZenCacheDiskLayer&) = delete; ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete; diff --git a/src/zenstore/include/zenstore/cache/cacheshared.h b/src/zenstore/include/zenstore/cache/cacheshared.h index 9b45c7b21..521c78bb1 100644 --- a/src/zenstore/include/zenstore/cache/cacheshared.h +++ b/src/zenstore/include/zenstore/cache/cacheshared.h @@ -6,6 +6,8 @@ #include #include +#include + #include #include @@ -32,6 +34,8 @@ struct ZenCacheValue IoHash RawHash = IoHash::Zero; }; +typedef eastl::fixed_vector ZenCacheValueVec_t; + struct CacheValueDetails { struct ValueDetails diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h index 82fec9b0e..5e056cf2d 100644 --- a/src/zenstore/include/zenstore/cache/structuredcachestore.h +++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h @@ -86,7 +86,7 @@ public: void EndPutBatch(PutBatchHandle* Batch) noexcept; struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector& OutResults); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResults); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); @@ -220,14 +220,14 @@ public: class GetBatch { public: - GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, std::vector& OutResult); + GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, ZenCacheValueVec_t& OutResult); ~GetBatch(); private: ZenCacheStore& m_CacheStore; ZenCacheNamespace* m_Store = nullptr; ZenCacheNamespace::GetBatchHandle* m_NamespaceBatchHandle = nullptr; - std::vector& Results; + ZenCacheValueVec_t& Results; friend class ZenCacheStore; }; diff --git a/src/zenstore/xmake.lua b/src/zenstore/xmake.lua index f0bd64d2e..031a66829 100644 --- a/src/zenstore/xmake.lua +++ b/src/zenstore/xmake.lua @@ -8,3 +8,4 @@ target('zenstore') add_includedirs("include", {public=true}) add_deps("zencore", "zenutil") add_packages("vcpkg::robin-map") + add_packages("vcpkg::eastl", {public=true}); diff --git a/src/zenutil/include/zenutil/cache/cachekey.h b/src/zenutil/include/zenutil/cache/cachekey.h index 741375946..0ab05f4f1 100644 --- a/src/zenutil/include/zenutil/cache/cachekey.h +++ b/src/zenutil/include/zenutil/cache/cachekey.h @@ -17,6 +17,12 @@ struct CacheKey static CacheKey Create(std::string_view Bucket, const IoHash& Hash) { return {.Bucket = ToLower(Bucket), .Hash = Hash}; } + // This should be used whenever the bucket name has already been validated to avoid redundant ToLower calls + static CacheKey CreateValidated(std::string&& BucketValidated, const IoHash& Hash) + { + return {.Bucket = std::move(BucketValidated), .Hash = Hash}; + } + auto operator<=>(const CacheKey& that) const { if (auto b = caseSensitiveCompareStrings(Bucket, that.Bucket); b != std::strong_ordering::equal) -- cgit v1.2.3 From 9b24647facccc9c7848a52f1f4c5e32055bf2f01 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 7 Mar 2025 09:58:20 +0100 Subject: partial block fetch (#298) - Improvement: Do partial requests of blocks if not all of the block is needed - Improvement: Better progress/statistics on download - Bugfix: Ensure that temporary folder for Jupiter downloads exists during verify phase --- src/zen/cmds/builds_cmd.cpp | 1099 ++++++++++++++------ src/zen/cmds/builds_cmd.h | 1 + src/zenhttp/httpclient.cpp | 161 +-- src/zenutil/filebuildstorage.cpp | 17 +- src/zenutil/include/zenutil/buildstorage.h | 5 +- .../include/zenutil/jupiter/jupitersession.h | 4 +- src/zenutil/jupiter/jupiterbuildstorage.cpp | 10 +- src/zenutil/jupiter/jupitersession.cpp | 12 +- 8 files changed, 907 insertions(+), 402 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index eb0650c4d..1c9476b96 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -231,7 +231,10 @@ namespace { return AuthToken; } - CompositeBuffer WriteToTempFileIfNeeded(const CompositeBuffer& Buffer, const std::filesystem::path& TempFolderPath, const IoHash& Hash) + CompositeBuffer WriteToTempFileIfNeeded(const CompositeBuffer& Buffer, + const std::filesystem::path& TempFolderPath, + const IoHash& Hash, + const std::string& Suffix = {}) { // If this is a file based buffer or a compressed buffer with a memory-based header, we don't need to rewrite to disk to save memory std::span Segments = Buffer.GetSegments(); @@ -241,7 +244,7 @@ namespace { { return Buffer; } - std::filesystem::path TempFilePath = (TempFolderPath / Hash.ToHexString()).make_preferred(); + std::filesystem::path TempFilePath = (TempFolderPath / (Hash.ToHexString() + Suffix)).make_preferred(); return CompositeBuffer(WriteToTempFile(Buffer, TempFilePath)); } @@ -302,7 +305,7 @@ namespace { return FilteredPerSecond; } - uint64_t GetElapsedTime() const + uint64_t GetElapsedTimeUS() const { if (StartTimeUS == (uint64_t)-1) { @@ -1738,8 +1741,8 @@ namespace { ProgressBar.Finish(); - GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTime(); - UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTime(); + GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTimeUS(); + UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); } } @@ -2069,9 +2072,9 @@ namespace { }); ProgressBar.Finish(); - UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTime(); - GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGenerateBlockBytesPerSecond.GetElapsedTime(); - LooseChunksStats.CompressChunksElapsedWallTimeUS = FilteredCompressedBytesPerSecond.GetElapsedTime(); + UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); + GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGenerateBlockBytesPerSecond.GetElapsedTimeUS(); + LooseChunksStats.CompressChunksElapsedWallTimeUS = FilteredCompressedBytesPerSecond.GetElapsedTimeUS(); } } @@ -3358,17 +3361,8 @@ namespace { return ChunkTargetPtrs; }; - bool WriteBlockToDisk(const std::filesystem::path& CacheFolderPath, - const ChunkedFolderContent& RemoteContent, - std::span> SequenceIndexChunksLeftToWriteCounters, - const CompositeBuffer& DecompressedBlockBuffer, - const ChunkedContentLookup& Lookup, - std::atomic* RemoteChunkIndexNeedsCopyFromSourceFlags, - std::atomic& OutChunksComplete, - std::atomic& OutBytesWritten) + struct BlockWriteOps { - ZEN_TRACE_CPU("WriteBlockToDisk"); - std::vector ChunkBuffers; struct WriteOpData { @@ -3376,7 +3370,79 @@ namespace { size_t ChunkBufferIndex; }; std::vector WriteOps; + }; + void WriteBlockChunkOps(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& Lookup, + std::span> SequenceIndexChunksLeftToWriteCounters, + const BlockWriteOps Ops, + std::atomic& OutChunksComplete, + std::atomic& OutBytesWritten) + { + ZEN_TRACE_CPU("WriteBlockChunkOps"); + { + WriteFileCache OpenFileCache; + for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps) + { + if (AbortFlag) + { + break; + } + const CompositeBuffer& Chunk = Ops.ChunkBuffers[WriteOp.ChunkBufferIndex]; + const uint32_t SequenceIndex = WriteOp.Target->SequenceIndex; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() <= + RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() > 0); + const uint64_t ChunkSize = Chunk.GetSize(); + const uint64_t FileOffset = WriteOp.Target->Offset; + const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; + ZEN_ASSERT(FileOffset + ChunkSize <= RemoteContent.RawSizes[PathIndex]); + + OpenFileCache.WriteToFile( + SequenceIndex, + [&CacheFolderPath, &RemoteContent](uint32_t SequenceIndex) { + return GetTempChunkedSequenceFileName(CacheFolderPath, + RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + }, + Chunk, + FileOffset, + RemoteContent.RawSizes[PathIndex]); + OutBytesWritten += ChunkSize; + } + } + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open (WriteFileCache) + for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps) + { + const uint32_t RemoteSequenceIndex = WriteOp.Target->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) + { + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + const IoHash VerifyChunkHash = + IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Written hunk sequence {} hash does not match expected hash {}", VerifyChunkHash, SequenceRawHash)); + } + std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), + GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); + } + } + OutChunksComplete += gsl::narrow(Ops.ChunkBuffers.size()); + } + } + + bool GetBlockWriteOps(const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& Lookup, + std::span> SequenceIndexChunksLeftToWriteCounters, + std::span> RemoteChunkIndexNeedsCopyFromSourceFlags, + const CompositeBuffer& DecompressedBlockBuffer, + BlockWriteOps& OutOps) + { + ZEN_TRACE_CPU("GetBlockWriteOps"); SharedBuffer BlockBuffer = DecompressedBlockBuffer.Flatten(); uint64_t HeaderSize = 0; if (IterateChunkBlock( @@ -3402,91 +3468,207 @@ namespace { ZEN_ASSERT(Decompressed.GetSize() == RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs) { - WriteOps.push_back(WriteOpData{.Target = Target, .ChunkBufferIndex = ChunkBuffers.size()}); + OutOps.WriteOps.push_back( + BlockWriteOps::WriteOpData{.Target = Target, .ChunkBufferIndex = OutOps.ChunkBuffers.size()}); } - ChunkBuffers.emplace_back(std::move(Decompressed)); + OutOps.ChunkBuffers.emplace_back(std::move(Decompressed)); } } } }, HeaderSize)) { - if (!WriteOps.empty()) + std::sort(OutOps.WriteOps.begin(), + OutOps.WriteOps.end(), + [](const BlockWriteOps::WriteOpData& Lhs, const BlockWriteOps::WriteOpData& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; + } + if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { + return false; + } + return Lhs.Target->Offset < Rhs.Target->Offset; + }); + + return true; + } + else + { + return false; + } + } + + bool WriteBlockToDisk(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkBlockDescription& BlockDescription, + std::span> SequenceIndexChunksLeftToWriteCounters, + const CompositeBuffer& BlockBuffer, + const ChunkedContentLookup& Lookup, + std::span> RemoteChunkIndexNeedsCopyFromSourceFlags, + std::atomic& OutChunksComplete, + std::atomic& OutBytesWritten) + { + ZEN_TRACE_CPU("WriteBlockToDisk"); + + IoHash BlockRawHash; + uint64_t BlockRawSize; + CompressedBuffer CompressedBlockBuffer = CompressedBuffer::FromCompressed(BlockBuffer, BlockRawHash, BlockRawSize); + if (!CompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", BlockDescription.BlockHash)); + } + + if (BlockRawHash != BlockDescription.BlockHash) + { + throw std::runtime_error( + fmt::format("Block {} header has a mismatching raw hash {}", BlockDescription.BlockHash, BlockRawHash)); + } + + CompositeBuffer DecompressedBlockBuffer = CompressedBlockBuffer.DecompressToComposite(); + if (!DecompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} failed to decompress", BlockDescription.BlockHash)); + } + + ZEN_ASSERT_SLOW(BlockDescription.BlockHash == IoHash::HashBuffer(DecompressedBlockBuffer)); + + BlockWriteOps Ops; + if (GetBlockWriteOps(RemoteContent, + Lookup, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + DecompressedBlockBuffer, + Ops)) + { + WriteBlockChunkOps(CacheFolderPath, + RemoteContent, + Lookup, + SequenceIndexChunksLeftToWriteCounters, + Ops, + OutChunksComplete, + OutBytesWritten); + return true; + } + return false; + } + + bool GetPartialBlockWriteOps(const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& Lookup, + const ChunkBlockDescription& BlockDescription, + std::span> SequenceIndexChunksLeftToWriteCounters, + std::span> RemoteChunkIndexNeedsCopyFromSourceFlags, + const CompositeBuffer& PartialBlockBuffer, + uint32_t FirstIncludedBlockChunkIndex, + uint32_t LastIncludedBlockChunkIndex, + BlockWriteOps& OutOps) + { + ZEN_TRACE_CPU("GetPartialBlockWriteOps"); + uint32_t OffsetInBlock = 0; + for (uint32_t ChunkBlockIndex = FirstIncludedBlockChunkIndex; ChunkBlockIndex <= LastIncludedBlockChunkIndex; ChunkBlockIndex++) + { + const uint32_t ChunkCompressedSize = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + if (auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); It != Lookup.ChunkHashToChunkIndex.end()) { - if (!AbortFlag) + const uint32_t ChunkIndex = It->second; + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, Lookup, ChunkIndex); + + if (!ChunkTargetPtrs.empty()) { - std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOpData& Lhs, const WriteOpData& Rhs) { - if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + bool NeedsWrite = true; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[ChunkIndex].compare_exchange_strong(NeedsWrite, false)) + { + CompositeBuffer Chunk = PartialBlockBuffer.Mid(OffsetInBlock, ChunkCompressedSize); + IoHash VerifyChunkHash; + uint64_t VerifyRawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(Chunk, VerifyChunkHash, VerifyRawSize); + if (!Compressed) { - return true; + ZEN_ASSERT(false); } - if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + if (VerifyChunkHash != ChunkHash) { - return false; + ZEN_ASSERT(false); } - return Lhs.Target->Offset < Rhs.Target->Offset; - }); - - { - WriteFileCache OpenFileCache; - for (const WriteOpData& WriteOp : WriteOps) + if (VerifyRawSize != BlockDescription.ChunkRawLengths[ChunkBlockIndex]) { - if (AbortFlag) - { - break; - } - const CompositeBuffer& Chunk = ChunkBuffers[WriteOp.ChunkBufferIndex]; - const uint32_t SequenceIndex = WriteOp.Target->SequenceIndex; - ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() <= - RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]); - ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() > 0); - const uint64_t ChunkSize = Chunk.GetSize(); - const uint64_t FileOffset = WriteOp.Target->Offset; - const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; - ZEN_ASSERT(FileOffset + ChunkSize <= RemoteContent.RawSizes[PathIndex]); - - OpenFileCache.WriteToFile( - SequenceIndex, - [&CacheFolderPath, &RemoteContent](uint32_t SequenceIndex) { - return GetTempChunkedSequenceFileName(CacheFolderPath, - RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); - }, - Chunk, - FileOffset, - RemoteContent.RawSizes[PathIndex]); - OutBytesWritten += ChunkSize; + ZEN_ASSERT(false); } - } - if (!AbortFlag) - { - ZEN_TRACE_CPU("WriteBlockToDisk_VerifyHash"); - - // Write tracking, updating this must be done without any files open (WriteFileCache) - for (const WriteOpData& WriteOp : WriteOps) + CompositeBuffer Decompressed = Compressed.DecompressToComposite(); + if (!Decompressed) { - const uint32_t RemoteSequenceIndex = WriteOp.Target->SequenceIndex; - if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) - { - const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - const IoHash VerifyChunkHash = IoHash::HashBuffer( - IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); - if (VerifyChunkHash != SequenceRawHash) - { - throw std::runtime_error(fmt::format("Written chunk sequence {} hash does not match expected hash {}", - VerifyChunkHash, - SequenceRawHash)); - } - std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), - GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); - } + throw std::runtime_error(fmt::format("Decompression of build blob {} failed", ChunkHash)); } + ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed)); + ZEN_ASSERT(Decompressed.GetSize() == RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs) + { + OutOps.WriteOps.push_back( + BlockWriteOps::WriteOpData{.Target = Target, .ChunkBufferIndex = OutOps.ChunkBuffers.size()}); + } + OutOps.ChunkBuffers.emplace_back(std::move(Decompressed)); } - OutChunksComplete += gsl::narrow(ChunkBuffers.size()); } } + + OffsetInBlock += ChunkCompressedSize; + } + std::sort(OutOps.WriteOps.begin(), + OutOps.WriteOps.end(), + [](const BlockWriteOps::WriteOpData& Lhs, const BlockWriteOps::WriteOpData& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; + } + if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { + return false; + } + return Lhs.Target->Offset < Rhs.Target->Offset; + }); + return true; + } + + bool WritePartialBlockToDisk(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkBlockDescription& BlockDescription, + std::span> SequenceIndexChunksLeftToWriteCounters, + const CompositeBuffer& PartialBlockBuffer, + uint32_t FirstIncludedBlockChunkIndex, + uint32_t LastIncludedBlockChunkIndex, + const ChunkedContentLookup& Lookup, + std::span> RemoteChunkIndexNeedsCopyFromSourceFlags, + std::atomic& OutChunksComplete, + std::atomic& OutBytesWritten) + { + ZEN_TRACE_CPU("WritePartialBlockToDisk"); + BlockWriteOps Ops; + if (GetPartialBlockWriteOps(RemoteContent, + Lookup, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + PartialBlockBuffer, + FirstIncludedBlockChunkIndex, + LastIncludedBlockChunkIndex, + Ops)) + { + WriteBlockChunkOps(CacheFolderPath, + RemoteContent, + Lookup, + SequenceIndexChunksLeftToWriteCounters, + Ops, + OutChunksComplete, + OutBytesWritten); return true; } - return false; + else + { + return false; + } } SharedBuffer Decompress(const CompositeBuffer& CompressedChunk, const IoHash& ChunkHash, const uint64_t ChunkRawSize) @@ -3571,6 +3753,7 @@ namespace { CompositeBuffer&& CompressedPart, std::atomic& WriteToDiskBytes) { + ZEN_TRACE_CPU("StreamDecompress"); const std::filesystem::path TempChunkSequenceFileName = GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash); TemporaryFile DecompressedTemp; std::error_code Ec; @@ -3633,10 +3816,9 @@ namespace { WorkerThreadPool& NetworkPool, std::atomic& WriteToDiskBytes, std::atomic& BytesDownloaded, - std::atomic& LooseChunksBytes, - std::atomic& DownloadedChunks, - std::atomic& ChunksComplete, - std::atomic& MultipartAttachmentCount) + std::atomic& MultipartAttachmentCount, + std::function&& OnDownloadComplete, + std::function&& OnWriteComplete) { ZEN_TRACE_CPU("DownloadLargeBlob"); @@ -3665,22 +3847,20 @@ namespace { Workload, ChunkHash, &BytesDownloaded, - &LooseChunksBytes, + OnDownloadComplete = std::move(OnDownloadComplete), + OnWriteComplete = std::move(OnWriteComplete), &WriteToDiskBytes, - &DownloadedChunks, - &ChunksComplete, SequenceIndexChunksLeftToWriteCounters, ChunkTargetPtrs = std::vector( ChunkTargetPtrs)](uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining) { BytesDownloaded += Chunk.GetSize(); - LooseChunksBytes += Chunk.GetSize(); if (!AbortFlag.load()) { Workload->TempFile.Write(Chunk.GetView(), Offset); if (Chunk.GetSize() == BytesRemaining) { - DownloadedChunks++; + OnDownloadComplete(Workload->TempFile.FileSize()); Work.ScheduleWork( WritePool, // GetSyncWorkerPool(),// @@ -3690,8 +3870,7 @@ namespace { ChunkHash, Workload, Offset, - BytesRemaining, - &ChunksComplete, + OnWriteComplete = std::move(OnWriteComplete), &WriteToDiskBytes, SequenceIndexChunksLeftToWriteCounters, ChunkTargetPtrs](std::atomic&) { @@ -3725,7 +3904,7 @@ namespace { CompositeBuffer(std::move(CompressedPart)), WriteToDiskBytes); NeedHashVerify = false; - ChunksComplete++; + OnWriteComplete(); } else { @@ -3748,7 +3927,7 @@ namespace { CompositeBuffer(Chunk), OpenFileCache, WriteToDiskBytes); - ChunksComplete++; + OnWriteComplete(); } } @@ -3760,12 +3939,12 @@ namespace { const uint32_t RemoteSequenceIndex = Location->SequenceIndex; if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) { - ZEN_TRACE_CPU("VerifyChunkHash"); - const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; if (NeedHashVerify) { + ZEN_TRACE_CPU("VerifyChunkHash"); + const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); if (VerifyChunkHash != ChunkHash) @@ -3816,6 +3995,7 @@ namespace { const ChunkedFolderContent& RemoteContent, const std::vector& BlockDescriptions, const std::vector& LooseChunkHashes, + bool AllowPartialBlockRequests, bool WipeTargetFolder, FolderContent& OutLocalFolderState) { @@ -3967,11 +4147,17 @@ namespace { } } - std::atomic ChunkCountWritten = 0; + uint64_t TotalRequestCount = 0; + std::atomic RequestsComplete = 0; + std::atomic ChunkCountWritten = 0; + std::atomic TotalPartWriteCount = 0; + std::atomic WritePartsComplete = 0; { ZEN_TRACE_CPU("HandleChunks"); + Stopwatch WriteTimer; + FilteredRate FilteredDownloadedBytesPerSecond; FilteredRate FilteredWrittenBytesPerSecond; @@ -4010,6 +4196,8 @@ namespace { } else { + TotalRequestCount++; + TotalPartWriteCount++; Work.ScheduleWork( NetworkPool, // GetSyncWorkerPool(),// [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) { @@ -4020,25 +4208,39 @@ namespace { FilteredDownloadedBytesPerSecond.Start(); if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) { - DownloadLargeBlob(Storage, - Path / ZenTempChunkFolderName, - CacheFolderPath, - RemoteContent, - RemoteLookup, - BuildId, - ChunkHash, - PreferredMultipartChunkSize, - ChunkTargetPtrs, - SequenceIndexChunksLeftToWriteCounters, - Work, - WritePool, - NetworkPool, - WriteToDiskBytes, - BytesDownloaded, - LooseChunksBytes, - DownloadedChunks, - ChunkCountWritten, - MultipartAttachmentCount); + DownloadLargeBlob( + Storage, + Path / ZenTempChunkFolderName, + CacheFolderPath, + RemoteContent, + RemoteLookup, + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + ChunkTargetPtrs, + SequenceIndexChunksLeftToWriteCounters, + Work, + WritePool, + NetworkPool, + WriteToDiskBytes, + BytesDownloaded, + MultipartAttachmentCount, + [&](uint64_t BytesDownloaded) { + LooseChunksBytes += BytesDownloaded; + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + }, + [&]() { + ChunkCountWritten++; + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + }); } else { @@ -4049,6 +4251,11 @@ namespace { } BytesDownloaded += CompressedPart.GetSize(); LooseChunksBytes += CompressedPart.GetSize(); + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } CompositeBuffer Payload = WriteToTempFileIfNeeded(CompositeBuffer(std::move(CompressedPart)), Path / ZenTempChunkFolderName, ChunkHash); @@ -4077,6 +4284,11 @@ namespace { CompositeBuffer(CompressedPart), WriteToDiskBytes); ChunkCountWritten++; + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } NeedHashVerify = false; } else @@ -4096,10 +4308,17 @@ namespace { OpenFileCache, WriteToDiskBytes); ChunkCountWritten++; + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } } } if (!AbortFlag) { + WritePartsComplete++; + // Write tracking, updating this must be done without any files open // (WriteFileCache) for (const ChunkedContentLookup::ChunkSequenceLocation* Location : @@ -4109,12 +4328,12 @@ namespace { if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub( 1) == 1) { - ZEN_TRACE_CPU("UpdateFolder_VerifyHash"); - const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; if (NeedHashVerify) { + ZEN_TRACE_CPU("UpdateFolder_VerifyHash"); + const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( GetTempChunkedSequenceFileName(CacheFolderPath, @@ -4155,6 +4374,8 @@ namespace { break; } + TotalPartWriteCount++; + Work.ScheduleWork( WritePool, // GetSyncWorkerPool(),// [&, CopyDataIndex](std::atomic&) { @@ -4166,245 +4387,428 @@ namespace { const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.LocalSequenceIndex]; const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); - if (!CopyData.TargetChunkLocationPtrs.empty()) - { - uint64_t CacheLocalFileBytesRead = 0; + ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty()); - size_t TargetStart = 0; - const std::span AllTargets( - CopyData.TargetChunkLocationPtrs); + uint64_t CacheLocalFileBytesRead = 0; - struct WriteOp - { - const ChunkedContentLookup::ChunkSequenceLocation* Target; - uint64_t CacheFileOffset; - uint64_t ChunkSize; - }; + size_t TargetStart = 0; + const std::span AllTargets( + CopyData.TargetChunkLocationPtrs); + + struct WriteOp + { + const ChunkedContentLookup::ChunkSequenceLocation* Target; + uint64_t CacheFileOffset; + uint64_t ChunkSize; + }; - std::vector WriteOps; - WriteOps.reserve(AllTargets.size()); + std::vector WriteOps; + WriteOps.reserve(AllTargets.size()); - for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) + for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) + { + std::span TargetRange = + AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) { - std::span TargetRange = - AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); - for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) - { - WriteOps.push_back(WriteOp{.Target = Target, - .CacheFileOffset = ChunkTarget.CacheFileOffset, - .ChunkSize = ChunkTarget.ChunkRawSize}); - } - TargetStart += ChunkTarget.TargetChunkLocationCount; + WriteOps.push_back(WriteOp{.Target = Target, + .CacheFileOffset = ChunkTarget.CacheFileOffset, + .ChunkSize = ChunkTarget.ChunkRawSize}); } + TargetStart += ChunkTarget.TargetChunkLocationCount; + } - std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { - if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) - { - return true; - } - else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) - { - return false; - } - if (Lhs.Target->Offset < Rhs.Target->Offset) - { - return true; - } + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; + } + else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { return false; - }); + } + if (Lhs.Target->Offset < Rhs.Target->Offset) + { + return true; + } + return false; + }); - if (!AbortFlag) + if (!AbortFlag) + { + BufferedOpenFile SourceFile(LocalFilePath); + WriteFileCache OpenFileCache; + for (const WriteOp& Op : WriteOps) { - BufferedOpenFile SourceFile(LocalFilePath); - WriteFileCache OpenFileCache; - for (const WriteOp& Op : WriteOps) + if (AbortFlag) { - if (AbortFlag) - { - break; - } - const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; - ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() <= - RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]); - ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() > 0); - const uint32_t RemotePathIndex = RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; - const uint64_t ChunkSize = Op.ChunkSize; - CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ChunkSize); - - ZEN_ASSERT(Op.Target->Offset + ChunkSource.GetSize() <= RemoteContent.RawSizes[RemotePathIndex]); - - OpenFileCache.WriteToFile( - RemoteSequenceIndex, - [&CacheFolderPath, &RemoteContent](uint32_t SequenceIndex) { - return GetTempChunkedSequenceFileName( - CacheFolderPath, - RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); - }, - ChunkSource, - Op.Target->Offset, - RemoteContent.RawSizes[RemotePathIndex]); - WriteToDiskBytes += ChunkSize; - CacheLocalFileBytesRead += ChunkSize; // TODO: This should be the sum of unique chunk sizes? + break; } + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() <= + RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() > 0); + const uint32_t RemotePathIndex = RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; + const uint64_t ChunkSize = Op.ChunkSize; + CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ChunkSize); + + ZEN_ASSERT(Op.Target->Offset + ChunkSource.GetSize() <= RemoteContent.RawSizes[RemotePathIndex]); + + OpenFileCache.WriteToFile( + RemoteSequenceIndex, + [&CacheFolderPath, &RemoteContent](uint32_t SequenceIndex) { + return GetTempChunkedSequenceFileName( + CacheFolderPath, + RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + }, + ChunkSource, + Op.Target->Offset, + RemoteContent.RawSizes[RemotePathIndex]); + WriteToDiskBytes += ChunkSize; + CacheLocalFileBytesRead += ChunkSize; // TODO: This should be the sum of unique chunk sizes? } - if (!AbortFlag) + } + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open (WriteFileCache) + for (const WriteOp& Op : WriteOps) { - if (!AbortFlag) + ZEN_TRACE_CPU("UpdateFolder_Copy_VerifyHash"); + + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) { - // Write tracking, updating this must be done without any files open (WriteFileCache) - for (const WriteOp& Op : WriteOps) + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( + GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) { - ZEN_TRACE_CPU("UpdateFolder_Copy_VerifyHash"); - - const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; - if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) - { - const IoHash& SequenceRawHash = - RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( - GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); - if (VerifyChunkHash != SequenceRawHash) - { - throw std::runtime_error( - fmt::format("Written chunk sequence {} hash does not match expected hash {}", - VerifyChunkHash, - SequenceRawHash)); - } - - ZEN_TRACE_CPU("UpdateFolder_Copy_rename"); - std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), - GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); - } + throw std::runtime_error( + fmt::format("Written chunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + SequenceRawHash)); } - } - ChunkCountWritten += gsl::narrow(CopyData.ChunkTargets.size()); - ZEN_DEBUG("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), LocalContent.Paths[LocalPathIndex]); + ZEN_TRACE_CPU("UpdateFolder_Copy_rename"); + std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), + GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); + } } + + ChunkCountWritten += gsl::narrow(CopyData.ChunkTargets.size()); + ZEN_DEBUG("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), LocalContent.Paths[LocalPathIndex]); + } + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); } } }, Work.DefaultErrorFunction()); } - size_t BlockCount = BlockDescriptions.size(); - std::atomic BlocksComplete = 0; - - auto IsBlockNeeded = [&RemoteContent, &RemoteLookup, &RemoteChunkIndexNeedsCopyFromSourceFlags]( - const ChunkBlockDescription& BlockDescription) -> bool { - for (const IoHash& ChunkHash : BlockDescription.ChunkRawHashes) - { - if (auto It = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != RemoteLookup.ChunkHashToChunkIndex.end()) - { - const uint32_t RemoteChunkIndex = It->second; - if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]) - { - return true; - } - } - } - return false; + size_t BlockCount = BlockDescriptions.size(); + + std::vector ChunkIsPickedUpByBlock(RemoteContent.ChunkedContent.ChunkHashes.size(), false); + auto GetNeededChunkBlockIndexes = [&RemoteContent, + &RemoteLookup, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &ChunkIsPickedUpByBlock](const ChunkBlockDescription& BlockDescription) { + std::vector NeededBlockChunkIndexes; + for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++) + { + const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + if (auto It = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = It->second; + if (!ChunkIsPickedUpByBlock[RemoteChunkIndex]) + { + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]) + { + ChunkIsPickedUpByBlock[RemoteChunkIndex] = true; + NeededBlockChunkIndexes.push_back(ChunkBlockIndex); + } + } + } + } + return NeededBlockChunkIndexes; }; - size_t BlocksNeededCount = 0; + size_t BlocksNeededCount = 0; + uint64_t AllBlocksSize = 0; + uint64_t AllBlocksFetch = 0; + uint64_t AllBlocksSlack = 0; + uint64_t AllBlockRequests = 0; + uint64_t AllBlockChunksSize = 0; for (size_t BlockIndex = 0; BlockIndex < BlockCount; BlockIndex++) { if (Work.IsAborted()) { break; } - if (IsBlockNeeded(BlockDescriptions[BlockIndex])) + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + const std::vector BlockChunkIndexNeeded = GetNeededChunkBlockIndexes(BlockDescription); + if (!BlockChunkIndexNeeded.empty()) { - BlocksNeededCount++; - Work.ScheduleWork( - NetworkPool, - [&, BlockIndex](std::atomic&) { - if (!AbortFlag) + bool WantsToDoPartialBlockDownload = BlockChunkIndexNeeded.size() < BlockDescription.ChunkRawHashes.size(); + bool CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) && (BlockDescription.ChunkCompressedLengths.size() == + BlockDescription.ChunkRawHashes.size()); + if (AllowPartialBlockRequests && WantsToDoPartialBlockDownload && CanDoPartialBlockDownload) + { + struct BlockRangeDescriptor + { + uint64_t RangeStart = 0; + uint64_t RangeLength = 0; + uint32_t ChunkBlockIndexStart = 0; + uint32_t ChunkBlockIndexCount = 0; + }; + std::vector BlockRanges; + + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_PartialAnalisys"); + + uint32_t NeedBlockChunkIndexOffset = 0; + uint32_t ChunkBlockIndex = 0; + uint32_t CurrentOffset = + gsl::narrow(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + + BlockRangeDescriptor NextRange; + while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && + ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) + { + const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) { - ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_Read"); - - FilteredDownloadedBytesPerSecond.Start(); - IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescriptions[BlockIndex].BlockHash); - if (!BlockBuffer) + if (NextRange.RangeLength > 0) { - throw std::runtime_error(fmt::format("Block {} is missing", BlockDescriptions[BlockIndex].BlockHash)); + BlockRanges.push_back(NextRange); + NextRange = {}; } - BytesDownloaded += BlockBuffer.GetSize(); - BlockBytes += BlockBuffer.GetSize(); - DownloadedBlocks++; - CompositeBuffer Payload = WriteToTempFileIfNeeded(CompositeBuffer(std::move(BlockBuffer)), - Path / ZenTempBlockFolderName, - BlockDescriptions[BlockIndex].BlockHash); - - if (!AbortFlag) + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + } + else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + AllBlockChunksSize += ChunkCompressedLength; + if (NextRange.RangeLength == 0) { - Work.ScheduleWork( - WritePool, - [&, BlockIndex, BlockBuffer = std::move(Payload)](std::atomic&) { - if (!AbortFlag) - { - ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_Write"); - - FilteredWrittenBytesPerSecond.Start(); - IoHash BlockRawHash; - uint64_t BlockRawSize; - CompressedBuffer CompressedBlockBuffer = - CompressedBuffer::FromCompressed(std::move(BlockBuffer), BlockRawHash, BlockRawSize); - if (!CompressedBlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", - BlockDescriptions[BlockIndex].BlockHash)); - } + NextRange.RangeStart = CurrentOffset; + NextRange.ChunkBlockIndexStart = ChunkBlockIndex; + } + NextRange.RangeLength += ChunkCompressedLength; + NextRange.ChunkBlockIndexCount++; + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + NeedBlockChunkIndexOffset++; + } + else + { + ZEN_ASSERT(false); + } + } + AllBlocksSize += CurrentOffset; + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + NextRange = {}; + } - if (BlockRawHash != BlockDescriptions[BlockIndex].BlockHash) - { - throw std::runtime_error(fmt::format("Block {} header has a mismatching raw hash {}", - BlockDescriptions[BlockIndex].BlockHash, - BlockRawHash)); - } + ZEN_ASSERT(!BlockRanges.empty()); + std::vector CollapsedBlockRanges; + auto It = BlockRanges.begin(); + CollapsedBlockRanges.push_back(*It++); + uint64_t TotalSlack = 0; + while (It != BlockRanges.end()) + { + BlockRangeDescriptor& LastRange = CollapsedBlockRanges.back(); + uint64_t Slack = It->RangeStart - (LastRange.RangeStart + LastRange.RangeLength); + uint64_t BothRangeSize = It->RangeLength + LastRange.RangeLength; + if (Slack <= Max(BothRangeSize / 8, 64u * 1024u)) // Made up heuristic - we'll see how it pans out + { + LastRange.ChunkBlockIndexCount = + (It->ChunkBlockIndexStart + It->ChunkBlockIndexCount) - LastRange.ChunkBlockIndexStart; + LastRange.RangeLength = (It->RangeStart + It->RangeLength) - LastRange.RangeStart; + TotalSlack += Slack; + } + else + { + CollapsedBlockRanges.push_back(*It); + } + ++It; + } + + uint64_t TotalFetch = 0; + for (const BlockRangeDescriptor& Range : CollapsedBlockRanges) + { + TotalFetch += Range.RangeLength; + } + + AllBlocksFetch += TotalFetch; + AllBlocksSlack += TotalSlack; + BlocksNeededCount++; + AllBlockRequests += CollapsedBlockRanges.size(); + + for (size_t BlockRangeIndex = 0; BlockRangeIndex < CollapsedBlockRanges.size(); BlockRangeIndex++) + { + TotalRequestCount++; + TotalPartWriteCount++; + const BlockRangeDescriptor BlockRange = CollapsedBlockRanges[BlockRangeIndex]; + // Partial block schedule + Work.ScheduleWork( + NetworkPool, // NetworkPool, // GetSyncWorkerPool() + [&, BlockIndex, BlockRange](std::atomic&) { + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_PartialGet"); + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, + BlockDescription.BlockHash, + BlockRange.RangeStart, + BlockRange.RangeLength); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); + } + BytesDownloaded += BlockBuffer.GetSize(); + BlockBytes += BlockBuffer.GetSize(); + DownloadedBlocks++; + CompositeBuffer Payload = WriteToTempFileIfNeeded(CompositeBuffer(std::move(BlockBuffer)), + Path / ZenTempBlockFolderName, + BlockDescription.BlockHash, + fmt::format("_{}", BlockRange.RangeStart)); + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } - CompositeBuffer DecompressedBlockBuffer = CompressedBlockBuffer.DecompressToComposite(); - if (!DecompressedBlockBuffer) + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, // WritePool, // GetSyncWorkerPool(), + [&, BlockIndex, BlockRange, BlockPartialBuffer = std::move(Payload)](std::atomic&) { + if (!AbortFlag) { - throw std::runtime_error(fmt::format("Block {} failed to decompress", - BlockDescriptions[BlockIndex].BlockHash)); + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + FilteredWrittenBytesPerSecond.Start(); + + if (!WritePartialBlockToDisk( + CacheFolderPath, + RemoteContent, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + BlockPartialBuffer, + BlockRange.ChunkBlockIndexStart, + BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount - 1, + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags, + ChunkCountWritten, + WriteToDiskBytes)) + { + throw std::runtime_error( + fmt::format("Partial block {} is malformed", BlockDescription.BlockHash)); + } + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } } + }, + [&, BlockIndex](const std::exception& Ex, std::atomic&) { + ZEN_ERROR("Failed writing block {}. Reason: {}", + BlockDescriptions[BlockIndex].BlockHash, + Ex.what()); + AbortFlag = true; + }); + } + }, + Work.DefaultErrorFunction()); + } + } + else + { + BlocksNeededCount++; + TotalRequestCount++; + TotalPartWriteCount++; + + Work.ScheduleWork( + NetworkPool, // GetSyncWorkerPool(), // NetworkPool, + [&, BlockIndex](std::atomic&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_Get"); + + // Full block schedule - ZEN_ASSERT_SLOW(BlockDescriptions[BlockIndex].BlockHash == - IoHash::HashBuffer(DecompressedBlockBuffer)); - - if (!WriteBlockToDisk(CacheFolderPath, - RemoteContent, - SequenceIndexChunksLeftToWriteCounters, - DecompressedBlockBuffer, - RemoteLookup, - RemoteChunkIndexNeedsCopyFromSourceFlags.data(), - ChunkCountWritten, - WriteToDiskBytes)) + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescription.BlockHash); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); + } + BytesDownloaded += BlockBuffer.GetSize(); + BlockBytes += BlockBuffer.GetSize(); + DownloadedBlocks++; + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + + CompositeBuffer Payload = WriteToTempFileIfNeeded(CompositeBuffer(std::move(BlockBuffer)), + Path / ZenTempBlockFolderName, + BlockDescription.BlockHash); + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, + [&, BlockIndex, BlockBuffer = std::move(Payload)](std::atomic&) { + if (!AbortFlag) { - throw std::runtime_error( - fmt::format("Block {} is malformed", BlockDescriptions[BlockIndex].BlockHash)); + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + FilteredWrittenBytesPerSecond.Start(); + if (!WriteBlockToDisk(CacheFolderPath, + RemoteContent, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + BlockBuffer, + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags, + ChunkCountWritten, + WriteToDiskBytes)) + { + throw std::runtime_error( + fmt::format("Block {} is malformed", BlockDescription.BlockHash)); + } + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } } - BlocksComplete++; - } - }, - [&, BlockIndex](const std::exception& Ex, std::atomic&) { - ZEN_ERROR("Failed writing block {}. Reason: {}", - BlockDescriptions[BlockIndex].BlockHash, - Ex.what()); - AbortFlag = true; - }); + }, + Work.DefaultErrorFunction()); + } } - } - }, - Work.DefaultErrorFunction()); + }, + Work.DefaultErrorFunction()); + } } else { ZEN_DEBUG("Skipping block {} due to cache reuse", BlockDescriptions[BlockIndex].BlockHash); } } - + ZEN_DEBUG("Fetching {} with {} slack (ideal {}) out of {} using {} requests for {} blocks", + NiceBytes(AllBlocksFetch), + NiceBytes(AllBlocksSlack), + NiceBytes(AllBlockChunksSize), + NiceBytes(AllBlocksSize), + AllBlockRequests, + BlocksNeededCount); ZEN_TRACE_CPU("HandleChunks_Wait"); Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { @@ -4412,13 +4816,13 @@ namespace { ZEN_ASSERT(ChunkCountToWrite >= ChunkCountWritten.load()); FilteredWrittenBytesPerSecond.Update(WriteToDiskBytes.load()); FilteredDownloadedBytesPerSecond.Update(BytesDownloaded.load()); - std::string Details = fmt::format("{}/{} chunks. {}/{} blocks. {} {}bits/s downloaded. {} {}B/s written", - ChunkCountWritten.load(), - ChunkCountToWrite, - BlocksComplete.load(), - BlocksNeededCount, + std::string Details = fmt::format("{}/{} ({} {}bits/s) downloaded. {}/{} ({} {}B/s) written.", + RequestsComplete.load(), + TotalRequestCount, NiceBytes(BytesDownloaded.load()), NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8), + ChunkCountWritten.load(), + ChunkCountToWrite, NiceBytes(WriteToDiskBytes.load()), NiceNum(FilteredWrittenBytesPerSecond.GetCurrent())); WriteProgressBar.UpdateState({.Task = "Writing chunks ", @@ -4437,6 +4841,13 @@ namespace { } WriteProgressBar.Finish(); + + ZEN_CONSOLE("Downloaded {} ({}bits/s). Wrote {} ({}B/s). Completed in {}", + NiceBytes(BytesDownloaded.load()), + NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), BytesDownloaded * 8)), + NiceBytes(WriteToDiskBytes.load()), + NiceNum(GetBytesPerSecond(FilteredWrittenBytesPerSecond.GetElapsedTimeUS(), WriteToDiskBytes.load())), + NiceTimeSpanMs(WriteTimer.GetElapsedTimeMs())); } for (const auto& SequenceIndexChunksLeftToWriteCounter : SequenceIndexChunksLeftToWriteCounters) @@ -4455,6 +4866,7 @@ namespace { }); // Move all files we will reuse to cache folder + // TODO: If WipeTargetFolder is false we could check which files are already correct and leave them in place for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) { const IoHash& RawHash = LocalContent.RawHashes[LocalPathIndex]; @@ -5186,6 +5598,7 @@ namespace { std::span BuildPartNames, const std::filesystem::path& Path, bool AllowMultiparts, + bool AllowPartialBlockRequests, bool WipeTargetFolder, bool PostDownloadVerify) { @@ -5202,8 +5615,8 @@ namespace { CreateDirectories(Path / ZenTempBlockFolderName); CreateDirectories(Path / ZenTempChunkFolderName); // TODO: Don't clear this - pick up files -> chunks to use CreateDirectories(Path / - ZenTempCacheFolderName); // TODO: Don't clear this - pick up files and use as sequences (non .tmp extension) and - // delete .tmp (maybe?) - chunk them? How do we know the file is worth chunking? + ZenTempCacheFolderName); // TODO: Don't clear this - pick up files and use as sequences (non .tmp extension) + // and delete .tmp (maybe?) - chunk them? How do we know the file is worth chunking? std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; @@ -5311,6 +5724,7 @@ namespace { RemoteContent, BlockDescriptions, LooseChunkHashes, + AllowPartialBlockRequests, WipeTargetFolder, LocalFolderState); @@ -5705,6 +6119,12 @@ BuildsCommand::BuildsCommand() "Allow large attachments to be transfered using multipart protocol. Defaults to true.", cxxopts::value(m_AllowMultiparts), ""); + m_DownloadOptions.add_option("", + "", + "allow-partial-block-requests", + "Allow request for partial chunk blocks. Defaults to true.", + cxxopts::value(m_AllowPartialBlockRequests), + ""); m_DownloadOptions .add_option("", "", "verify", "Enable post download verify of all tracked files", cxxopts::value(m_PostDownloadVerify), ""); m_DownloadOptions.parse_positional({"local-path", "build-id", "build-part-name"}); @@ -5728,6 +6148,18 @@ BuildsCommand::BuildsCommand() AddOutputOptions(m_TestOptions); m_TestOptions.add_options()("h,help", "Print help"); m_TestOptions.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), ""); + m_TestOptions.add_option("", + "", + "allow-multipart", + "Allow large attachments to be transfered using multipart protocol. Defaults to true.", + cxxopts::value(m_AllowMultiparts), + ""); + m_TestOptions.add_option("", + "", + "allow-partial-block-requests", + "Allow request for partial chunk blocks. Defaults to true.", + cxxopts::value(m_AllowPartialBlockRequests), + ""); m_TestOptions.parse_positional({"local-path"}); m_TestOptions.positional_help("local-path"); @@ -6037,7 +6469,6 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_Bucket, GeneratedBuildId ? "Generated " : "", BuildId); - CreateDirectories(m_Path / ZenTempStorageFolderName); Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } @@ -6175,7 +6606,6 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_Namespace, m_Bucket, BuildId); - CreateDirectories(m_Path / ZenTempStorageFolderName); Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } @@ -6190,7 +6620,15 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); } - DownloadFolder(*Storage, BuildId, BuildPartIds, m_BuildPartNames, m_Path, m_AllowMultiparts, m_Clean, m_PostDownloadVerify); + DownloadFolder(*Storage, + BuildId, + BuildPartIds, + m_BuildPartNames, + m_Path, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + m_Clean, + m_PostDownloadVerify); if (false) { @@ -6275,7 +6713,6 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_Namespace, m_Bucket, BuildId); - CreateDirectories(m_Path / ZenTempStorageFolderName); Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } @@ -6335,7 +6772,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) const std::filesystem::path DownloadPath = m_Path.parent_path() / (m_BuildPartName + "_download"); ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}'", BuildId, BuildPartId, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, true, true); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, m_AllowPartialBlockRequests, true, true); if (AbortFlag) { ZEN_CONSOLE("Download failed."); @@ -6347,7 +6784,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildPartId, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false, true); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, m_AllowPartialBlockRequests, false, true); if (AbortFlag) { ZEN_CONSOLE("Re-download failed. (identical target)"); @@ -6449,7 +6886,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildPartId, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false, true); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, m_AllowPartialBlockRequests, false, true); if (AbortFlag) { ZEN_CONSOLE("Re-download failed. (scrambled target)"); @@ -6487,7 +6924,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (original)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, false, true); + DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, m_AllowPartialBlockRequests, false, true); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -6495,7 +6932,15 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false, true); + DownloadFolder(*Storage, + BuildId2, + {BuildPartId2}, + {}, + DownloadPath, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -6503,7 +6948,15 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); - DownloadFolder(*Storage, BuildId2, {BuildPartId2}, {}, DownloadPath, m_AllowMultiparts, false, true); + DownloadFolder(*Storage, + BuildId2, + {BuildPartId2}, + {}, + DownloadPath, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true); if (AbortFlag) { ZEN_CONSOLE("Re-download failed."); @@ -6549,7 +7002,6 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_Namespace, m_Bucket, BuildId); - CreateDirectories(m_Path / ZenTempStorageFolderName); Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } @@ -6617,7 +7069,6 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_Namespace, m_Bucket, BuildId); - CreateDirectories(m_Path / ZenTempStorageFolderName); Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h index c54fb4db1..838a17807 100644 --- a/src/zen/cmds/builds_cmd.h +++ b/src/zen/cmds/builds_cmd.h @@ -49,6 +49,7 @@ private: bool m_Clean = false; uint8_t m_BlockReuseMinPercentLimit = 85; bool m_AllowMultiparts = true; + bool m_AllowPartialBlockRequests = true; std::filesystem::path m_ManifestPath; // Direct access token (may expire) diff --git a/src/zenhttp/httpclient.cpp b/src/zenhttp/httpclient.cpp index e4c6d243d..30711a432 100644 --- a/src/zenhttp/httpclient.cpp +++ b/src/zenhttp/httpclient.cpp @@ -1148,6 +1148,30 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold return true; }; + uint64_t RequestedContentLength = (uint64_t)-1; + if (auto RangeIt = AdditionalHeader.Entries.find("Range"); RangeIt != AdditionalHeader.Entries.end()) + { + if (RangeIt->second.starts_with("bytes")) + { + size_t RangeStartPos = RangeIt->second.find('=', 5); + if (RangeStartPos != std::string::npos) + { + RangeStartPos++; + size_t RangeSplitPos = RangeIt->second.find('-', RangeStartPos); + if (RangeSplitPos != std::string::npos) + { + std::optional RequestedRangeStart = + ParseInt(RangeIt->second.substr(RangeStartPos, RangeSplitPos - RangeStartPos)); + std::optional RequestedRangeEnd = ParseInt(RangeIt->second.substr(RangeStartPos + 1)); + if (RequestedRangeStart.has_value() && RequestedRangeEnd.has_value()) + { + RequestedContentLength = RequestedRangeEnd.value() - 1; + } + } + } + } + } + cpr::Response Response; { std::vector> ReceivedHeaders; @@ -1155,10 +1179,10 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold std::pair Header = GetHeader(header); if (Header.first == "Content-Length"sv) { - std::optional ContentSize = ParseInt(Header.second); - if (ContentSize.has_value()) + std::optional ContentLength = ParseInt(Header.second); + if (ContentLength.has_value()) { - if (ContentSize.value() > 1024 * 1024) + if (ContentLength.value() > 1024 * 1024) { PayloadFile = std::make_unique(); std::error_code Ec = PayloadFile->Open(TempFolderPath); @@ -1172,7 +1196,7 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold } else { - PayloadString.reserve(ContentSize.value()); + PayloadString.reserve(ContentLength.value()); } } } @@ -1218,85 +1242,90 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold auto It = Response.header.find("Content-Length"); if (It != Response.header.end()) { - std::optional ContentLength = ParseInt(It->second); - if (ContentLength) - { - std::vector> ReceivedHeaders; + std::vector> ReceivedHeaders; - auto HeaderCallback = [&](std::string header, intptr_t) { - std::pair Header = GetHeader(header); - if (!Header.first.empty()) - { - ReceivedHeaders.emplace_back(std::move(Header)); - } + auto HeaderCallback = [&](std::string header, intptr_t) { + std::pair Header = GetHeader(header); + if (!Header.first.empty()) + { + ReceivedHeaders.emplace_back(std::move(Header)); + } - if (Header.first == "Content-Range"sv) + if (Header.first == "Content-Range"sv) + { + if (Header.second.starts_with("bytes "sv)) { - if (Header.second.starts_with("bytes "sv)) + size_t RangeStartEnd = Header.second.find('-', 6); + if (RangeStartEnd != std::string::npos) { - size_t RangeStartEnd = Header.second.find('-', 6); - if (RangeStartEnd != std::string::npos) + const auto Start = ParseInt(Header.second.substr(6, RangeStartEnd - 6)); + if (Start) { - const auto Start = ParseInt(Header.second.substr(6, RangeStartEnd - 6)); - if (Start) + uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); + if (Start.value() == DownloadedSize) { - uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); - if (Start.value() == DownloadedSize) - { - return 1; - } - else if (Start.value() > DownloadedSize) - { - return 0; - } - if (PayloadFile) - { - PayloadFile->ResetWritePos(Start.value()); - } - else - { - PayloadString = PayloadString.substr(0, Start.value()); - } return 1; } + else if (Start.value() > DownloadedSize) + { + return 0; + } + if (PayloadFile) + { + PayloadFile->ResetWritePos(Start.value()); + } + else + { + PayloadString = PayloadString.substr(0, Start.value()); + } + return 1; } } - return 0; } - return 1; - }; + return 0; + } + return 1; + }; - KeyValueMap HeadersWithRange(AdditionalHeader); - do - { - uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); + KeyValueMap HeadersWithRange(AdditionalHeader); + do + { + uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); - std::string Range = fmt::format("bytes={}-{}", DownloadedSize, DownloadedSize + ContentLength.value() - 1); - if (auto RangeIt = HeadersWithRange.Entries.find("Range"); RangeIt != HeadersWithRange.Entries.end()) + uint64_t ContentLength = RequestedContentLength; + if (ContentLength == uint64_t(-1)) + { + if (auto ParsedContentLength = ParseInt(It->second); ParsedContentLength.has_value()) { - if (RangeIt->second == Range) - { - // If we didn't make any progress, abort - break; - } + ContentLength = ParsedContentLength.value(); } - HeadersWithRange.Entries.insert_or_assign("Range", Range); - - Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, - Url, - m_ConnectionSettings, - HeadersWithRange, - {}, - m_SessionId, - GetAccessToken()); - Response = Sess.Download(cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback}); - for (const std::pair& H : ReceivedHeaders) + } + + std::string Range = fmt::format("bytes={}-{}", DownloadedSize, DownloadedSize + ContentLength - 1); + if (auto RangeIt = HeadersWithRange.Entries.find("Range"); RangeIt != HeadersWithRange.Entries.end()) + { + if (RangeIt->second == Range) { - Response.header.insert_or_assign(H.first, H.second); + // If we didn't make any progress, abort + break; } - ReceivedHeaders.clear(); - } while (ShouldResume(Response)); - } + } + HeadersWithRange.Entries.insert_or_assign("Range", Range); + + Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, + Url, + m_ConnectionSettings, + HeadersWithRange, + {}, + m_SessionId, + GetAccessToken()); + Response = Sess.Download(cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback}); + for (const std::pair& H : ReceivedHeaders) + { + Response.header.insert_or_assign(H.first, H.second); + } + ReceivedHeaders.clear(); + } while (ShouldResume(Response)); } } } diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp index a4bb759e7..e57109006 100644 --- a/src/zenutil/filebuildstorage.cpp +++ b/src/zenutil/filebuildstorage.cpp @@ -325,7 +325,7 @@ public: return {}; } - virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) override + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override { ZEN_UNUSED(BuildId); SimulateLatency(0, 0); @@ -337,10 +337,19 @@ public: if (std::filesystem::is_regular_file(BlockPath)) { BasicFile File(BlockPath, BasicFile::Mode::kRead); - IoBuffer Payload = File.ReadAll(); - ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(SharedBuffer(Payload)))); - m_Stats.TotalBytesRead += Payload.GetSize(); + IoBuffer Payload; + if (RangeOffset != 0 || RangeBytes != (uint64_t)-1) + { + Payload = IoBuffer(RangeBytes); + File.Read(Payload.GetMutableView().GetData(), RangeBytes, RangeOffset); + } + else + { + Payload = File.ReadAll(); + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(SharedBuffer(Payload)))); + } Payload.SetContentType(ZenContentType::kCompressedBinary); + m_Stats.TotalBytesRead += Payload.GetSize(); SimulateLatency(0, Payload.GetSize()); return Payload; } diff --git a/src/zenutil/include/zenutil/buildstorage.h b/src/zenutil/include/zenutil/buildstorage.h index 9c236310f..9d2bab170 100644 --- a/src/zenutil/include/zenutil/buildstorage.h +++ b/src/zenutil/include/zenutil/buildstorage.h @@ -40,7 +40,10 @@ public: std::function&& Transmitter, std::function&& OnSentBytes) = 0; - virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) = 0; + virtual IoBuffer GetBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + uint64_t RangeOffset = 0, + uint64_t RangeBytes = (uint64_t)-1) = 0; virtual std::vector> GetLargeBuildBlob( const Oid& BuildId, const IoHash& RawHash, diff --git a/src/zenutil/include/zenutil/jupiter/jupitersession.h b/src/zenutil/include/zenutil/jupiter/jupitersession.h index 852271868..2c5fc73b8 100644 --- a/src/zenutil/include/zenutil/jupiter/jupitersession.h +++ b/src/zenutil/include/zenutil/jupiter/jupitersession.h @@ -123,7 +123,9 @@ public: std::string_view BucketId, const Oid& BuildId, const IoHash& Hash, - std::filesystem::path TempFolderPath); + std::filesystem::path TempFolderPath, + uint64_t Offset = 0, + uint64_t Size = (uint64_t)-1); JupiterResult PutMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, diff --git a/src/zenutil/jupiter/jupiterbuildstorage.cpp b/src/zenutil/jupiter/jupiterbuildstorage.cpp index 309885b05..bf89ce785 100644 --- a/src/zenutil/jupiter/jupiterbuildstorage.cpp +++ b/src/zenutil/jupiter/jupiterbuildstorage.cpp @@ -217,13 +217,15 @@ public: return WorkList; } - virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) override + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override { ZEN_TRACE_CPU("Jupiter::GetBuildBlob"); - Stopwatch ExecutionTimer; - auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); - JupiterResult GetBuildBlobResult = m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + CreateDirectories(m_TempFolderPath); + JupiterResult GetBuildBlobResult = + m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath, RangeOffset, RangeBytes); AddStatistic(GetBuildBlobResult); if (!GetBuildBlobResult.Success) { diff --git a/src/zenutil/jupiter/jupitersession.cpp b/src/zenutil/jupiter/jupitersession.cpp index 06ac6ae36..68f214c06 100644 --- a/src/zenutil/jupiter/jupitersession.cpp +++ b/src/zenutil/jupiter/jupitersession.cpp @@ -698,11 +698,19 @@ JupiterSession::GetBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoHash& Hash, - std::filesystem::path TempFolderPath) + std::filesystem::path TempFolderPath, + uint64_t Offset, + uint64_t Size) { + HttpClient::KeyValueMap Headers; + if (Offset != 0 || Size != (uint64_t)-1) + { + Headers.Entries.insert({"Range", fmt::format("bytes={}-{}", Offset, Offset + Size - 1)}); + } HttpClient::Response Response = m_HttpClient.Download(fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()), - TempFolderPath); + TempFolderPath, + Headers); return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv); } -- cgit v1.2.3 From 7de3d4218ee5969af6147f9ab20bda538a136d9a Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 10 Mar 2025 18:33:24 +0100 Subject: pick up existing cache (#299) - Improvement: Scavenge .zen temp folders for existing data (downloaded, decompressed or written) from previous failed run - Improvement: Faster abort during stream compression - Improvement: Try to move downloaded blobs with rename if possible avoiding an extra disk write - Improvement: Only clean temp folders on successful or cancelled build - keep it if download fails --- src/zen/cmds/builds_cmd.cpp | 2438 +++++++++++++++++++----------- src/zen/cmds/builds_cmd.h | 8 +- src/zencore/basicfile.cpp | 37 +- src/zencore/compress.cpp | 51 +- src/zencore/include/zencore/basicfile.h | 2 +- src/zencore/include/zencore/compress.h | 2 +- src/zencore/workthreadpool.cpp | 2 +- src/zenutil/chunkblock.cpp | 51 +- src/zenutil/include/zenutil/chunkblock.h | 9 +- 9 files changed, 1682 insertions(+), 918 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 1c9476b96..2c7a73fb3 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -83,15 +83,24 @@ namespace { const double DefaultLatency = 0; // .0010; const double DefaultDelayPerKBSec = 0; // 0.00005; - const std::string ZenFolderName = ".zen"; - const std::string ZenStateFilePath = fmt::format("{}/current_state.cbo", ZenFolderName); - const std::string ZenStateFileJsonPath = fmt::format("{}/current_state.json", ZenFolderName); - const std::string ZenTempFolderName = fmt::format("{}/tmp", ZenFolderName); - const std::string ZenTempCacheFolderName = fmt::format("{}/cache", ZenTempFolderName); - const std::string ZenTempStorageFolderName = fmt::format("{}/storage", ZenTempFolderName); - const std::string ZenTempBlockFolderName = fmt::format("{}/blocks", ZenTempFolderName); - const std::string ZenTempChunkFolderName = fmt::format("{}/chunks", ZenTempFolderName); - const std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt"; + const std::string ZenFolderName = ".zen"; + const std::string ZenStateFilePath = fmt::format("{}/current_state.cbo", ZenFolderName); + const std::string ZenStateFileJsonPath = fmt::format("{}/current_state.json", ZenFolderName); + const std::string ZenTempFolderName = fmt::format("{}/tmp", ZenFolderName); + + const std::string ZenTempCacheFolderName = + fmt::format("{}/cache", ZenTempFolderName); // Decompressed and verified data - chunks & sequences + const std::string ZenTempBlockFolderName = fmt::format("{}/blocks", ZenTempFolderName); // Temp storage for whole and partial blocks + const std::string ZenTempChunkFolderName = + fmt::format("{}/chunks", ZenTempFolderName); // Temp storage for decompressed and validated chunks + + const std::string ZenTempDownloadFolderName = + fmt::format("{}/download", ZenTempFolderName); // Temp storage for unverfied downloaded blobs + + const std::string ZenTempStorageFolderName = + fmt::format("{}/storage", ZenTempFolderName); // Temp storage folder for BuildStorage implementations + + const std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt"; const std::string UnsyncFolderName = ".unsync"; @@ -231,21 +240,31 @@ namespace { return AuthToken; } - CompositeBuffer WriteToTempFileIfNeeded(const CompositeBuffer& Buffer, - const std::filesystem::path& TempFolderPath, - const IoHash& Hash, - const std::string& Suffix = {}) + bool IsBufferDiskBased(const IoBuffer& Buffer) { - // If this is a file based buffer or a compressed buffer with a memory-based header, we don't need to rewrite to disk to save memory - std::span Segments = Buffer.GetSegments(); - ZEN_ASSERT(Buffer.GetSegments().size() > 0); IoBufferFileReference FileRef; - if (Segments.back().GetFileReference(FileRef)) + if (Buffer.GetFileReference(FileRef)) { - return Buffer; + return true; } + return false; + } + + bool IsBufferDiskBased(const CompositeBuffer& Buffer) + { + // If this is a file based buffer or a compressed buffer with a memory-based header, we don't need to rewrite to disk to save memory + std::span Segments = Buffer.GetSegments(); + ZEN_ASSERT(Buffer.GetSegments().size() > 0); + return IsBufferDiskBased(Segments.back().AsIoBuffer()); + } + + IoBuffer WriteToTempFile(CompositeBuffer&& Buffer, + const std::filesystem::path& TempFolderPath, + const IoHash& Hash, + const std::string& Suffix = {}) + { std::filesystem::path TempFilePath = (TempFolderPath / (Hash.ToHexString() + Suffix)).make_preferred(); - return CompositeBuffer(WriteToTempFile(Buffer, TempFilePath)); + return WriteToTempFile(std::move(Buffer), TempFilePath); } class FilteredRate @@ -1109,12 +1128,22 @@ namespace { IoHashStream Hash; bool CouldDecompress = Compressed.DecompressToStream(0, RawSize, [&Hash](uint64_t, const CompositeBuffer& RangeBuffer) { - for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + if (!AbortFlag) { - Hash.Append(Segment.GetView()); + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + } + return true; } + return false; }); + if (AbortFlag) + { + return CompositeBuffer{}; + } + if (!CouldDecompress) { throw std::runtime_error( @@ -1299,14 +1328,14 @@ namespace { { Work.ScheduleWork( VerifyPool, - [&, Payload = std::move(Payload), ChunkAttachment](std::atomic&) { + [&, Payload = std::move(Payload), ChunkAttachment](std::atomic&) mutable { if (!AbortFlag) { FilteredVerifiedBytesPerSecond.Start(); uint64_t CompressedSize; uint64_t DecompressedSize; - ValidateBlob(IoBuffer(Payload), ChunkAttachment, CompressedSize, DecompressedSize); + ValidateBlob(std::move(Payload), ChunkAttachment, CompressedSize, DecompressedSize); ZEN_CONSOLE_VERBOSE("Chunk attachment {} ({} -> {}) is valid", ChunkAttachment, NiceBytes(CompressedSize), @@ -1349,14 +1378,14 @@ namespace { { Work.ScheduleWork( VerifyPool, - [&, Payload = std::move(Payload), BlockAttachment](std::atomic&) { + [&, Payload = std::move(Payload), BlockAttachment](std::atomic&) mutable { if (!AbortFlag) { FilteredVerifiedBytesPerSecond.Start(); uint64_t CompressedSize; uint64_t DecompressedSize; - ValidateChunkBlock(IoBuffer(Payload), BlockAttachment, CompressedSize, DecompressedSize); + ValidateChunkBlock(std::move(Payload), BlockAttachment, CompressedSize, DecompressedSize); ZEN_CONSOLE_VERBOSE("Chunk block {} ({} -> {}) is valid", BlockAttachment, NiceBytes(CompressedSize), @@ -1564,8 +1593,12 @@ namespace { { throw std::runtime_error(fmt::format("Failed to compress large blob {}", ChunkHash)); } - CompositeBuffer TempPayload = WriteToTempFileIfNeeded(CompressedBlob.GetCompressed(), TempFolderPath, ChunkHash); - return CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)).GetCompressed(); + if (!IsBufferDiskBased(CompressedBlob.GetCompressed())) + { + IoBuffer TempPayload = WriteToTempFile(std::move(CompressedBlob).GetCompressed(), TempFolderPath, ChunkHash); + CompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)); + } + return std::move(CompressedBlob).GetCompressed(); } struct GeneratedBlocks @@ -1637,9 +1670,13 @@ namespace { OutBlocks.BlockSizes[BlockIndex] = CompressedBlock.GetCompressedSize(); - CompositeBuffer Payload = WriteToTempFileIfNeeded(CompressedBlock.GetCompressed(), - Path / ZenTempBlockFolderName, - OutBlocks.BlockDescriptions[BlockIndex].BlockHash); + if (!IsBufferDiskBased(CompressedBlock.GetCompressed())) + { + IoBuffer TempPayload = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), + Path / ZenTempBlockFolderName, + OutBlocks.BlockDescriptions[BlockIndex].BlockHash); + CompressedBlock = CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)); + } { CbObjectWriter Writer; Writer.AddString("createdBy", "zen"); @@ -1663,7 +1700,7 @@ namespace { PendingUploadCount++; Work.ScheduleWork( UploadBlocksPool, - [&, BlockIndex, Payload = std::move(Payload)](std::atomic&) { + [&, BlockIndex, Payload = std::move(CompressedBlock).GetCompressed()](std::atomic&) mutable { if (!AbortFlag) { if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) @@ -1682,12 +1719,16 @@ namespace { BuildChunkBlockDescription(OutBlocks.BlockDescriptions[BlockIndex], OutBlocks.BlockMetaDatas[BlockIndex]); - const IoHash& BlockHash = OutBlocks.BlockDescriptions[BlockIndex].BlockHash; - Storage.PutBuildBlob(BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); - UploadStats.BlocksBytes += Payload.GetSize(); + const IoHash& BlockHash = OutBlocks.BlockDescriptions[BlockIndex].BlockHash; + const uint64_t CompressedBlockSize = Payload.GetSize(); + Storage.PutBuildBlob(BuildId, + BlockHash, + ZenContentType::kCompressedBinary, + std::move(Payload)); + UploadStats.BlocksBytes += CompressedBlockSize; ZEN_CONSOLE_VERBOSE("Uploaded block {} ({}) containing {} chunks", OutBlocks.BlockDescriptions[BlockIndex].BlockHash, - NiceBytes(Payload.GetSize()), + NiceBytes(CompressedBlockSize), OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); Storage.PutBlockMetadata(BuildId, @@ -1817,7 +1858,7 @@ namespace { auto AsyncUploadBlock = [&](const size_t BlockIndex, const IoHash BlockHash, CompositeBuffer&& Payload) { Work.ScheduleWork( UploadChunkPool, - [&, BlockIndex, BlockHash, Payload = std::move(Payload)](std::atomic&) { + [&, BlockIndex, BlockHash, Payload = std::move(Payload)](std::atomic&) mutable { if (!AbortFlag) { FilteredUploadedBytesPerSecond.Start(); @@ -1855,7 +1896,7 @@ namespace { auto AsyncUploadLooseChunk = [&](const IoHash& RawHash, uint64_t RawSize, CompositeBuffer&& Payload) { Work.ScheduleWork( UploadChunkPool, - [&, RawHash, RawSize, Payload = CompositeBuffer(std::move(Payload))](std::atomic&) { + [&, RawHash, RawSize, Payload = CompositeBuffer(std::move(Payload))](std::atomic&) mutable { if (!AbortFlag) { const uint64_t PayloadSize = Payload.GetSize(); @@ -1868,7 +1909,7 @@ namespace { ZenContentType::kCompressedBinary, PayloadSize, [Payload = std::move(Payload), &FilteredUploadedBytesPerSecond](uint64_t Offset, - uint64_t Size) -> IoBuffer { + uint64_t Size) mutable -> IoBuffer { FilteredUploadedBytesPerSecond.Start(); IoBuffer PartPayload = Payload.Mid(Offset, Size).Flatten().AsIoBuffer(); @@ -1974,9 +2015,11 @@ namespace { } ZEN_ASSERT(BlockDescription.BlockHash == BlockHash); - CompositeBuffer Payload = WriteToTempFileIfNeeded(CompressedBlock.GetCompressed(), - Path / ZenTempBlockFolderName, - BlockDescription.BlockHash); + CompositeBuffer Payload = IsBufferDiskBased(CompressedBlock.GetCompressed()) + ? std::move(CompressedBlock).GetCompressed() + : CompositeBuffer(WriteToTempFile(std::move(CompressedBlock).GetCompressed(), + Path / ZenTempBlockFolderName, + BlockDescription.BlockHash)); GenerateBlocksStats.GeneratedBlockByteCount += NewBlocks.BlockSizes[BlockIndex]; GenerateBlocksStats.GeneratedBlockCount++; @@ -3289,33 +3332,40 @@ namespace { uint64_t FileOffset, uint64_t TargetFinalSize) { + ZEN_TRACE_CPU("WriteFileCache_WriteToFile"); if (!SeenTargetIndexes.empty() && SeenTargetIndexes.back() == TargetIndex) { + ZEN_TRACE_CPU("WriteFileCache_WriteToFile_CacheWrite"); ZEN_ASSERT(OpenFileWriter); OpenFileWriter->Write(Buffer, FileOffset); } else { - Flush(); - const std::filesystem::path& TargetPath = GetTargetPath(TargetIndex); - CreateDirectories(TargetPath.parent_path()); - uint32_t Tries = 5; - std::unique_ptr NewOutputFile( - std::make_unique(TargetPath, BasicFile::Mode::kWrite, [&Tries, TargetPath](std::error_code& Ec) { - if (Tries < 3) - { - ZEN_CONSOLE("Failed opening file '{}': {}{}", TargetPath, Ec.message(), Tries > 1 ? " Retrying"sv : ""sv); - } - if (Tries > 1) - { - Sleep(100); - } - return --Tries > 0; - })); + std::unique_ptr NewOutputFile; + { + ZEN_TRACE_CPU("WriteFileCache_WriteToFile_Open"); + Flush(); + const std::filesystem::path& TargetPath = GetTargetPath(TargetIndex); + CreateDirectories(TargetPath.parent_path()); + uint32_t Tries = 5; + NewOutputFile = + std::make_unique(TargetPath, BasicFile::Mode::kWrite, [&Tries, TargetPath](std::error_code& Ec) { + if (Tries < 3) + { + ZEN_CONSOLE("Failed opening file '{}': {}{}", TargetPath, Ec.message(), Tries > 1 ? " Retrying"sv : ""sv); + } + if (Tries > 1) + { + Sleep(100); + } + return --Tries > 0; + }); + } const bool CacheWriter = TargetFinalSize > Buffer.GetSize(); if (CacheWriter) { + ZEN_TRACE_CPU("WriteFileCache_WriteToFile_CacheWrite"); ZEN_ASSERT_SLOW(std::find(SeenTargetIndexes.begin(), SeenTargetIndexes.end(), TargetIndex) == SeenTargetIndexes.end()); OutputFile = std::move(NewOutputFile); @@ -3325,6 +3375,7 @@ namespace { } else { + ZEN_TRACE_CPU("WriteFileCache_WriteToFile_Write"); NewOutputFile->Write(Buffer, FileOffset); } } @@ -3332,6 +3383,7 @@ namespace { void Flush() { + ZEN_TRACE_CPU("WriteFileCache_Flush"); OpenFileWriter = {}; OutputFile = {}; } @@ -3376,7 +3428,7 @@ namespace { const ChunkedFolderContent& RemoteContent, const ChunkedContentLookup& Lookup, std::span> SequenceIndexChunksLeftToWriteCounters, - const BlockWriteOps Ops, + const BlockWriteOps& Ops, std::atomic& OutChunksComplete, std::atomic& OutBytesWritten) { @@ -3420,13 +3472,19 @@ namespace { if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) { const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - const IoHash VerifyChunkHash = - IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); - if (VerifyChunkHash != SequenceRawHash) { - throw std::runtime_error( - fmt::format("Written hunk sequence {} hash does not match expected hash {}", VerifyChunkHash, SequenceRawHash)); + ZEN_TRACE_CPU("VerifyChunkHash"); + const IoHash VerifyChunkHash = IoHash::HashBuffer( + IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error(fmt::format("Written chunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + SequenceRawHash)); + } } + ZEN_TRACE_CPU("VerifyChunkHashes_rename"); + ZEN_ASSERT_SLOW(!std::filesystem::exists(GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); } @@ -3437,139 +3495,38 @@ namespace { bool GetBlockWriteOps(const ChunkedFolderContent& RemoteContent, const ChunkedContentLookup& Lookup, + std::span ChunkRawHashes, + std::span ChunkCompressedLengths, + std::span ChunkRawLengths, std::span> SequenceIndexChunksLeftToWriteCounters, std::span> RemoteChunkIndexNeedsCopyFromSourceFlags, - const CompositeBuffer& DecompressedBlockBuffer, + CompositeBuffer&& PartialBlockBuffer, + uint32_t FirstIncludedBlockChunkIndex, + uint32_t LastIncludedBlockChunkIndex, BlockWriteOps& OutOps) { ZEN_TRACE_CPU("GetBlockWriteOps"); - SharedBuffer BlockBuffer = DecompressedBlockBuffer.Flatten(); - uint64_t HeaderSize = 0; - if (IterateChunkBlock( - BlockBuffer, - [&](CompressedBuffer&& Chunk, const IoHash& ChunkHash) { - if (auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); It != Lookup.ChunkHashToChunkIndex.end()) - { - const uint32_t ChunkIndex = It->second; - std::vector ChunkTargetPtrs = - GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, Lookup, ChunkIndex); - - if (!ChunkTargetPtrs.empty()) - { - bool NeedsWrite = true; - if (RemoteChunkIndexNeedsCopyFromSourceFlags[ChunkIndex].compare_exchange_strong(NeedsWrite, false)) - { - CompositeBuffer Decompressed = Chunk.DecompressToComposite(); - if (!Decompressed) - { - throw std::runtime_error(fmt::format("Decompression of build blob {} failed", ChunkHash)); - } - ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed)); - ZEN_ASSERT(Decompressed.GetSize() == RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); - for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs) - { - OutOps.WriteOps.push_back( - BlockWriteOps::WriteOpData{.Target = Target, .ChunkBufferIndex = OutOps.ChunkBuffers.size()}); - } - OutOps.ChunkBuffers.emplace_back(std::move(Decompressed)); - } - } - } - }, - HeaderSize)) - { - std::sort(OutOps.WriteOps.begin(), - OutOps.WriteOps.end(), - [](const BlockWriteOps::WriteOpData& Lhs, const BlockWriteOps::WriteOpData& Rhs) { - if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) - { - return true; - } - if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) - { - return false; - } - return Lhs.Target->Offset < Rhs.Target->Offset; - }); - - return true; + MemoryView BlockMemoryView; + UniqueBuffer BlockMemoryBuffer; + IoBufferFileReference FileRef = {}; + if (PartialBlockBuffer.GetSegments().size() == 1 && PartialBlockBuffer.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) + { + BlockMemoryBuffer = UniqueBuffer::Alloc(FileRef.FileChunkSize); + BasicFile Reader; + Reader.Attach(FileRef.FileHandle); + Reader.Read(BlockMemoryBuffer.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset); + BlockMemoryView = BlockMemoryBuffer.GetView(); + Reader.Detach(); } else { - return false; - } - } - - bool WriteBlockToDisk(const std::filesystem::path& CacheFolderPath, - const ChunkedFolderContent& RemoteContent, - const ChunkBlockDescription& BlockDescription, - std::span> SequenceIndexChunksLeftToWriteCounters, - const CompositeBuffer& BlockBuffer, - const ChunkedContentLookup& Lookup, - std::span> RemoteChunkIndexNeedsCopyFromSourceFlags, - std::atomic& OutChunksComplete, - std::atomic& OutBytesWritten) - { - ZEN_TRACE_CPU("WriteBlockToDisk"); - - IoHash BlockRawHash; - uint64_t BlockRawSize; - CompressedBuffer CompressedBlockBuffer = CompressedBuffer::FromCompressed(BlockBuffer, BlockRawHash, BlockRawSize); - if (!CompressedBlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", BlockDescription.BlockHash)); - } - - if (BlockRawHash != BlockDescription.BlockHash) - { - throw std::runtime_error( - fmt::format("Block {} header has a mismatching raw hash {}", BlockDescription.BlockHash, BlockRawHash)); - } - - CompositeBuffer DecompressedBlockBuffer = CompressedBlockBuffer.DecompressToComposite(); - if (!DecompressedBlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} failed to decompress", BlockDescription.BlockHash)); - } - - ZEN_ASSERT_SLOW(BlockDescription.BlockHash == IoHash::HashBuffer(DecompressedBlockBuffer)); - - BlockWriteOps Ops; - if (GetBlockWriteOps(RemoteContent, - Lookup, - SequenceIndexChunksLeftToWriteCounters, - RemoteChunkIndexNeedsCopyFromSourceFlags, - DecompressedBlockBuffer, - Ops)) - { - WriteBlockChunkOps(CacheFolderPath, - RemoteContent, - Lookup, - SequenceIndexChunksLeftToWriteCounters, - Ops, - OutChunksComplete, - OutBytesWritten); - return true; + BlockMemoryView = PartialBlockBuffer.ViewOrCopyRange(0, PartialBlockBuffer.GetSize(), BlockMemoryBuffer); } - return false; - } - - bool GetPartialBlockWriteOps(const ChunkedFolderContent& RemoteContent, - const ChunkedContentLookup& Lookup, - const ChunkBlockDescription& BlockDescription, - std::span> SequenceIndexChunksLeftToWriteCounters, - std::span> RemoteChunkIndexNeedsCopyFromSourceFlags, - const CompositeBuffer& PartialBlockBuffer, - uint32_t FirstIncludedBlockChunkIndex, - uint32_t LastIncludedBlockChunkIndex, - BlockWriteOps& OutOps) - { - ZEN_TRACE_CPU("GetPartialBlockWriteOps"); uint32_t OffsetInBlock = 0; for (uint32_t ChunkBlockIndex = FirstIncludedBlockChunkIndex; ChunkBlockIndex <= LastIncludedBlockChunkIndex; ChunkBlockIndex++) { - const uint32_t ChunkCompressedSize = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; - const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + const uint32_t ChunkCompressedSize = ChunkCompressedLengths[ChunkBlockIndex]; + const IoHash& ChunkHash = ChunkRawHashes[ChunkBlockIndex]; if (auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); It != Lookup.ChunkHashToChunkIndex.end()) { const uint32_t ChunkIndex = It->second; @@ -3581,7 +3538,9 @@ namespace { bool NeedsWrite = true; if (RemoteChunkIndexNeedsCopyFromSourceFlags[ChunkIndex].compare_exchange_strong(NeedsWrite, false)) { - CompositeBuffer Chunk = PartialBlockBuffer.Mid(OffsetInBlock, ChunkCompressedSize); + // CompositeBuffer Chunk = PartialBlockBuffer.Mid(OffsetInBlock, ChunkCompressedSize); + MemoryView ChunkMemory = BlockMemoryView.Mid(OffsetInBlock, ChunkCompressedSize); + CompositeBuffer Chunk = CompositeBuffer(IoBuffer(IoBuffer::Wrap, ChunkMemory.GetData(), ChunkMemory.GetSize())); IoHash VerifyChunkHash; uint64_t VerifyRawSize; CompressedBuffer Compressed = CompressedBuffer::FromCompressed(Chunk, VerifyChunkHash, VerifyRawSize); @@ -3593,9 +3552,12 @@ namespace { { ZEN_ASSERT(false); } - if (VerifyRawSize != BlockDescription.ChunkRawLengths[ChunkBlockIndex]) + if (!ChunkRawLengths.empty()) { - ZEN_ASSERT(false); + if (VerifyRawSize != ChunkRawLengths[ChunkBlockIndex]) + { + ZEN_ASSERT(false); + } } CompositeBuffer Decompressed = Compressed.DecompressToComposite(); if (!Decompressed) @@ -3632,11 +3594,85 @@ namespace { return true; } + bool WriteBlockToDisk(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkBlockDescription& BlockDescription, + std::span> SequenceIndexChunksLeftToWriteCounters, + CompositeBuffer&& BlockBuffer, + const ChunkedContentLookup& Lookup, + std::span> RemoteChunkIndexNeedsCopyFromSourceFlags, + std::atomic& OutChunksComplete, + std::atomic& OutBytesWritten) + { + ZEN_TRACE_CPU("WriteBlockToDisk"); + + BlockWriteOps Ops; + if ((BlockDescription.HeaderSize == 0) || BlockDescription.ChunkCompressedLengths.empty()) + { + ZEN_TRACE_CPU("WriteBlockToDisk_Legacy"); + + UniqueBuffer CopyBuffer; + const MemoryView BlockView = BlockBuffer.ViewOrCopyRange(0, BlockBuffer.GetSize(), CopyBuffer); + uint64_t HeaderSize; + const std::vector ChunkCompressedLengths = ReadChunkBlockHeader(BlockView, HeaderSize); + + CompositeBuffer PartialBlockBuffer = std::move(BlockBuffer).Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + HeaderSize); + + if (GetBlockWriteOps(RemoteContent, + Lookup, + BlockDescription.ChunkRawHashes, + ChunkCompressedLengths, + BlockDescription.ChunkRawLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + std::move(PartialBlockBuffer), + 0, + gsl::narrow(BlockDescription.ChunkRawHashes.size() - 1), + Ops)) + { + WriteBlockChunkOps(CacheFolderPath, + RemoteContent, + Lookup, + SequenceIndexChunksLeftToWriteCounters, + Ops, + OutChunksComplete, + OutBytesWritten); + return true; + } + return false; + } + + CompositeBuffer PartialBlockBuffer = + std::move(BlockBuffer).Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + if (GetBlockWriteOps(RemoteContent, + Lookup, + BlockDescription.ChunkRawHashes, + BlockDescription.ChunkCompressedLengths, + BlockDescription.ChunkRawLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + std::move(PartialBlockBuffer), + 0, + gsl::narrow(BlockDescription.ChunkRawHashes.size() - 1), + Ops)) + { + WriteBlockChunkOps(CacheFolderPath, + RemoteContent, + Lookup, + SequenceIndexChunksLeftToWriteCounters, + Ops, + OutChunksComplete, + OutBytesWritten); + return true; + } + return false; + } + bool WritePartialBlockToDisk(const std::filesystem::path& CacheFolderPath, const ChunkedFolderContent& RemoteContent, const ChunkBlockDescription& BlockDescription, std::span> SequenceIndexChunksLeftToWriteCounters, - const CompositeBuffer& PartialBlockBuffer, + CompositeBuffer&& PartialBlockBuffer, uint32_t FirstIncludedBlockChunkIndex, uint32_t LastIncludedBlockChunkIndex, const ChunkedContentLookup& Lookup, @@ -3646,15 +3682,17 @@ namespace { { ZEN_TRACE_CPU("WritePartialBlockToDisk"); BlockWriteOps Ops; - if (GetPartialBlockWriteOps(RemoteContent, - Lookup, - BlockDescription, - SequenceIndexChunksLeftToWriteCounters, - RemoteChunkIndexNeedsCopyFromSourceFlags, - PartialBlockBuffer, - FirstIncludedBlockChunkIndex, - LastIncludedBlockChunkIndex, - Ops)) + if (GetBlockWriteOps(RemoteContent, + Lookup, + BlockDescription.ChunkRawHashes, + BlockDescription.ChunkCompressedLengths, + BlockDescription.ChunkRawLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + std::move(PartialBlockBuffer), + FirstIncludedBlockChunkIndex, + LastIncludedBlockChunkIndex, + Ops)) { WriteBlockChunkOps(CacheFolderPath, RemoteContent, @@ -3671,7 +3709,7 @@ namespace { } } - SharedBuffer Decompress(const CompositeBuffer& CompressedChunk, const IoHash& ChunkHash, const uint64_t ChunkRawSize) + SharedBuffer Decompress(CompositeBuffer&& CompressedChunk, const IoHash& ChunkHash, const uint64_t ChunkRawSize) { ZEN_TRACE_CPU("Decompress"); @@ -3709,7 +3747,7 @@ namespace { const ChunkedFolderContent& Content, const ChunkedContentLookup& Lookup, std::span ChunkTargets, - const CompositeBuffer& ChunkData, + CompositeBuffer&& ChunkData, WriteFileCache& OpenFileCache, std::atomic& OutBytesWritten) { @@ -3734,8 +3772,8 @@ namespace { } } - bool CanStreamDecompress(const ChunkedFolderContent& RemoteContent, - const std::vector Locations) + bool CanDecompressDirectToSequence(const ChunkedFolderContent& RemoteContent, + const std::vector Locations) { if (Locations.size() == 1) { @@ -3776,13 +3814,25 @@ namespace { } IoHashStream Hash; bool CouldDecompress = Compressed.DecompressToStream(0, (uint64_t)-1, [&](uint64_t Offset, const CompositeBuffer& RangeBuffer) { - DecompressedTemp.Write(RangeBuffer, Offset); - for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + ZEN_TRACE_CPU("StreamDecompress_Write"); + if (!AbortFlag) { - Hash.Append(Segment.GetView()); + DecompressedTemp.Write(RangeBuffer, Offset); + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + } + WriteToDiskBytes += RangeBuffer.GetSize(); + return true; } - WriteToDiskBytes += RangeBuffer.GetSize(); + return false; }); + + if (AbortFlag) + { + return; + } + if (!CouldDecompress) { throw std::runtime_error(fmt::format("Failed to decompress large blob {}", SequenceRawHash)); @@ -3801,9 +3851,82 @@ namespace { } } + bool WriteCompressedChunk(const std::filesystem::path& TargetFolder, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& RemoteLookup, + const IoHash& ChunkHash, + const std::vector& ChunkTargetPtrs, + IoBuffer&& CompressedPart, + std::atomic& WriteToDiskBytes) + { + auto ChunkHashToChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(ChunkHashToChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); + if (CanDecompressDirectToSequence(RemoteContent, ChunkTargetPtrs)) + { + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[ChunkTargetPtrs.front()->SequenceIndex]; + StreamDecompress(TargetFolder, SequenceRawHash, CompositeBuffer(std::move(CompressedPart)), WriteToDiskBytes); + } + else + { + const uint32_t ChunkIndex = ChunkHashToChunkIndexIt->second; + SharedBuffer Chunk = + Decompress(CompositeBuffer(std::move(CompressedPart)), ChunkHash, RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + + if (!AbortFlag) + { + WriteFileCache OpenFileCache; + + WriteChunkToDisk(TargetFolder, + RemoteContent, + RemoteLookup, + ChunkTargetPtrs, + CompositeBuffer(std::move(Chunk)), + OpenFileCache, + WriteToDiskBytes); + return true; + } + } + return false; + } + + void CompleteChunkTargets(const std::filesystem::path& TargetFolder, + const ChunkedFolderContent& RemoteContent, + const IoHash& ChunkHash, + const std::vector& ChunkTargetPtrs, + std::span> SequenceIndexChunksLeftToWriteCounters, + const bool NeedHashVerify) + { + ZEN_TRACE_CPU("CompleteChunkTargets"); + + for (const ChunkedContentLookup::ChunkSequenceLocation* Location : ChunkTargetPtrs) + { + const uint32_t RemoteSequenceIndex = Location->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) + { + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + if (NeedHashVerify) + { + ZEN_TRACE_CPU("VerifyChunkHash"); + + const IoHash VerifyChunkHash = + IoHash::HashBuffer(IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(TargetFolder, SequenceRawHash))); + if (VerifyChunkHash != ChunkHash) + { + throw std::runtime_error( + fmt::format("Written chunk sequence {} hash does not match expected hash {}", VerifyChunkHash, ChunkHash)); + } + } + + ZEN_TRACE_CPU("RenameToFinal"); + ZEN_ASSERT_SLOW(!std::filesystem::exists(GetFinalChunkedSequenceFileName(TargetFolder, SequenceRawHash))); + std::filesystem::rename(GetTempChunkedSequenceFileName(TargetFolder, SequenceRawHash), + GetFinalChunkedSequenceFileName(TargetFolder, SequenceRawHash)); + } + } + } + void DownloadLargeBlob(BuildStorage& Storage, - const std::filesystem::path& TempFolderPath, - const std::filesystem::path& CacheFolderPath, + const std::filesystem::path& Path, const ChunkedFolderContent& RemoteContent, const ChunkedContentLookup& RemoteLookup, const Oid& BuildId, @@ -3818,6 +3941,7 @@ namespace { std::atomic& BytesDownloaded, std::atomic& MultipartAttachmentCount, std::function&& OnDownloadComplete, + std::function&& OnWriteStart, std::function&& OnWriteComplete) { ZEN_TRACE_CPU("DownloadLargeBlob"); @@ -3828,8 +3952,11 @@ namespace { }; std::shared_ptr Workload(std::make_shared()); + std::filesystem::path DownloadFolder = Path / ZenTempDownloadFolderName; + std::filesystem::path TargetFolder = Path / ZenTempCacheFolderName; + std::error_code Ec; - Workload->TempFile.CreateTemporary(TempFolderPath, Ec); + Workload->TempFile.CreateTemporary(DownloadFolder, Ec); if (Ec) { throw std::runtime_error( @@ -3839,7 +3966,8 @@ namespace { BuildId, ChunkHash, PreferredMultipartChunkSize, - [&CacheFolderPath, + [DownloadFolder, + TargetFolder, &RemoteContent, &RemoteLookup, &Work, @@ -3849,6 +3977,7 @@ namespace { &BytesDownloaded, OnDownloadComplete = std::move(OnDownloadComplete), OnWriteComplete = std::move(OnWriteComplete), + OnWriteStart = std::move(OnWriteStart), &WriteToDiskBytes, SequenceIndexChunksLeftToWriteCounters, ChunkTargetPtrs = std::vector( @@ -3857,6 +3986,7 @@ namespace { if (!AbortFlag.load()) { + ZEN_TRACE_CPU("DownloadLargeBlob_Save"); Workload->TempFile.Write(Chunk.GetView(), Offset); if (Chunk.GetSize() == BytesRemaining) { @@ -3864,103 +3994,61 @@ namespace { Work.ScheduleWork( WritePool, // GetSyncWorkerPool(),// - [&CacheFolderPath, + [DownloadFolder, + TargetFolder, &RemoteContent, &RemoteLookup, ChunkHash, Workload, Offset, OnWriteComplete = std::move(OnWriteComplete), + OnWriteStart = std::move(OnWriteStart), &WriteToDiskBytes, SequenceIndexChunksLeftToWriteCounters, ChunkTargetPtrs](std::atomic&) { - ZEN_TRACE_CPU("DownloadLargeBlob_Work"); + ZEN_TRACE_CPU("DownloadLargeBlob_Write"); if (!AbortFlag) { - uint64_t CompressedSize = Workload->TempFile.FileSize(); - void* FileHandle = Workload->TempFile.Detach(); - IoBuffer CompressedPart = IoBuffer(IoBuffer::File, - FileHandle, - 0, - CompressedSize, - /*IsWholeFile*/ true); - if (!CompressedPart) + const std::filesystem::path CompressedChunkPath = DownloadFolder / ChunkHash.ToHexString(); + std::error_code Ec; + Workload->TempFile.MoveTemporaryIntoPlace(CompressedChunkPath, Ec); + if (Ec) { - throw std::runtime_error( - fmt::format("Multipart build blob {} is not a compressed buffer", ChunkHash)); + throw std::runtime_error(fmt::format("Failed moving downloaded chunk {} file to {}. Reason: {}", + ChunkHash, + CompressedChunkPath, + Ec.message())); } - CompressedPart.SetDeleteOnClose(true); - auto ChunkHashToChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); - ZEN_ASSERT(ChunkHashToChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); - bool NeedHashVerify = true; - if (CanStreamDecompress(RemoteContent, ChunkTargetPtrs)) + IoBuffer CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (!CompressedPart) { - const IoHash& SequenceRawHash = - RemoteContent.ChunkedContent.SequenceRawHashes[ChunkTargetPtrs.front()->SequenceIndex]; - StreamDecompress(CacheFolderPath, - SequenceRawHash, - CompositeBuffer(std::move(CompressedPart)), - WriteToDiskBytes); - NeedHashVerify = false; - OnWriteComplete(); + throw std::runtime_error(fmt::format("Could not open dowloaded compressed chunk {} from {}", + ChunkHash, + CompressedChunkPath)); } - else - { - const uint32_t ChunkIndex = ChunkHashToChunkIndexIt->second; - SharedBuffer Chunk = Decompress(CompositeBuffer(std::move(CompressedPart)), - ChunkHash, - RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); - - // ZEN_ASSERT_SLOW(ChunkHash == - // IoHash::HashBuffer(Chunk.AsIoBuffer())); - if (!AbortFlag) - { - WriteFileCache OpenFileCache; + OnWriteStart(); - WriteChunkToDisk(CacheFolderPath, - RemoteContent, - RemoteLookup, - ChunkTargetPtrs, - CompositeBuffer(Chunk), - OpenFileCache, - WriteToDiskBytes); - OnWriteComplete(); - } - } + bool NeedHashVerify = WriteCompressedChunk(TargetFolder, + RemoteContent, + RemoteLookup, + ChunkHash, + ChunkTargetPtrs, + std::move(CompressedPart), + WriteToDiskBytes); if (!AbortFlag) { - // Write tracking, updating this must be done without any files open (WriteFileCache) - for (const ChunkedContentLookup::ChunkSequenceLocation* Location : ChunkTargetPtrs) - { - const uint32_t RemoteSequenceIndex = Location->SequenceIndex; - if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) - { - const IoHash& SequenceRawHash = - RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - if (NeedHashVerify) - { - ZEN_TRACE_CPU("VerifyChunkHash"); - - const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( - GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); - if (VerifyChunkHash != ChunkHash) - { - throw std::runtime_error( - fmt::format("Written chunk sequence {} hash does not match expected hash {}", - VerifyChunkHash, - ChunkHash)); - } - } + std::filesystem::remove(CompressedChunkPath); - ZEN_TRACE_CPU("VerifyChunkHashes_rename"); - std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), - GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); - } - } + CompleteChunkTargets(TargetFolder, + RemoteContent, + ChunkHash, + ChunkTargetPtrs, + SequenceIndexChunksLeftToWriteCounters, + NeedHashVerify); } } }, @@ -3977,6 +4065,7 @@ namespace { Work.ScheduleWork( NetworkPool, // GetSyncWorkerPool(),// [WorkItem = std::move(WorkItem)](std::atomic&) { + ZEN_TRACE_CPU("DownloadLargeBlob_Work"); if (!AbortFlag) { WorkItem(); @@ -4023,115 +4112,256 @@ namespace { Stopwatch CacheMappingTimer; - uint64_t CacheMappedBytesForReuse = 0; std::vector> SequenceIndexChunksLeftToWriteCounters(RemoteContent.ChunkedContent.SequenceRawHashes.size()); // std::vector RemoteSequenceIndexIsCachedFlags(RemoteContent.ChunkedContent.SequenceRawHashes.size(), false); - std::vector RemoteChunkIndexIsCachedFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); + std::vector RemoteChunkIndexNeedsCopyFromLocalFileFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); // Guard if he same chunks is in multiple blocks (can happen due to block reuse, cache reuse blocks writes directly) std::vector> RemoteChunkIndexNeedsCopyFromSourceFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); - // Pick up all whole files we can use from current local state - for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < RemoteContent.ChunkedContent.SequenceRawHashes.size(); - RemoteSequenceIndex++) + tsl::robin_map CachedChunkHashesFound; + tsl::robin_map CachedSequenceHashesFound; + uint64_t CachedChunkHashesByteCountFound = 0; + uint64_t CachedSequenceHashesByteCountFound = 0; { - const IoHash& RemoteSequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - if (auto It = LocalLookup.RawHashToSequenceIndex.find(RemoteSequenceRawHash); It != LocalLookup.RawHashToSequenceIndex.end()) - { - SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = 0; - const uint32_t RemotePathIndex = GetFirstPathIndexForRawHash(RemoteLookup, RemoteSequenceRawHash); - CacheMappedBytesForReuse += RemoteContent.RawSizes[RemotePathIndex]; - } - else + ZEN_TRACE_CPU("UpdateFolder_CheckChunkCache"); + + DirectoryContent CacheDirContent; + GetDirectoryContent(CacheFolderPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, + CacheDirContent); + for (size_t Index = 0; Index < CacheDirContent.Files.size(); Index++) { - SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]; + IoHash FileHash; + if (IoHash::TryParse(CacheDirContent.Files[Index].filename().string(), FileHash)) + { + if (auto ChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(FileHash); + ChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = ChunkIt->second; + const uint64_t ChunkSize = RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + if (ChunkSize == CacheDirContent.FileSizes[Index]) + { + CachedChunkHashesFound.insert({FileHash, ChunkIndex}); + CachedChunkHashesByteCountFound += ChunkSize; + continue; + } + } + else if (auto SequenceIt = RemoteLookup.RawHashToSequenceIndex.find(FileHash); + SequenceIt != RemoteLookup.RawHashToSequenceIndex.end()) + { + const uint32_t SequenceIndex = SequenceIt->second; + const uint32_t PathIndex = RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const uint64_t SequenceSize = RemoteContent.RawSizes[PathIndex]; + if (SequenceSize == CacheDirContent.FileSizes[Index]) + { + CachedSequenceHashesFound.insert({FileHash, SequenceIndex}); + CachedSequenceHashesByteCountFound += SequenceSize; + continue; + } + } + } + std::filesystem::remove(CacheDirContent.Files[Index]); } } - // Pick up all chunks in current local state - struct CacheCopyData + tsl::robin_map CachedBlocksFound; + uint64_t CachedBlocksByteCountFound = 0; { - uint32_t LocalSequenceIndex; - std::vector TargetChunkLocationPtrs; - struct ChunkTarget - { - uint32_t TargetChunkLocationCount; - uint64_t ChunkRawSize; - uint64_t CacheFileOffset; - }; - std::vector ChunkTargets; - }; - - tsl::robin_map RawHashToCacheCopyDataIndex; - std::vector CacheCopyDatas; + ZEN_TRACE_CPU("UpdateFolder_CheckBlockCache"); - for (uint32_t LocalSequenceIndex = 0; LocalSequenceIndex < LocalContent.ChunkedContent.SequenceRawHashes.size(); - LocalSequenceIndex++) - { - const IoHash& LocalSequenceRawHash = LocalContent.ChunkedContent.SequenceRawHashes[LocalSequenceIndex]; - const uint32_t LocalOrderOffset = LocalLookup.SequenceIndexChunkOrderOffset[LocalSequenceIndex]; + tsl::robin_map AllBlockSizes; + AllBlockSizes.reserve(BlockDescriptions.size()); + for (uint32_t BlockIndex = 0; BlockIndex < BlockDescriptions.size(); BlockIndex++) + { + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + AllBlockSizes.insert({BlockDescription.BlockHash, BlockIndex}); + } + DirectoryContent BlockDirContent; + GetDirectoryContent(Path / ZenTempBlockFolderName, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, + BlockDirContent); + CachedBlocksFound.reserve(BlockDirContent.Files.size()); + for (size_t Index = 0; Index < BlockDirContent.Files.size(); Index++) { - uint64_t SourceOffset = 0; - const uint32_t LocalChunkCount = LocalContent.ChunkedContent.ChunkCounts[LocalSequenceIndex]; - for (uint32_t LocalOrderIndex = 0; LocalOrderIndex < LocalChunkCount; LocalOrderIndex++) + IoHash FileHash; + if (IoHash::TryParse(BlockDirContent.Files[Index].filename().string(), FileHash)) { - const uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[LocalOrderOffset + LocalOrderIndex]; - const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; - const uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; - - if (auto RemoteChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(LocalChunkHash); - RemoteChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) + if (auto BlockIt = AllBlockSizes.find(FileHash); BlockIt != AllBlockSizes.end()) { - const uint32_t RemoteChunkIndex = RemoteChunkIt->second; - if (!RemoteChunkIndexIsCachedFlags[RemoteChunkIndex]) + const uint32_t BlockIndex = BlockIt->second; + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + uint64_t BlockSize = CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize; + for (uint64_t ChunkSize : BlockDescription.ChunkCompressedLengths) { - std::vector ChunkTargetPtrs = - GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + BlockSize += ChunkSize; + } - if (!ChunkTargetPtrs.empty()) - { - CacheCopyData::ChunkTarget Target = { - .TargetChunkLocationCount = gsl::narrow(ChunkTargetPtrs.size()), - .ChunkRawSize = LocalChunkRawSize, - .CacheFileOffset = SourceOffset}; - if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(LocalSequenceRawHash); - CopySourceIt != RawHashToCacheCopyDataIndex.end()) - { - CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; - Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(), - ChunkTargetPtrs.begin(), - ChunkTargetPtrs.end()); - Data.ChunkTargets.push_back(Target); - } - else + if (BlockSize == BlockDirContent.FileSizes[Index]) + { + CachedBlocksFound.insert({FileHash, BlockIndex}); + CachedBlocksByteCountFound += BlockSize; + continue; + } + } + } + std::filesystem::remove(BlockDirContent.Files[Index]); + } + } + + std::vector LocalPathIndexesMatchingSequenceIndexes; + uint64_t LocalPathIndexesByteCountMatchingSequenceIndexes = 0; + // Pick up all whole files we can use from current local state + { + ZEN_TRACE_CPU("UpdateFolder_CheckLocalChunks"); + for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < RemoteContent.ChunkedContent.SequenceRawHashes.size(); + RemoteSequenceIndex++) + { + const IoHash& RemoteSequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + if (auto CacheSequenceIt = CachedSequenceHashesFound.find(RemoteSequenceRawHash); + CacheSequenceIt != CachedSequenceHashesFound.end()) + { + // const uint32_t RemoteSequenceIndex = CacheSequenceIt->second; + // const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex); + // RemoteSequenceByteCountFoundInCache += RemoteContent.RawSizes[RemotePathIndex]; + } + else if (auto CacheChunkIt = CachedChunkHashesFound.find(RemoteSequenceRawHash); + CacheChunkIt != CachedChunkHashesFound.end()) + { + // const uint32_t RemoteChunkIndex = CacheChunkIt->second; + // const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex); + // RemoteSequenceByteCountFoundInCache += RemoteContent.RawSizes[RemotePathIndex]; + } + else if (auto It = LocalLookup.RawHashToSequenceIndex.find(RemoteSequenceRawHash); + It != LocalLookup.RawHashToSequenceIndex.end()) + { + const uint32_t LocalSequenceIndex = It->second; + const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(LocalLookup, LocalSequenceIndex); + uint64_t RawSize = LocalContent.RawSizes[LocalPathIndex]; + LocalPathIndexesMatchingSequenceIndexes.push_back(LocalPathIndex); + LocalPathIndexesByteCountMatchingSequenceIndexes += RawSize; + } + else + { + // We must write the sequence + SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = + RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]; + } + } + } + // Pick up all chunks in current local state + struct CacheCopyData + { + uint32_t LocalSequenceIndex; + std::vector TargetChunkLocationPtrs; + struct ChunkTarget + { + uint32_t TargetChunkLocationCount; + uint64_t ChunkRawSize; + uint64_t CacheFileOffset; + }; + std::vector ChunkTargets; + }; + + tsl::robin_map RawHashToCacheCopyDataIndex; + std::vector CacheCopyDatas; + uint64_t LocalChunkHashesMatchingRemoteCount = 0; + uint64_t LocalChunkHashesMatchingRemoteByteCount = 0; + + { + ZEN_TRACE_CPU("UpdateFolder_GetLocalChunks"); + + for (uint32_t LocalSequenceIndex = 0; LocalSequenceIndex < LocalContent.ChunkedContent.SequenceRawHashes.size(); + LocalSequenceIndex++) + { + const IoHash& LocalSequenceRawHash = LocalContent.ChunkedContent.SequenceRawHashes[LocalSequenceIndex]; + const uint32_t LocalOrderOffset = LocalLookup.SequenceIndexChunkOrderOffset[LocalSequenceIndex]; + + { + uint64_t SourceOffset = 0; + const uint32_t LocalChunkCount = LocalContent.ChunkedContent.ChunkCounts[LocalSequenceIndex]; + for (uint32_t LocalOrderIndex = 0; LocalOrderIndex < LocalChunkCount; LocalOrderIndex++) + { + const uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[LocalOrderOffset + LocalOrderIndex]; + const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + const uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; + + if (auto RemoteChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(LocalChunkHash); + RemoteChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = RemoteChunkIt->second; + if (!RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + std::vector ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + + if (!ChunkTargetPtrs.empty()) { - RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size()); - CacheCopyDatas.push_back( - CacheCopyData{.LocalSequenceIndex = LocalSequenceIndex, - .TargetChunkLocationPtrs = ChunkTargetPtrs, - .ChunkTargets = std::vector{Target}}); + CacheCopyData::ChunkTarget Target = { + .TargetChunkLocationCount = gsl::narrow(ChunkTargetPtrs.size()), + .ChunkRawSize = LocalChunkRawSize, + .CacheFileOffset = SourceOffset}; + if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(LocalSequenceRawHash); + CopySourceIt != RawHashToCacheCopyDataIndex.end()) + { + CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; + if (Data.TargetChunkLocationPtrs.size() > 1024) + { + RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.LocalSequenceIndex = LocalSequenceIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector{Target}}); + } + else + { + Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(), + ChunkTargetPtrs.begin(), + ChunkTargetPtrs.end()); + Data.ChunkTargets.push_back(Target); + } + } + else + { + RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.LocalSequenceIndex = LocalSequenceIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector{Target}}); + } + LocalChunkHashesMatchingRemoteByteCount += LocalChunkRawSize; + LocalChunkHashesMatchingRemoteCount++; + RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true; } - CacheMappedBytesForReuse += LocalChunkRawSize; - RemoteChunkIndexIsCachedFlags[RemoteChunkIndex] = true; } } + SourceOffset += LocalChunkRawSize; } - SourceOffset += LocalChunkRawSize; } } } - if (CacheMappedBytesForReuse > 0) + if (!CachedSequenceHashesFound.empty() || !CachedChunkHashesFound.empty() || !CachedBlocksFound.empty() || + !LocalPathIndexesMatchingSequenceIndexes.empty() || LocalChunkHashesMatchingRemoteCount > 0) { - ZEN_CONSOLE("Mapped {} cached data for reuse in {}", - NiceBytes(CacheMappedBytesForReuse), - NiceTimeSpanMs(CacheMappingTimer.GetElapsedTimeMs())); + ZEN_CONSOLE( + "Cache: {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks. Local state: {} ({}) chunk sequences, {} ({}) chunks", + CachedSequenceHashesFound.size(), + NiceBytes(CachedSequenceHashesByteCountFound), + CachedChunkHashesFound.size(), + NiceBytes(CachedChunkHashesByteCountFound), + CachedBlocksFound.size(), + NiceBytes(CachedBlocksByteCountFound), + LocalPathIndexesMatchingSequenceIndexes.size(), + NiceBytes(LocalPathIndexesByteCountMatchingSequenceIndexes), + LocalChunkHashesMatchingRemoteCount, + NiceBytes(LocalChunkHashesMatchingRemoteByteCount)); } uint32_t ChunkCountToWrite = 0; for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) { - if (RemoteChunkIndexIsCachedFlags[RemoteChunkIndex]) + if (RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) { ChunkCountToWrite++; } @@ -4154,7 +4384,7 @@ namespace { std::atomic WritePartsComplete = 0; { - ZEN_TRACE_CPU("HandleChunks"); + ZEN_TRACE_CPU("WriteChunks"); Stopwatch WriteTimer; @@ -4169,17 +4399,21 @@ namespace { std::atomic BytesDownloaded = 0; - for (const IoHash ChunkHash : LooseChunkHashes) + struct LooseChunkHashWorkData { - if (AbortFlag) - { - break; - } + std::vector ChunkTargetPtrs; + uint32_t RemoteChunkIndex; + }; + std::vector LooseChunkHashWorks; + TotalPartWriteCount += CacheCopyDatas.size(); + + for (const IoHash ChunkHash : LooseChunkHashes) + { auto RemoteChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); ZEN_ASSERT(RemoteChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second; - if (RemoteChunkIndexIsCachedFlags[RemoteChunkIndex]) + if (RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) { ZEN_DEBUG("Skipping chunk {} due to cache reuse", ChunkHash); continue; @@ -4198,173 +4432,192 @@ namespace { { TotalRequestCount++; TotalPartWriteCount++; - Work.ScheduleWork( - NetworkPool, // GetSyncWorkerPool(),// - [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) { - ZEN_TRACE_CPU("UpdateFolder_LooseChunk"); + LooseChunkHashWorks.push_back( + LooseChunkHashWorkData{.ChunkTargetPtrs = ChunkTargetPtrs, .RemoteChunkIndex = RemoteChunkIndex}); + } + } + } - if (!AbortFlag) + uint32_t BlockCount = gsl::narrow(BlockDescriptions.size()); + + std::vector ChunkIsPickedUpByBlock(RemoteContent.ChunkedContent.ChunkHashes.size(), false); + auto GetNeededChunkBlockIndexes = [&RemoteContent, + &RemoteLookup, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &ChunkIsPickedUpByBlock](const ChunkBlockDescription& BlockDescription) { + ZEN_TRACE_CPU("UpdateFolder_GetNeededChunkBlockIndexes"); + std::vector NeededBlockChunkIndexes; + for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++) + { + const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + if (auto It = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = It->second; + if (!ChunkIsPickedUpByBlock[RemoteChunkIndex]) + { + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]) + { + ChunkIsPickedUpByBlock[RemoteChunkIndex] = true; + NeededBlockChunkIndexes.push_back(ChunkBlockIndex); + } + } + } + } + return NeededBlockChunkIndexes; + }; + + std::vector CachedChunkBlockIndexes; + + struct BlockRangeDescriptor + { + uint32_t BlockIndex = (uint32_t)-1; + uint64_t RangeStart = 0; + uint64_t RangeLength = 0; + uint32_t ChunkBlockIndexStart = 0; + uint32_t ChunkBlockIndexCount = 0; + }; + std::vector BlockRangeWorks; + + std::vector FullBlockWorks; + + size_t BlocksNeededCount = 0; + uint64_t AllBlocksSize = 0; + uint64_t AllBlocksFetch = 0; + uint64_t AllBlocksSlack = 0; + uint64_t AllBlockRequests = 0; + uint64_t AllBlockChunksSize = 0; + for (uint32_t BlockIndex = 0; BlockIndex < BlockCount; BlockIndex++) + { + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + const std::vector BlockChunkIndexNeeded = GetNeededChunkBlockIndexes(BlockDescription); + if (!BlockChunkIndexNeeded.empty()) + { + bool UsingCachedBlock = false; + if (auto It = CachedBlocksFound.find(BlockDescription.BlockHash); It != CachedBlocksFound.end()) + { + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_CacheGet"); + + TotalPartWriteCount++; + + std::filesystem::path BlockPath = Path / ZenTempBlockFolderName / BlockDescription.BlockHash.ToHexString(); + if (std::filesystem::exists(BlockPath)) + { + CachedChunkBlockIndexes.push_back(BlockIndex); + UsingCachedBlock = true; + } + } + + if (!UsingCachedBlock) + { + bool WantsToDoPartialBlockDownload = BlockChunkIndexNeeded.size() < BlockDescription.ChunkRawHashes.size(); + bool CanDoPartialBlockDownload = + (BlockDescription.HeaderSize > 0) && + (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size()); + if (AllowPartialBlockRequests && WantsToDoPartialBlockDownload && CanDoPartialBlockDownload) + { + std::vector BlockRanges; + + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_PartialAnalysis"); + + uint32_t NeedBlockChunkIndexOffset = 0; + uint32_t ChunkBlockIndex = 0; + uint32_t CurrentOffset = + gsl::narrow(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + + BlockRangeDescriptor NextRange{.BlockIndex = BlockIndex}; + while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && + ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) + { + const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) { - FilteredDownloadedBytesPerSecond.Start(); - if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) + if (NextRange.RangeLength > 0) { - DownloadLargeBlob( - Storage, - Path / ZenTempChunkFolderName, - CacheFolderPath, - RemoteContent, - RemoteLookup, - BuildId, - ChunkHash, - PreferredMultipartChunkSize, - ChunkTargetPtrs, - SequenceIndexChunksLeftToWriteCounters, - Work, - WritePool, - NetworkPool, - WriteToDiskBytes, - BytesDownloaded, - MultipartAttachmentCount, - [&](uint64_t BytesDownloaded) { - LooseChunksBytes += BytesDownloaded; - RequestsComplete++; - if (RequestsComplete == TotalRequestCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - }, - [&]() { - ChunkCountWritten++; - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - }); + BlockRanges.push_back(NextRange); + NextRange = {.BlockIndex = BlockIndex}; } - else + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + } + else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + AllBlockChunksSize += ChunkCompressedLength; + if (NextRange.RangeLength == 0) { - IoBuffer CompressedPart = Storage.GetBuildBlob(BuildId, ChunkHash); - if (!CompressedPart) - { - throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash)); - } - BytesDownloaded += CompressedPart.GetSize(); - LooseChunksBytes += CompressedPart.GetSize(); - RequestsComplete++; - if (RequestsComplete == TotalRequestCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - CompositeBuffer Payload = WriteToTempFileIfNeeded(CompositeBuffer(std::move(CompressedPart)), - Path / ZenTempChunkFolderName, - ChunkHash); - DownloadedChunks++; + NextRange.RangeStart = CurrentOffset; + NextRange.ChunkBlockIndexStart = ChunkBlockIndex; + } + NextRange.RangeLength += ChunkCompressedLength; + NextRange.ChunkBlockIndexCount++; + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + NeedBlockChunkIndexOffset++; + } + else + { + ZEN_ASSERT(false); + } + } + AllBlocksSize += CurrentOffset; + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + } - if (!AbortFlag) - { - Work.ScheduleWork( - WritePool, - [&, ChunkHash, RemoteChunkIndex, ChunkTargetPtrs, CompressedPart = std::move(Payload)]( - std::atomic&) { - ZEN_TRACE_CPU("UpdateFolder_WriteBlob"); + ZEN_ASSERT(!BlockRanges.empty()); + std::vector CollapsedBlockRanges; + auto It = BlockRanges.begin(); + CollapsedBlockRanges.push_back(*It++); + uint64_t TotalSlack = 0; + while (It != BlockRanges.end()) + { + BlockRangeDescriptor& LastRange = CollapsedBlockRanges.back(); + uint64_t Slack = It->RangeStart - (LastRange.RangeStart + LastRange.RangeLength); + uint64_t BothRangeSize = It->RangeLength + LastRange.RangeLength; + if (Slack <= Max(BothRangeSize / 8, 64u * 1024u)) // Made up heuristic - we'll see how it pans out + { + LastRange.ChunkBlockIndexCount = + (It->ChunkBlockIndexStart + It->ChunkBlockIndexCount) - LastRange.ChunkBlockIndexStart; + LastRange.RangeLength = (It->RangeStart + It->RangeLength) - LastRange.RangeStart; + TotalSlack += Slack; + } + else + { + CollapsedBlockRanges.push_back(*It); + } + ++It; + } - if (!AbortFlag) - { - FilteredWrittenBytesPerSecond.Start(); + uint64_t TotalFetch = 0; + for (const BlockRangeDescriptor& Range : CollapsedBlockRanges) + { + TotalFetch += Range.RangeLength; + } - bool NeedHashVerify = true; - if (CanStreamDecompress(RemoteContent, ChunkTargetPtrs)) - { - const IoHash& SequenceRawHash = - RemoteContent.ChunkedContent - .SequenceRawHashes[ChunkTargetPtrs.front()->SequenceIndex]; - StreamDecompress(CacheFolderPath, - SequenceRawHash, - CompositeBuffer(CompressedPart), - WriteToDiskBytes); - ChunkCountWritten++; - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - NeedHashVerify = false; - } - else - { - SharedBuffer Chunk = - Decompress(CompressedPart, - ChunkHash, - RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex]); - - { - WriteFileCache OpenFileCache; - WriteChunkToDisk(CacheFolderPath, - RemoteContent, - RemoteLookup, - ChunkTargetPtrs, - CompositeBuffer(Chunk), - OpenFileCache, - WriteToDiskBytes); - ChunkCountWritten++; - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - } - } - if (!AbortFlag) - { - WritePartsComplete++; - - // Write tracking, updating this must be done without any files open - // (WriteFileCache) - for (const ChunkedContentLookup::ChunkSequenceLocation* Location : - ChunkTargetPtrs) - { - const uint32_t RemoteSequenceIndex = Location->SequenceIndex; - if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub( - 1) == 1) - { - const IoHash& SequenceRawHash = - RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - if (NeedHashVerify) - { - ZEN_TRACE_CPU("UpdateFolder_VerifyHash"); - - const IoHash VerifyChunkHash = - IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( - GetTempChunkedSequenceFileName(CacheFolderPath, - SequenceRawHash))); - if (VerifyChunkHash != SequenceRawHash) - { - throw std::runtime_error( - fmt::format("Written chunk sequence {} hash does not match " - "expected hash {}", - VerifyChunkHash, - SequenceRawHash)); - } - } - - ZEN_TRACE_CPU("UpdateFolder_rename"); - std::filesystem::rename( - GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), - GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); - } - } - } - } - }, - Work.DefaultErrorFunction()); - } - } - } - }, - Work.DefaultErrorFunction()); + AllBlocksFetch += TotalFetch; + AllBlocksSlack += TotalSlack; + BlocksNeededCount++; + AllBlockRequests += CollapsedBlockRanges.size(); + + TotalRequestCount += CollapsedBlockRanges.size(); + TotalPartWriteCount += CollapsedBlockRanges.size(); + + BlockRangeWorks.insert(BlockRangeWorks.end(), CollapsedBlockRanges.begin(), CollapsedBlockRanges.end()); + } + else + { + BlocksNeededCount++; + TotalRequestCount++; + TotalPartWriteCount++; + + FullBlockWorks.push_back(BlockIndex); + } } } + else + { + ZEN_DEBUG("Skipping block {} due to cache reuse", BlockDescriptions[BlockIndex].BlockHash); + } } for (size_t CopyDataIndex = 0; CopyDataIndex < CacheCopyDatas.size(); CopyDataIndex++) @@ -4374,14 +4627,12 @@ namespace { break; } - TotalPartWriteCount++; - Work.ScheduleWork( WritePool, // GetSyncWorkerPool(),// [&, CopyDataIndex](std::atomic&) { if (!AbortFlag) { - ZEN_TRACE_CPU("UpdateFolder_Copy"); + ZEN_TRACE_CPU("UpdateFolder_CopyLocal"); FilteredWrittenBytesPerSecond.Start(); const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; @@ -4403,39 +4654,45 @@ namespace { }; std::vector WriteOps; - WriteOps.reserve(AllTargets.size()); - for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) + if (!AbortFlag) { - std::span TargetRange = - AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); - for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) + ZEN_TRACE_CPU("Sort"); + WriteOps.reserve(AllTargets.size()); + for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) { - WriteOps.push_back(WriteOp{.Target = Target, - .CacheFileOffset = ChunkTarget.CacheFileOffset, - .ChunkSize = ChunkTarget.ChunkRawSize}); + std::span TargetRange = + AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) + { + WriteOps.push_back(WriteOp{.Target = Target, + .CacheFileOffset = ChunkTarget.CacheFileOffset, + .ChunkSize = ChunkTarget.ChunkRawSize}); + } + TargetStart += ChunkTarget.TargetChunkLocationCount; } - TargetStart += ChunkTarget.TargetChunkLocationCount; - } - std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { - if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) - { - return true; - } - else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) - { + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; + } + else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { + return false; + } + if (Lhs.Target->Offset < Rhs.Target->Offset) + { + return true; + } return false; - } - if (Lhs.Target->Offset < Rhs.Target->Offset) - { - return true; - } - return false; - }); + }); + } if (!AbortFlag) { + ZEN_TRACE_CPU("Write"); + BufferedOpenFile SourceFile(LocalFilePath); WriteFileCache OpenFileCache; for (const WriteOp& Op : WriteOps) @@ -4473,23 +4730,26 @@ namespace { // Write tracking, updating this must be done without any files open (WriteFileCache) for (const WriteOp& Op : WriteOps) { - ZEN_TRACE_CPU("UpdateFolder_Copy_VerifyHash"); - const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) { const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( - GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); - if (VerifyChunkHash != SequenceRawHash) { - throw std::runtime_error( - fmt::format("Written chunk sequence {} hash does not match expected hash {}", - VerifyChunkHash, - SequenceRawHash)); + ZEN_TRACE_CPU("VerifyHash"); + const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( + GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Written chunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + SequenceRawHash)); + } } - ZEN_TRACE_CPU("UpdateFolder_Copy_rename"); + ZEN_TRACE_CPU("rename"); + ZEN_ASSERT_SLOW( + !std::filesystem::exists(GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); } @@ -4508,300 +4768,643 @@ namespace { Work.DefaultErrorFunction()); } - size_t BlockCount = BlockDescriptions.size(); + for (uint32_t LooseChunkHashWorkIndex = 0; LooseChunkHashWorkIndex < LooseChunkHashWorks.size(); LooseChunkHashWorkIndex++) + { + if (AbortFlag) + { + break; + } - std::vector ChunkIsPickedUpByBlock(RemoteContent.ChunkedContent.ChunkHashes.size(), false); - auto GetNeededChunkBlockIndexes = [&RemoteContent, - &RemoteLookup, - &RemoteChunkIndexNeedsCopyFromSourceFlags, - &ChunkIsPickedUpByBlock](const ChunkBlockDescription& BlockDescription) { - std::vector NeededBlockChunkIndexes; - for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++) - { - const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; - if (auto It = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != RemoteLookup.ChunkHashToChunkIndex.end()) - { - const uint32_t RemoteChunkIndex = It->second; - if (!ChunkIsPickedUpByBlock[RemoteChunkIndex]) - { - if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]) - { - ChunkIsPickedUpByBlock[RemoteChunkIndex] = true; - NeededBlockChunkIndexes.push_back(ChunkBlockIndex); - } - } - } - } - return NeededBlockChunkIndexes; - }; + LooseChunkHashWorkData& LooseChunkHashWork = LooseChunkHashWorks[LooseChunkHashWorkIndex]; - size_t BlocksNeededCount = 0; - uint64_t AllBlocksSize = 0; - uint64_t AllBlocksFetch = 0; - uint64_t AllBlocksSlack = 0; - uint64_t AllBlockRequests = 0; - uint64_t AllBlockChunksSize = 0; - for (size_t BlockIndex = 0; BlockIndex < BlockCount; BlockIndex++) + std::vector ChunkTargetPtrs = + std::move(LooseChunkHashWork.ChunkTargetPtrs); + const uint32_t RemoteChunkIndex = LooseChunkHashWork.RemoteChunkIndex; + + Work.ScheduleWork( + NetworkPool, // NetworkPool, // GetSyncWorkerPool(),// + [&, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) { + if (!AbortFlag) + { + std::filesystem::path ExistingCompressedChunkPath; + { + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + std::filesystem::path CompressedChunkPath = Path / ZenTempDownloadFolderName / ChunkHash.ToHexString(); + if (std::filesystem::exists(CompressedChunkPath)) + { + IoBuffer ExistingCompressedPart = IoBufferBuilder::MakeFromFile(ExistingCompressedChunkPath); + if (ExistingCompressedPart) + { + IoHash RawHash; + uint64_t RawSize; + if (CompressedBuffer::ValidateCompressedHeader(ExistingCompressedPart, RawHash, RawSize)) + { + LooseChunksBytes += ExistingCompressedPart.GetSize(); + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + ExistingCompressedChunkPath = std::move(CompressedChunkPath); + } + else + { + std::error_code DummyEc; + std::filesystem::remove(CompressedChunkPath, DummyEc); + } + } + } + } + if (!ExistingCompressedChunkPath.empty()) + { + Work.ScheduleWork( + WritePool, // WritePool, GetSyncWorkerPool() + [&Path, + &RemoteContent, + &RemoteLookup, + &CacheFolderPath, + &SequenceIndexChunksLeftToWriteCounters, + &WriteToDiskBytes, + &ChunkCountWritten, + &WritePartsComplete, + &TotalPartWriteCount, + &FilteredWrittenBytesPerSecond, + RemoteChunkIndex, + ChunkTargetPtrs, + CompressedChunkPath = std::move(ExistingCompressedChunkPath)](std::atomic&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WritePreDownloaded"); + + FilteredWrittenBytesPerSecond.Start(); + + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + + IoBuffer CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (!CompressedPart) + { + throw std::runtime_error(fmt::format("Could not open dowloaded compressed chunk {} from {}", + ChunkHash, + CompressedChunkPath)); + } + + std::filesystem::path TargetFolder = Path / ZenTempCacheFolderName; + bool NeedHashVerify = WriteCompressedChunk(TargetFolder, + RemoteContent, + RemoteLookup, + ChunkHash, + ChunkTargetPtrs, + std::move(CompressedPart), + WriteToDiskBytes); + + if (!AbortFlag) + { + ChunkCountWritten++; + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + + std::filesystem::remove(CompressedChunkPath); + + CompleteChunkTargets(TargetFolder, + RemoteContent, + ChunkHash, + ChunkTargetPtrs, + SequenceIndexChunksLeftToWriteCounters, + NeedHashVerify); + } + } + }, + Work.DefaultErrorFunction()); + } + else + { + FilteredDownloadedBytesPerSecond.Start(); + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) + { + ZEN_TRACE_CPU("UpdateFolder_GetLargeChunk"); + DownloadLargeBlob( + Storage, + Path, + RemoteContent, + RemoteLookup, + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + ChunkTargetPtrs, + SequenceIndexChunksLeftToWriteCounters, + Work, + WritePool, + NetworkPool, + WriteToDiskBytes, + BytesDownloaded, + MultipartAttachmentCount, + [&](uint64_t BytesDownloaded) { + LooseChunksBytes += BytesDownloaded; + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + }, + [&]() { FilteredWrittenBytesPerSecond.Start(); }, + [&]() { + ChunkCountWritten++; + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + }); + } + else + { + ZEN_TRACE_CPU("UpdateFolder_GetChunk"); + + IoBuffer BuildBlob = Storage.GetBuildBlob(BuildId, ChunkHash); + if (!BuildBlob) + { + throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash)); + } + uint64_t BlobSize = BuildBlob.GetSize(); + BytesDownloaded += BlobSize; + LooseChunksBytes += BlobSize; + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + + std::filesystem::path CompressedChunkPath; + + // Check if the dowloaded file is file based and we can move it directly without rewriting it + { + IoBufferFileReference FileRef; + if (BuildBlob.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && + (FileRef.FileChunkSize == BlobSize)) + { + ZEN_TRACE_CPU("UpdateFolder_MoveTempChunk"); + std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) + { + BuildBlob.SetDeleteOnClose(false); + BuildBlob = {}; + CompressedChunkPath = Path / ZenTempDownloadFolderName / ChunkHash.ToHexString(); + std::filesystem::rename(TempBlobPath, CompressedChunkPath, Ec); + if (Ec) + { + CompressedChunkPath = std::filesystem::path{}; + + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + BuildBlob = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlobSize, true); + BuildBlob.SetDeleteOnClose(true); + } + } + } + } + + if (CompressedChunkPath.empty() && (BlobSize > 512u * 1024u)) + { + ZEN_TRACE_CPU("UpdateFolder_WriteTempChunk"); + // Could not be moved and rather large, lets store it on disk + CompressedChunkPath = Path / ZenTempDownloadFolderName / ChunkHash.ToHexString(); + TemporaryFile::SafeWriteFile(CompressedChunkPath, BuildBlob); + BuildBlob = {}; + } + DownloadedChunks++; + + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, // WritePool, GetSyncWorkerPool() + [&Path, + &RemoteContent, + &RemoteLookup, + &CacheFolderPath, + &SequenceIndexChunksLeftToWriteCounters, + &WriteToDiskBytes, + &ChunkCountWritten, + &WritePartsComplete, + &TotalPartWriteCount, + &FilteredWrittenBytesPerSecond, + RemoteChunkIndex, + ChunkTargetPtrs, + CompressedChunkPath, + CompressedPart = std::move(BuildBlob)](std::atomic&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WriteChunk"); + + FilteredWrittenBytesPerSecond.Start(); + + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + if (CompressedChunkPath.empty()) + { + ZEN_ASSERT(CompressedPart); + } + else + { + ZEN_ASSERT(!CompressedPart); + CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (!CompressedPart) + { + throw std::runtime_error( + fmt::format("Could not open dowloaded compressed chunk {} from {}", + ChunkHash, + CompressedChunkPath)); + } + } + + std::filesystem::path TargetFolder = Path / ZenTempCacheFolderName; + bool NeedHashVerify = WriteCompressedChunk(TargetFolder, + RemoteContent, + RemoteLookup, + ChunkHash, + ChunkTargetPtrs, + std::move(CompressedPart), + WriteToDiskBytes); + + if (!AbortFlag) + { + ChunkCountWritten++; + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + + if (!CompressedChunkPath.empty()) + { + std::filesystem::remove(CompressedChunkPath); + } + + CompleteChunkTargets(TargetFolder, + RemoteContent, + ChunkHash, + ChunkTargetPtrs, + SequenceIndexChunksLeftToWriteCounters, + NeedHashVerify); + } + } + }, + Work.DefaultErrorFunction()); + } + } + } + } + }, + Work.DefaultErrorFunction()); + } + + for (uint32_t BlockIndex : CachedChunkBlockIndexes) { - if (Work.IsAborted()) + if (AbortFlag) { break; } - const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; - const std::vector BlockChunkIndexNeeded = GetNeededChunkBlockIndexes(BlockDescription); - if (!BlockChunkIndexNeeded.empty()) - { - bool WantsToDoPartialBlockDownload = BlockChunkIndexNeeded.size() < BlockDescription.ChunkRawHashes.size(); - bool CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) && (BlockDescription.ChunkCompressedLengths.size() == - BlockDescription.ChunkRawHashes.size()); - if (AllowPartialBlockRequests && WantsToDoPartialBlockDownload && CanDoPartialBlockDownload) - { - struct BlockRangeDescriptor - { - uint64_t RangeStart = 0; - uint64_t RangeLength = 0; - uint32_t ChunkBlockIndexStart = 0; - uint32_t ChunkBlockIndexCount = 0; - }; - std::vector BlockRanges; - - ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_PartialAnalisys"); - - uint32_t NeedBlockChunkIndexOffset = 0; - uint32_t ChunkBlockIndex = 0; - uint32_t CurrentOffset = - gsl::narrow(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); - - BlockRangeDescriptor NextRange; - while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && - ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) + + Work.ScheduleWork( + WritePool, // GetSyncWorkerPool(), // WritePool, + [&, BlockIndex](std::atomic&) mutable { + if (!AbortFlag) { - const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; - if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + ZEN_TRACE_CPU("UpdateFolder_WriteCachedBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + FilteredWrittenBytesPerSecond.Start(); + + std::filesystem::path BlockChunkPath = Path / ZenTempBlockFolderName / BlockDescription.BlockHash.ToHexString(); + IoBuffer BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockBuffer) { - if (NextRange.RangeLength > 0) - { - BlockRanges.push_back(NextRange); - NextRange = {}; - } - ChunkBlockIndex++; - CurrentOffset += ChunkCompressedLength; + throw std::runtime_error( + fmt::format("Can not read block {} at {}", BlockDescription.BlockHash, BlockChunkPath)); } - else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + + if (!WriteBlockToDisk(CacheFolderPath, + RemoteContent, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + CompositeBuffer(std::move(BlockBuffer)), + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags, + ChunkCountWritten, + WriteToDiskBytes)) { - AllBlockChunksSize += ChunkCompressedLength; - if (NextRange.RangeLength == 0) - { - NextRange.RangeStart = CurrentOffset; - NextRange.ChunkBlockIndexStart = ChunkBlockIndex; - } - NextRange.RangeLength += ChunkCompressedLength; - NextRange.ChunkBlockIndexCount++; - ChunkBlockIndex++; - CurrentOffset += ChunkCompressedLength; - NeedBlockChunkIndexOffset++; + std::error_code DummyEc; + std::filesystem::remove(BlockChunkPath, DummyEc); + throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash)); } - else + WritePartsComplete++; + std::filesystem::remove(BlockChunkPath); + if (WritePartsComplete == TotalPartWriteCount) { - ZEN_ASSERT(false); + FilteredWrittenBytesPerSecond.Stop(); } } - AllBlocksSize += CurrentOffset; - if (NextRange.RangeLength > 0) - { - BlockRanges.push_back(NextRange); - NextRange = {}; - } + }, + Work.DefaultErrorFunction()); + } - ZEN_ASSERT(!BlockRanges.empty()); - std::vector CollapsedBlockRanges; - auto It = BlockRanges.begin(); - CollapsedBlockRanges.push_back(*It++); - uint64_t TotalSlack = 0; - while (It != BlockRanges.end()) + for (size_t BlockRangeIndex = 0; BlockRangeIndex < BlockRangeWorks.size(); BlockRangeIndex++) + { + if (AbortFlag) + { + break; + } + const BlockRangeDescriptor BlockRange = BlockRangeWorks[BlockRangeIndex]; + ZEN_ASSERT(BlockRange.BlockIndex != (uint32_t)-1); + const uint32_t BlockIndex = BlockRange.BlockIndex; + Work.ScheduleWork( + NetworkPool, // NetworkPool, // GetSyncWorkerPool() + [&, BlockIndex, BlockRange](std::atomic&) { + if (!AbortFlag) { - BlockRangeDescriptor& LastRange = CollapsedBlockRanges.back(); - uint64_t Slack = It->RangeStart - (LastRange.RangeStart + LastRange.RangeLength); - uint64_t BothRangeSize = It->RangeLength + LastRange.RangeLength; - if (Slack <= Max(BothRangeSize / 8, 64u * 1024u)) // Made up heuristic - we'll see how it pans out + ZEN_TRACE_CPU("UpdateFolder_GetPartialBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer BlockBuffer = + Storage.GetBuildBlob(BuildId, BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); + if (!BlockBuffer) { - LastRange.ChunkBlockIndexCount = - (It->ChunkBlockIndexStart + It->ChunkBlockIndexCount) - LastRange.ChunkBlockIndexStart; - LastRange.RangeLength = (It->RangeStart + It->RangeLength) - LastRange.RangeStart; - TotalSlack += Slack; + throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); } - else + uint64_t BlockSize = BlockBuffer.GetSize(); + BytesDownloaded += BlockSize; + BlockBytes += BlockSize; + DownloadedBlocks++; + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) { - CollapsedBlockRanges.push_back(*It); + FilteredDownloadedBytesPerSecond.Stop(); } - ++It; - } - uint64_t TotalFetch = 0; - for (const BlockRangeDescriptor& Range : CollapsedBlockRanges) - { - TotalFetch += Range.RangeLength; - } + std::filesystem::path BlockChunkPath; - AllBlocksFetch += TotalFetch; - AllBlocksSlack += TotalSlack; - BlocksNeededCount++; - AllBlockRequests += CollapsedBlockRanges.size(); + // Check if the dowloaded block is file based and we can move it directly without rewriting it + { + IoBufferFileReference FileRef; + if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && + (FileRef.FileChunkSize == BlockSize)) + { + ZEN_TRACE_CPU("UpdateFolder_MoveTempBlock"); - for (size_t BlockRangeIndex = 0; BlockRangeIndex < CollapsedBlockRanges.size(); BlockRangeIndex++) - { - TotalRequestCount++; - TotalPartWriteCount++; - const BlockRangeDescriptor BlockRange = CollapsedBlockRanges[BlockRangeIndex]; - // Partial block schedule - Work.ScheduleWork( - NetworkPool, // NetworkPool, // GetSyncWorkerPool() - [&, BlockIndex, BlockRange](std::atomic&) { - ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_PartialGet"); - const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; - - FilteredDownloadedBytesPerSecond.Start(); - IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, - BlockDescription.BlockHash, - BlockRange.RangeStart, - BlockRange.RangeLength); - if (!BlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); - } - BytesDownloaded += BlockBuffer.GetSize(); - BlockBytes += BlockBuffer.GetSize(); - DownloadedBlocks++; - CompositeBuffer Payload = WriteToTempFileIfNeeded(CompositeBuffer(std::move(BlockBuffer)), - Path / ZenTempBlockFolderName, - BlockDescription.BlockHash, - fmt::format("_{}", BlockRange.RangeStart)); - RequestsComplete++; - if (RequestsComplete == TotalRequestCount) + std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) { - FilteredDownloadedBytesPerSecond.Stop(); + BlockBuffer.SetDeleteOnClose(false); + BlockBuffer = {}; + BlockChunkPath = Path / ZenTempBlockFolderName / + fmt::format("{}_{:x}_{:x}", + BlockDescription.BlockHash, + BlockRange.RangeStart, + BlockRange.RangeLength); + std::filesystem::rename(TempBlobPath, BlockChunkPath, Ec); + if (Ec) + { + BlockChunkPath = std::filesystem::path{}; + + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true); + BlockBuffer.SetDeleteOnClose(true); + } } + } + } - if (!AbortFlag) - { - Work.ScheduleWork( - WritePool, // WritePool, // GetSyncWorkerPool(), - [&, BlockIndex, BlockRange, BlockPartialBuffer = std::move(Payload)](std::atomic&) { - if (!AbortFlag) - { - const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + if (BlockChunkPath.empty() && (BlockSize > 512u * 1024u)) + { + ZEN_TRACE_CPU("UpdateFolder_WriteTempBlock"); + // Could not be moved and rather large, lets store it on disk + BlockChunkPath = + Path / ZenTempBlockFolderName / + fmt::format("{}_{:x}_{:x}", BlockDescription.BlockHash, BlockRange.RangeStart, BlockRange.RangeLength); + TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer); + BlockBuffer = {}; + } - FilteredWrittenBytesPerSecond.Start(); + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, // WritePool, // GetSyncWorkerPool(), + [&, BlockIndex, BlockRange, BlockChunkPath, BlockPartialBuffer = std::move(BlockBuffer)]( + std::atomic&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WritePartialBlock"); - if (!WritePartialBlockToDisk( - CacheFolderPath, - RemoteContent, - BlockDescription, - SequenceIndexChunksLeftToWriteCounters, - BlockPartialBuffer, - BlockRange.ChunkBlockIndexStart, - BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount - 1, - RemoteLookup, - RemoteChunkIndexNeedsCopyFromSourceFlags, - ChunkCountWritten, - WriteToDiskBytes)) - { - throw std::runtime_error( - fmt::format("Partial block {} is malformed", BlockDescription.BlockHash)); - } - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + if (BlockChunkPath.empty()) + { + ZEN_ASSERT(BlockPartialBuffer); + } + else + { + ZEN_ASSERT(!BlockPartialBuffer); + BlockPartialBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockPartialBuffer) + { + throw std::runtime_error(fmt::format("Could not open downloaded block {} from {}", + BlockDescription.BlockHash, + BlockChunkPath)); } - }, - [&, BlockIndex](const std::exception& Ex, std::atomic&) { - ZEN_ERROR("Failed writing block {}. Reason: {}", - BlockDescriptions[BlockIndex].BlockHash, - Ex.what()); - AbortFlag = true; - }); - } - }, - Work.DefaultErrorFunction()); + } + + FilteredWrittenBytesPerSecond.Start(); + + if (!WritePartialBlockToDisk( + CacheFolderPath, + RemoteContent, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + CompositeBuffer(std::move(BlockPartialBuffer)), + BlockRange.ChunkBlockIndexStart, + BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount - 1, + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags, + ChunkCountWritten, + WriteToDiskBytes)) + { + std::error_code DummyEc; + std::filesystem::remove(BlockChunkPath, DummyEc); + throw std::runtime_error( + fmt::format("Partial block {} is malformed", BlockDescription.BlockHash)); + } + WritePartsComplete++; + + if (!BlockChunkPath.empty()) + { + std::filesystem::remove(BlockChunkPath); + } + + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } } - } - else - { - BlocksNeededCount++; - TotalRequestCount++; - TotalPartWriteCount++; + }, + Work.DefaultErrorFunction()); + } - Work.ScheduleWork( - NetworkPool, // GetSyncWorkerPool(), // NetworkPool, - [&, BlockIndex](std::atomic&) { - if (!AbortFlag) - { - ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_Get"); + for (uint32_t BlockIndex : FullBlockWorks) + { + if (AbortFlag) + { + break; + } + Work.ScheduleWork( + NetworkPool, // GetSyncWorkerPool(), // NetworkPool, + [&, BlockIndex](std::atomic&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_GetFullBlock"); - // Full block schedule + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; - FilteredDownloadedBytesPerSecond.Start(); - IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescription.BlockHash); - if (!BlockBuffer) - { - throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); - } - BytesDownloaded += BlockBuffer.GetSize(); - BlockBytes += BlockBuffer.GetSize(); - DownloadedBlocks++; - RequestsComplete++; - if (RequestsComplete == TotalRequestCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer BlockBuffer = Storage.GetBuildBlob(BuildId, BlockDescription.BlockHash); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); + } + uint64_t BlockSize = BlockBuffer.GetSize(); + BytesDownloaded += BlockSize; + BlockBytes += BlockSize; + DownloadedBlocks++; + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } - CompositeBuffer Payload = WriteToTempFileIfNeeded(CompositeBuffer(std::move(BlockBuffer)), - Path / ZenTempBlockFolderName, - BlockDescription.BlockHash); - if (!AbortFlag) + std::filesystem::path BlockChunkPath; + + // Check if the dowloaded block is file based and we can move it directly without rewriting it + { + IoBufferFileReference FileRef; + if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && + (FileRef.FileChunkSize == BlockSize)) + { + ZEN_TRACE_CPU("UpdateFolder_MoveTempBlock"); + std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) { - Work.ScheduleWork( - WritePool, - [&, BlockIndex, BlockBuffer = std::move(Payload)](std::atomic&) { - if (!AbortFlag) - { - const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + BlockBuffer.SetDeleteOnClose(false); + BlockBuffer = {}; + BlockChunkPath = Path / ZenTempBlockFolderName / BlockDescription.BlockHash.ToHexString(); + std::filesystem::rename(TempBlobPath, BlockChunkPath, Ec); + if (Ec) + { + BlockChunkPath = std::filesystem::path{}; - FilteredWrittenBytesPerSecond.Start(); - if (!WriteBlockToDisk(CacheFolderPath, - RemoteContent, - BlockDescription, - SequenceIndexChunksLeftToWriteCounters, - BlockBuffer, - RemoteLookup, - RemoteChunkIndexNeedsCopyFromSourceFlags, - ChunkCountWritten, - WriteToDiskBytes)) - { - throw std::runtime_error( - fmt::format("Block {} is malformed", BlockDescription.BlockHash)); - } - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - } - }, - Work.DefaultErrorFunction()); + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true); + BlockBuffer.SetDeleteOnClose(true); + } } } - }, - Work.DefaultErrorFunction()); - } - } - else - { - ZEN_DEBUG("Skipping block {} due to cache reuse", BlockDescriptions[BlockIndex].BlockHash); - } + } + + if (BlockChunkPath.empty() && (BlockSize > 512u * 1024u)) + { + ZEN_TRACE_CPU("UpdateFolder_WriteTempBlock"); + // Could not be moved and rather large, lets store it on disk + BlockChunkPath = Path / ZenTempBlockFolderName / BlockDescription.BlockHash.ToHexString(); + TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer); + BlockBuffer = {}; + } + + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, // WritePool, GetSyncWorkerPool() + [&RemoteContent, + &RemoteLookup, + &CacheFolderPath, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &SequenceIndexChunksLeftToWriteCounters, + BlockIndex, + &BlockDescriptions, + &ChunkCountWritten, + &WriteToDiskBytes, + &WritePartsComplete, + &TotalPartWriteCount, + &FilteredWrittenBytesPerSecond, + BlockChunkPath, + BlockBuffer = std::move(BlockBuffer)](std::atomic&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WriteFullBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + if (BlockChunkPath.empty()) + { + ZEN_ASSERT(BlockBuffer); + } + else + { + ZEN_ASSERT(!BlockBuffer); + BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Could not open dowloaded block {} from {}", + BlockDescription.BlockHash, + BlockChunkPath)); + } + } + + FilteredWrittenBytesPerSecond.Start(); + if (!WriteBlockToDisk(CacheFolderPath, + RemoteContent, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + CompositeBuffer(std::move(BlockBuffer)), + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags, + ChunkCountWritten, + WriteToDiskBytes)) + { + std::error_code DummyEc; + std::filesystem::remove(BlockChunkPath, DummyEc); + throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash)); + } + WritePartsComplete++; + + if (!BlockChunkPath.empty()) + { + std::filesystem::remove(BlockChunkPath); + } + + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + } + }, + Work.DefaultErrorFunction()); } + ZEN_DEBUG("Fetching {} with {} slack (ideal {}) out of {} using {} requests for {} blocks", NiceBytes(AllBlocksFetch), NiceBytes(AllBlocksSlack), @@ -4809,28 +5412,30 @@ namespace { NiceBytes(AllBlocksSize), AllBlockRequests, BlocksNeededCount); - ZEN_TRACE_CPU("HandleChunks_Wait"); - - Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(IsAborted, PendingWork); - ZEN_ASSERT(ChunkCountToWrite >= ChunkCountWritten.load()); - FilteredWrittenBytesPerSecond.Update(WriteToDiskBytes.load()); - FilteredDownloadedBytesPerSecond.Update(BytesDownloaded.load()); - std::string Details = fmt::format("{}/{} ({} {}bits/s) downloaded. {}/{} ({} {}B/s) written.", - RequestsComplete.load(), - TotalRequestCount, - NiceBytes(BytesDownloaded.load()), - NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8), - ChunkCountWritten.load(), - ChunkCountToWrite, - NiceBytes(WriteToDiskBytes.load()), - NiceNum(FilteredWrittenBytesPerSecond.GetCurrent())); - WriteProgressBar.UpdateState({.Task = "Writing chunks ", - .Details = Details, - .TotalCount = gsl::narrow(ChunkCountToWrite), - .RemainingCount = gsl::narrow(ChunkCountToWrite - ChunkCountWritten.load())}, - false); - }); + { + ZEN_TRACE_CPU("WriteChunks_Wait"); + + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + ZEN_ASSERT(ChunkCountToWrite >= ChunkCountWritten.load()); + FilteredWrittenBytesPerSecond.Update(WriteToDiskBytes.load()); + FilteredDownloadedBytesPerSecond.Update(BytesDownloaded.load()); + std::string Details = fmt::format("{}/{} ({} {}bits/s) downloaded. {}/{} ({} {}B/s) written.", + RequestsComplete.load(), + TotalRequestCount, + NiceBytes(BytesDownloaded.load()), + NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8), + ChunkCountWritten.load(), + ChunkCountToWrite, + NiceBytes(WriteToDiskBytes.load()), + NiceNum(FilteredWrittenBytesPerSecond.GetCurrent())); + WriteProgressBar.UpdateState({.Task = "Writing chunks ", + .Details = Details, + .TotalCount = gsl::narrow(ChunkCountToWrite), + .RemainingCount = gsl::narrow(ChunkCountToWrite - ChunkCountWritten.load())}, + false); + }); + } FilteredWrittenBytesPerSecond.Stop(); FilteredDownloadedBytesPerSecond.Stop(); @@ -4842,19 +5447,32 @@ namespace { WriteProgressBar.Finish(); - ZEN_CONSOLE("Downloaded {} ({}bits/s). Wrote {} ({}B/s). Completed in {}", + uint32_t RawSequencesMissingWriteCount = 0; + for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceIndexChunksLeftToWriteCounters.size(); SequenceIndex++) + { + const auto& SequenceIndexChunksLeftToWriteCounter = SequenceIndexChunksLeftToWriteCounters[SequenceIndex]; + if (SequenceIndexChunksLeftToWriteCounter.load() != 0) + { + RawSequencesMissingWriteCount++; + const uint32_t PathIndex = RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const std::filesystem::path& IncompletePath = RemoteContent.Paths[PathIndex]; + ZEN_ASSERT(!IncompletePath.empty()); + const uint32_t ExpectedSequenceCount = RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounter.load() <= ExpectedSequenceCount); + } + } + ZEN_ASSERT(RawSequencesMissingWriteCount == 0); + + ZEN_CONSOLE("Downloaded {} ({}bits/s) in {}. Wrote {} ({}B/s) in {}. Completed in {}", NiceBytes(BytesDownloaded.load()), NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), BytesDownloaded * 8)), + NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000), NiceBytes(WriteToDiskBytes.load()), NiceNum(GetBytesPerSecond(FilteredWrittenBytesPerSecond.GetElapsedTimeUS(), WriteToDiskBytes.load())), + NiceTimeSpanMs(FilteredWrittenBytesPerSecond.GetElapsedTimeUS() / 1000), NiceTimeSpanMs(WriteTimer.GetElapsedTimeMs())); } - for (const auto& SequenceIndexChunksLeftToWriteCounter : SequenceIndexChunksLeftToWriteCounters) - { - ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounter.load() == 0); - } - std::vector> Targets; Targets.reserve(RemoteContent.Paths.size()); for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) @@ -4867,17 +5485,24 @@ namespace { // Move all files we will reuse to cache folder // TODO: If WipeTargetFolder is false we could check which files are already correct and leave them in place - for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) + if (!LocalPathIndexesMatchingSequenceIndexes.empty()) { - const IoHash& RawHash = LocalContent.RawHashes[LocalPathIndex]; - if (RemoteLookup.RawHashToSequenceIndex.contains(RawHash)) + ZEN_TRACE_CPU("UpdateFolder_CacheReused"); + uint64_t TotalFullFileSizeCached = 0; + for (uint32_t LocalPathIndex : LocalPathIndexesMatchingSequenceIndexes) { + const IoHash& RawHash = LocalContent.RawHashes[LocalPathIndex]; const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash); ZEN_ASSERT_SLOW(std::filesystem::exists(LocalFilePath)); SetFileReadOnly(LocalFilePath, false); + ZEN_ASSERT_SLOW(!std::filesystem::exists(CacheFilePath)); std::filesystem::rename(LocalFilePath, CacheFilePath); + TotalFullFileSizeCached += std::filesystem::file_size(CacheFilePath); } + ZEN_CONSOLE("Saved {} ({}) unchanged files in cache", + LocalPathIndexesMatchingSequenceIndexes.size(), + NiceBytes(TotalFullFileSizeCached)); } if (WipeTargetFolder) @@ -4923,6 +5548,8 @@ namespace { } { + ZEN_TRACE_CPU("UpdateFolder_FinalizeTree"); + WorkerThreadPool& WritePool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // ProgressBar RebuildProgressBar(UsePlainProgress); @@ -4943,8 +5570,6 @@ namespace { break; } - ZEN_TRACE_CPU("UpdateFolder_FinalizeTree"); - size_t TargetCount = 1; const IoHash& RawHash = Targets[TargetOffset].first; while (Targets[TargetOffset + TargetCount].first == RawHash) @@ -5038,17 +5663,19 @@ namespace { TargetOffset += TargetCount; } - ZEN_TRACE_CPU("FinalizeTree_Wait"); + { + ZEN_TRACE_CPU("FinalizeTree_Wait"); - Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { - ZEN_UNUSED(IsAborted, PendingWork); - std::string Details = fmt::format("{}/{} files", TargetsComplete.load(), Targets.size()); - RebuildProgressBar.UpdateState({.Task = "Rebuilding state ", - .Details = Details, - .TotalCount = gsl::narrow(Targets.size()), - .RemainingCount = gsl::narrow(Targets.size() - TargetsComplete.load())}, - false); - }); + Work.Wait(UsePlainProgress ? 5000 : 200, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, PendingWork); + std::string Details = fmt::format("{}/{} files", TargetsComplete.load(), Targets.size()); + RebuildProgressBar.UpdateState({.Task = "Rebuilding state ", + .Details = Details, + .TotalCount = gsl::narrow(Targets.size()), + .RemainingCount = gsl::narrow(Targets.size() - TargetsComplete.load())}, + false); + }); + } if (AbortFlag) { @@ -5608,15 +6235,10 @@ namespace { const std::filesystem::path ZenTempFolder = Path / ZenTempFolderName; CreateDirectories(ZenTempFolder); - auto _ = MakeGuard([&]() { - CleanDirectory(ZenTempFolder, {}); - std::filesystem::remove(ZenTempFolder); - }); + CreateDirectories(Path / ZenTempBlockFolderName); - CreateDirectories(Path / ZenTempChunkFolderName); // TODO: Don't clear this - pick up files -> chunks to use - CreateDirectories(Path / - ZenTempCacheFolderName); // TODO: Don't clear this - pick up files and use as sequences (non .tmp extension) - // and delete .tmp (maybe?) - chunk them? How do we know the file is worth chunking? + CreateDirectories(Path / ZenTempCacheFolderName); + CreateDirectories(Path / ZenTempDownloadFolderName); std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; @@ -5748,6 +6370,8 @@ namespace { ZEN_CONSOLE("Downloaded build in {}.", NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); } } + CleanDirectory(ZenTempFolder, {}); + std::filesystem::remove(ZenTempFolder); } void DiffFolders(const std::filesystem::path& BasePath, const std::filesystem::path& ComparePath, bool OnlyChunked) @@ -6191,6 +6815,15 @@ BuildsCommand::BuildsCommand() ""); m_ValidateBuildPartOptions.parse_positional({"build-id", "build-part-id"}); m_ValidateBuildPartOptions.positional_help("build-id build-part-id"); + + AddCloudOptions(m_MultiTestDownloadOptions); + AddFileOptions(m_MultiTestDownloadOptions); + AddOutputOptions(m_MultiTestDownloadOptions); + m_MultiTestDownloadOptions + .add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), ""); + m_MultiTestDownloadOptions.add_option("", "", "build-ids", "Build Ids list separated by ','", cxxopts::value(m_BuildIds), ""); + m_MultiTestDownloadOptions.parse_positional({"local-path"}); + m_MultiTestDownloadOptions.positional_help("local-path"); } BuildsCommand::~BuildsCommand() = default; @@ -6667,6 +7300,79 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) return AbortFlag ? 11 : 0; } + if (SubOption == &m_MultiTestDownloadOptions) + { + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); + } + + ParseStorageOptions(); + ParseAuthOptions(); + + HttpClient Http(m_BuildsUrl, ClientSettings); + // m_StoragePath = "D:\\buildstorage"; + // m_Path = "F:\\Saved\\DownloadedBuilds\\++Fortnite+Main-CL-XXXXXXXX\\WindowsClient"; + // std::vector BuildIdStrings{"07d3942f0e7f4ca1b13b0587", + // "07d394eed89d769f2254e75d", + // "07d3953f22fa3f8000fa6f0a", + // "07d3959df47ed1f42ddbe44c", + // "07d395fa7803d50804f14417", + // "07d3964f919d577a321a1fdd", + // "07d396a6ce875004e16b9528"}; + + BuildStorage::Statistics StorageStats; + std::unique_ptr Storage; + std::string StorageName; + if (!m_BuildsUrl.empty()) + { + ZEN_CONSOLE("Downloading {} to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}'", + FormatArray(m_BuildIds, " "sv), + m_Path, + m_BuildsUrl, + Http.GetSessionId(), + m_Namespace, + m_Bucket); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + StorageName = "Cloud DDC"; + } + else if (!m_StoragePath.empty()) + { + ZEN_CONSOLE("Downloading {}'to '{}' from folder {}", FormatArray(m_BuildIds, " "sv), m_Path, m_StoragePath); + Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", m_UploadOptions.help())); + } + + for (const std::string& BuildIdString : m_BuildIds) + { + Oid BuildId = Oid::FromHexString(BuildIdString); + if (BuildId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("invalid build id {}\n{}", BuildIdString, m_DownloadOptions.help())); + } + DownloadFolder(*Storage, + BuildId, + {}, + {}, + m_Path, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + BuildIdString == m_BuildIds.front(), + true); + if (AbortFlag) + { + ZEN_CONSOLE("Download cancelled"); + return 11; + } + ZEN_CONSOLE("\n"); + } + return 0; + } + if (SubOption == &m_TestOptions) { ParseStorageOptions(); diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h index 838a17807..167a5d29f 100644 --- a/src/zen/cmds/builds_cmd.h +++ b/src/zen/cmds/builds_cmd.h @@ -92,18 +92,22 @@ private: cxxopts::Options m_TestOptions{"test", "Test upload and download with verify"}; + cxxopts::Options m_MultiTestDownloadOptions{"multi-test-download", "Test multiple sequenced downloads with verify"}; + std::vector m_BuildIds; + cxxopts::Options m_FetchBlobOptions{"fetch-blob", "Fetch a blob from remote store"}; std::string m_BlobHash; cxxopts::Options m_ValidateBuildPartOptions{"validate-part", "Fetch a build part and validate all referenced attachments"}; - cxxopts::Options* m_SubCommands[7] = {&m_ListOptions, + cxxopts::Options* m_SubCommands[8] = {&m_ListOptions, &m_UploadOptions, &m_DownloadOptions, &m_DiffOptions, &m_TestOptions, &m_FetchBlobOptions, - &m_ValidateBuildPartOptions}; + &m_ValidateBuildPartOptions, + &m_MultiTestDownloadOptions}; }; } // namespace zen diff --git a/src/zencore/basicfile.cpp b/src/zencore/basicfile.cpp index 6e879ca0d..95876cff4 100644 --- a/src/zencore/basicfile.cpp +++ b/src/zencore/basicfile.cpp @@ -858,7 +858,7 @@ BasicFileWriter::Flush() } IoBuffer -WriteToTempFile(const CompositeBuffer& Buffer, const std::filesystem::path& Path) +WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path) { TemporaryFile Temp; std::error_code Ec; @@ -868,6 +868,7 @@ WriteToTempFile(const CompositeBuffer& Buffer, const std::filesystem::path& Path throw std::system_error(Ec, fmt::format("Failed to create temp file for blob at '{}'", Path)); } + uint64_t BufferSize = Buffer.GetSize(); { uint64_t Offset = 0; static const uint64_t BufferingSize = 256u * 1024u; @@ -899,21 +900,31 @@ WriteToTempFile(const CompositeBuffer& Buffer, const std::filesystem::path& Path Temp.MoveTemporaryIntoPlace(Path, Ec); if (Ec) { - IoBuffer TmpBuffer = IoBufferBuilder::MakeFromFile(Path); - if (TmpBuffer) + Ec.clear(); + BasicFile OpenTemp(Path, BasicFile::Mode::kDelete, Ec); + if (Ec) { - IoHash ExistingHash = IoHash::HashBuffer(TmpBuffer); - const IoHash ExpectedHash = IoHash::HashBuffer(Buffer); - if (ExistingHash == ExpectedHash) - { - TmpBuffer.SetDeleteOnClose(true); - return TmpBuffer; - } + throw std::system_error(Ec, fmt::format("Failed to move temp file to '{}'", Path)); } - throw std::system_error(Ec, fmt::format("Failed to move temp file to '{}'", Path)); - } + if (OpenTemp.FileSize() != BufferSize) + { + throw std::runtime_error(fmt::format("Failed to move temp file to '{}' - mismatching file size already exists", Path)); + } + IoBuffer TmpBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BufferSize, true); - IoBuffer TmpBuffer = IoBufferBuilder::MakeFromFile(Path); + IoHash ExistingHash = IoHash::HashBuffer(TmpBuffer); + const IoHash ExpectedHash = IoHash::HashBuffer(Buffer); + if (ExistingHash != ExpectedHash) + { + throw std::runtime_error(fmt::format("Failed to move temp file to '{}' - mismatching file hash already exists", Path)); + } + Buffer = CompositeBuffer{}; + TmpBuffer.SetDeleteOnClose(true); + return TmpBuffer; + } + Buffer = CompositeBuffer{}; + BasicFile OpenTemp(Path, BasicFile::Mode::kDelete); + IoBuffer TmpBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BufferSize, true); TmpBuffer.SetDeleteOnClose(true); return TmpBuffer; } diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp index 1844f6a63..88c3bb5b9 100644 --- a/src/zencore/compress.cpp +++ b/src/zencore/compress.cpp @@ -193,7 +193,7 @@ public: const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize, - std::function&& Callback) const = 0; + std::function&& Callback) const = 0; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -287,13 +287,16 @@ public: const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize, - std::function&& Callback) const final + std::function&& Callback) const final { if (Header.Method == CompressionMethod::None && Header.TotalCompressedSize == CompressedData.GetSize() && Header.TotalCompressedSize == Header.TotalRawSize + sizeof(BufferHeader) && RawOffset < Header.TotalRawSize && (RawOffset + RawSize) <= Header.TotalRawSize) { - Callback(0, CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize)); + if (!Callback(0, CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize))) + { + return false; + } return true; } return false; @@ -616,7 +619,7 @@ public: const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize, - std::function&& Callback) const final; + std::function&& Callback) const final; protected: virtual bool DecompressBlock(MutableMemoryView RawData, MemoryView CompressedData) const = 0; @@ -744,7 +747,7 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header, const CompositeBuffer& CompressedData, uint64_t RawOffset, uint64_t RawSize, - std::function&& Callback) const + std::function&& Callback) const { if (Header.TotalCompressedSize != CompressedData.GetSize()) { @@ -814,14 +817,23 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header, Source.Detach(); return false; } - Callback(BlockIndex * BlockSize + OffsetInFirstBlock, - CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress))); + if (!Callback(BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress)))) + { + Source.Detach(); + return false; + } } else { - Callback(BlockIndex * BlockSize + OffsetInFirstBlock, - CompositeBuffer( - IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress))); + if (!Callback( + BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer( + IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress)))) + { + Source.Detach(); + return false; + } } OffsetInFirstBlock = 0; @@ -858,14 +870,21 @@ BlockDecoder::DecompressToStream(const BufferHeader& Header, { return false; } - Callback(BlockIndex * BlockSize + OffsetInFirstBlock, - CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress))); + if (!Callback(BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress)))) + { + return false; + } } else { - Callback(BlockIndex * BlockSize + OffsetInFirstBlock, - CompositeBuffer( - IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress))); + if (!Callback( + BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer( + IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress)))) + { + return false; + } } OffsetInFirstBlock = 0; @@ -1978,7 +1997,7 @@ CompressedBuffer::DecompressToComposite() const bool CompressedBuffer::DecompressToStream(uint64_t RawOffset, uint64_t RawSize, - std::function&& Callback) const + std::function&& Callback) const { using namespace detail; if (CompressedData) diff --git a/src/zencore/include/zencore/basicfile.h b/src/zencore/include/zencore/basicfile.h index a78132879..57798b6f4 100644 --- a/src/zencore/include/zencore/basicfile.h +++ b/src/zencore/include/zencore/basicfile.h @@ -186,7 +186,7 @@ private: uint64_t m_BufferEnd; }; -IoBuffer WriteToTempFile(const CompositeBuffer& Buffer, const std::filesystem::path& Path); +IoBuffer WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path); ZENCORE_API void basicfile_forcelink(); diff --git a/src/zencore/include/zencore/compress.h b/src/zencore/include/zencore/compress.h index 3969e9dbd..74fd5f767 100644 --- a/src/zencore/include/zencore/compress.h +++ b/src/zencore/include/zencore/compress.h @@ -209,7 +209,7 @@ public: */ [[nodiscard]] ZENCORE_API bool DecompressToStream(uint64_t RawOffset, uint64_t RawSize, - std::function&& Callback) const; + std::function&& Callback) const; /** A null compressed buffer. */ static const CompressedBuffer Null; diff --git a/src/zencore/workthreadpool.cpp b/src/zencore/workthreadpool.cpp index d15fb2e83..445fe939e 100644 --- a/src/zencore/workthreadpool.cpp +++ b/src/zencore/workthreadpool.cpp @@ -274,7 +274,7 @@ WorkerThreadPool::ScheduleWork(Ref Work) void WorkerThreadPool::ScheduleWork(std::function&& Work) { - ScheduleWork(Ref(new detail::LambdaWork(Work))); + ScheduleWork(Ref(new detail::LambdaWork(std::move(Work)))); } [[nodiscard]] size_t diff --git a/src/zenutil/chunkblock.cpp b/src/zenutil/chunkblock.cpp index a19cf5c1b..f3c14edc4 100644 --- a/src/zenutil/chunkblock.cpp +++ b/src/zenutil/chunkblock.cpp @@ -187,31 +187,54 @@ GenerateChunkBlock(std::vector>&& FetchChunks, return CompressedBlock; } -bool -IterateChunkBlock(const SharedBuffer& BlockPayload, - std::function Visitor, - uint64_t& OutHeaderSize) +std::vector +ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize) { - ZEN_ASSERT(BlockPayload); - if (BlockPayload.GetSize() < 1) - { - return false; - } - - MemoryView BlockView = BlockPayload.GetView(); - const uint8_t* ReadPtr = reinterpret_cast(BlockView.GetData()); + const uint8_t* ReadPtr = reinterpret_cast(BlockView.GetData()); uint32_t NumberSize; uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize); ReadPtr += NumberSize; - std::vector ChunkSizes; + std::vector ChunkSizes; ChunkSizes.reserve(ChunkCount); while (ChunkCount--) { - ChunkSizes.push_back(ReadVarUInt(ReadPtr, NumberSize)); + if (ReadPtr >= BlockView.GetDataEnd()) + { + throw std::runtime_error("Invalid block header, block data ended unexpectedly"); + } + uint64_t ChunkSize = ReadVarUInt(ReadPtr, NumberSize); + if (ChunkSize > std::numeric_limits::max()) + { + throw std::runtime_error("Invalid block header, header data is corrupt"); + } + if (ChunkSize < 1) + { + throw std::runtime_error("Invalid block header, header data is corrupt"); + } + ChunkSizes.push_back(gsl::narrow(ChunkSize)); ReadPtr += NumberSize; } uint64_t Offset = std::distance((const uint8_t*)BlockView.GetData(), ReadPtr); OutHeaderSize = Offset; + return ChunkSizes; +} + +bool +IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function Visitor, + uint64_t& OutHeaderSize) +{ + ZEN_ASSERT(BlockPayload); + if (BlockPayload.GetSize() < 1) + { + return false; + } + + MemoryView BlockView = BlockPayload.GetView(); + + std::vector ChunkSizes = ReadChunkBlockHeader(BlockView, OutHeaderSize); + uint64_t Offset = OutHeaderSize; + OutHeaderSize = Offset; for (uint64_t ChunkSize : ChunkSizes) { IoBuffer Chunk(BlockPayload.AsIoBuffer(), Offset, ChunkSize); diff --git a/src/zenutil/include/zenutil/chunkblock.h b/src/zenutil/include/zenutil/chunkblock.h index 21107fb7c..277580c74 100644 --- a/src/zenutil/include/zenutil/chunkblock.h +++ b/src/zenutil/include/zenutil/chunkblock.h @@ -31,9 +31,10 @@ CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash); typedef std::function(const IoHash& RawHash)> FetchChunkFunc; -CompressedBuffer GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock); -bool IterateChunkBlock(const SharedBuffer& BlockPayload, - std::function Visitor, - uint64_t& OutHeaderSize); +CompressedBuffer GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock); +bool IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function Visitor, + uint64_t& OutHeaderSize); +std::vector ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize); } // namespace zen -- cgit v1.2.3 From 90db3ced033d4e06da2739e5d97cdeff2b0ba3b9 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 11 Mar 2025 09:58:07 +0100 Subject: Build command tweaks (#301) - Improvement: Don't chunk up .mp4 files as they generally won't benefit from deduplication or partial in-place-updates - Improvement: Emit build name to console output when downloading a build - Improvement: Added some debug logging - Bugfix: Logging setup would previously not function correctly when not logging to file --- src/zen/cmds/builds_cmd.cpp | 19 ++++++--- src/zen/zen.cpp | 53 +++++++++--------------- src/zencore/include/zencore/compactbinaryfmt.h | 23 ++++++++++ src/zenhttp/include/zenhttp/formatters.h | 17 ++++---- src/zenutil/include/zenutil/chunkingcontroller.h | 2 +- src/zenutil/include/zenutil/logging.h | 1 + src/zenutil/logging.cpp | 27 ++++++------ 7 files changed, 83 insertions(+), 59 deletions(-) create mode 100644 src/zencore/include/zencore/compactbinaryfmt.h (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 2c7a73fb3..56c3c3c4f 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -5699,10 +5700,14 @@ namespace { std::vector> AvailableParts; CbObject BuildObject = Storage.GetBuild(BuildId); - ZEN_CONSOLE("GetBuild took {}. Payload size: {}", + + ZEN_CONSOLE("GetBuild took {}. Name: '{}', Payload size: {}", NiceTimeSpanMs(GetBuildTimer.GetElapsedTimeMs()), + BuildObject["BuildName"sv].AsString(), NiceBytes(BuildObject.GetSize())); + ZEN_DEBUG("Build object: {}", BuildObject); + CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); if (!PartsObject) { @@ -5782,11 +5787,13 @@ namespace { { ZEN_TRACE_CPU("GetRemoteContent"); - Stopwatch GetBuildPartTimer; - CbObject BuildPartManifest = Storage.GetBuildPart(BuildId, BuildParts[0].first); + Stopwatch GetBuildPartTimer; + const Oid BuildPartId = BuildParts[0].first; + const std::string_view BuildPartName = BuildParts[0].second; + CbObject BuildPartManifest = Storage.GetBuildPart(BuildId, BuildPartId); ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. Payload size: {}", - BuildParts[0].first, - BuildParts[0].second, + BuildPartId, + BuildPartName, NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()), NiceBytes(BuildPartManifest.GetSize())); @@ -5915,7 +5922,7 @@ namespace { OutPartContents.resize(1); ParseBuildPartManifest(Storage, BuildId, - BuildParts[0].first, + BuildPartId, BuildPartManifest, OutPartContents[0], OutBlockDescriptions, diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index 713a0d66d..4e6161e86 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -377,31 +378,6 @@ main(int argc, char** argv) using namespace zen; using namespace std::literals; - zen::logging::InitializeLogging(); - - // Set output mode to handle virtual terminal sequences - zen::logging::EnableVTMode(); - std::set_terminate([]() { - void* Frames[8]; - uint32_t FrameCount = GetCallstack(2, 8, Frames); - CallstackFrames* Callstack = CreateCallstack(FrameCount, Frames); - ZEN_CRITICAL("Program exited abnormally via std::terminate()\n{}", CallstackToString(Callstack, " ")); - FreeCallstack(Callstack); - }); - - LoggerRef DefaultLogger = zen::logging::Default(); - auto& Sinks = DefaultLogger.SpdLogger->sinks(); - - Sinks.clear(); - auto ConsoleSink = std::make_shared(); - Sinks.push_back(ConsoleSink); - - zen::MaximizeOpenFileCount(); - - ////////////////////////////////////////////////////////////////////////// - - auto _ = zen::MakeGuard([] { spdlog::shutdown(); }); - AttachCommand AttachCmd; BenchCommand BenchCmd; BuildsCommand BuildsCmd; @@ -674,14 +650,25 @@ main(int argc, char** argv) exit(0); } - if (GlobalOptions.IsDebug) - { - logging::SetLogLevel(logging::level::Debug); - } - if (GlobalOptions.IsVerbose) - { - logging::SetLogLevel(logging::level::Trace); - } + zen::LoggingOptions LogOptions; + LogOptions.IsDebug = GlobalOptions.IsDebug; + LogOptions.IsVerbose = GlobalOptions.IsVerbose; + LogOptions.AllowAsync = false; + zen::InitializeLogging(LogOptions); + + std::set_terminate([]() { + void* Frames[8]; + uint32_t FrameCount = GetCallstack(2, 8, Frames); + CallstackFrames* Callstack = CreateCallstack(FrameCount, Frames); + ZEN_CRITICAL("Program exited abnormally via std::terminate()\n{}", CallstackToString(Callstack, " ")); + FreeCallstack(Callstack); + }); + + zen::MaximizeOpenFileCount(); + + ////////////////////////////////////////////////////////////////////////// + + auto _ = zen::MakeGuard([] { zen::ShutdownLogging(); }); #if ZEN_WITH_TRACE if (TraceHost.size()) diff --git a/src/zencore/include/zencore/compactbinaryfmt.h b/src/zencore/include/zencore/compactbinaryfmt.h new file mode 100644 index 000000000..ae0c3eb42 --- /dev/null +++ b/src/zencore/include/zencore/compactbinaryfmt.h @@ -0,0 +1,23 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +ZEN_THIRD_PARTY_INCLUDES_END + +#include + +template +requires DerivedFrom +struct fmt::formatter : fmt::formatter +{ + auto format(const zen::CbObject& a, format_context& ctx) const + { + zen::ExtendableStringBuilder<1024> ObjStr; + zen::CompactBinaryToJson(a, ObjStr); + return fmt::formatter::format(ObjStr.ToView(), ctx); + } +}; diff --git a/src/zenhttp/include/zenhttp/formatters.h b/src/zenhttp/include/zenhttp/formatters.h index 0fa5dc6da..74da9ab05 100644 --- a/src/zenhttp/include/zenhttp/formatters.h +++ b/src/zenhttp/include/zenhttp/formatters.h @@ -7,6 +7,7 @@ #include #include #include +#include ZEN_THIRD_PARTY_INCLUDES_START #include @@ -67,15 +68,17 @@ struct fmt::formatter { using namespace std::literals; - if (Response.status_code == 200 || Response.status_code == 201) + zen::NiceTimeSpanMs NiceResponseTime(uint64_t(Response.elapsed * 1000)); + + if (zen::IsHttpSuccessCode(Response.status_code)) { return fmt::format_to(Ctx.out(), - "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}s", + "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}", Response.url.str(), Response.status_code, Response.uploaded_bytes, Response.downloaded_bytes, - Response.elapsed); + NiceResponseTime.c_str()); } else { @@ -90,12 +93,12 @@ struct fmt::formatter std::string_view Json = Obj.ToJson(Sb).ToView(); return fmt::format_to(Ctx.out(), - "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}s, Response: '{}', Reason: '{}'", + "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}, Response: '{}', Reason: '{}'", Response.url.str(), Response.status_code, Response.uploaded_bytes, Response.downloaded_bytes, - Response.elapsed, + NiceResponseTime.c_str(), Json, Response.reason); } @@ -104,12 +107,12 @@ struct fmt::formatter zen::BodyLogFormatter Body(Response.text); return fmt::format_to(Ctx.out(), - "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}s, Reponse: '{}', Reason: '{}'", + "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}, Response: '{}', Reason: '{}'", Response.url.str(), Response.status_code, Response.uploaded_bytes, Response.downloaded_bytes, - Response.elapsed, + NiceResponseTime.c_str(), Body.GetText(), Response.reason); } diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h index ebc80e207..246f4498a 100644 --- a/src/zenutil/include/zenutil/chunkingcontroller.h +++ b/src/zenutil/include/zenutil/chunkingcontroller.h @@ -11,7 +11,7 @@ namespace zen { -const std::vector DefaultChunkingExcludeExtensions = {".exe", ".dll", ".pdb", ".self"}; +const std::vector DefaultChunkingExcludeExtensions = {".exe", ".dll", ".pdb", ".self", ".mp4"}; const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u, .MaxSize = 128u * 1024u, diff --git a/src/zenutil/include/zenutil/logging.h b/src/zenutil/include/zenutil/logging.h index ebf6372fc..d64eef207 100644 --- a/src/zenutil/include/zenutil/logging.h +++ b/src/zenutil/include/zenutil/logging.h @@ -32,6 +32,7 @@ struct LoggingOptions bool IsDebug = false; bool IsVerbose = false; bool IsTest = false; + bool AllowAsync = true; bool NoConsoleOutput = false; std::filesystem::path AbsLogFile; // Absolute path to main log file std::string LogId; diff --git a/src/zenutil/logging.cpp b/src/zenutil/logging.cpp index 0444fa2c4..762b75a59 100644 --- a/src/zenutil/logging.cpp +++ b/src/zenutil/logging.cpp @@ -49,7 +49,7 @@ BeginInitializeLogging(const LoggingOptions& LogOptions) zen::logging::InitializeLogging(); zen::logging::EnableVTMode(); - bool IsAsync = true; + bool IsAsync = LogOptions.AllowAsync; if (LogOptions.IsDebug) { @@ -181,7 +181,7 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) LogLevel = logging::level::Debug; } - if (LogOptions.IsTest) + if (LogOptions.IsTest || LogOptions.IsVerbose) { LogLevel = logging::level::Trace; } @@ -194,18 +194,21 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) spdlog::set_formatter( std::make_unique(LogOptions.LogId, std::chrono::system_clock::now())); // default to duration prefix - if (LogOptions.AbsLogFile.extension() == ".json") - { - g_FileSink->set_formatter(std::make_unique(LogOptions.LogId)); - } - else + if (g_FileSink) { - g_FileSink->set_formatter(std::make_unique(LogOptions.LogId)); // this will have a date prefix - } + if (LogOptions.AbsLogFile.extension() == ".json") + { + g_FileSink->set_formatter(std::make_unique(LogOptions.LogId)); + } + else + { + g_FileSink->set_formatter(std::make_unique(LogOptions.LogId)); // this will have a date prefix + } - const std::string StartLogTime = zen::DateTime::Now().ToIso8601(); + const std::string StartLogTime = zen::DateTime::Now().ToIso8601(); - spdlog::apply_all([&](auto Logger) { Logger->info("log starting at {}", StartLogTime); }); + spdlog::apply_all([&](auto Logger) { Logger->info("log starting at {}", StartLogTime); }); + } g_IsLoggingInitialized = true; } @@ -213,7 +216,7 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) void ShutdownLogging() { - if (g_IsLoggingInitialized) + if (g_IsLoggingInitialized && g_FileSink) { auto DefaultLogger = zen::logging::Default(); ZEN_LOG_INFO(DefaultLogger, "log ending at {}", zen::DateTime::Now().ToIso8601()); -- cgit v1.2.3 From 5f251575449fff1985a465607ebe03cc320ac8c8 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 11 Mar 2025 14:32:25 +0100 Subject: async find blocks (#300) * put/get build and find blocks while scanning local folder when uploading * changelog * remove redundant move --- src/zen/cmds/builds_cmd.cpp | 122 +++++++++++++++++++++++++++++--------------- 1 file changed, 80 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 56c3c3c4f..e03175256 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -81,6 +81,8 @@ namespace { const ChunksBlockParameters DefaultChunksBlockParams{.MaxBlockSize = 32u * 1024u * 1024u, .MaxChunkEmbedSize = DefaultChunkedParams.MaxSize}; + const uint64_t DefaultPreferredMultipartChunkSize = 32u * 1024u * 1024u; + const double DefaultLatency = 0; // .0010; const double DefaultDelayPerKBSec = 0; // 0.00005; @@ -2309,6 +2311,61 @@ namespace { CbObject ChunkerParameters; + struct PrepareBuildResult + { + std::vector KnownBlocks; + uint64_t PreferredMultipartChunkSize = DefaultPreferredMultipartChunkSize; + uint64_t PayloadSize = 0; + uint64_t PrepareBuildTimeMs = 0; + uint64_t FindBlocksTimeMs = 0; + uint64_t ElapsedTimeMs = 0; + }; + + FindBlocksStatistics FindBlocksStats; + + std::future PrepBuildResultFuture = + GetSmallWorkerPool(EWorkloadType::Burst) + .EnqueueTask(std::packaged_task{ + [&Storage, BuildId, &MetaData, CreateBuild, AllowMultiparts, IgnoreExistingBlocks, &FindBlocksStats] { + PrepareBuildResult Result; + Stopwatch Timer; + if (CreateBuild) + { + Stopwatch PutBuildTimer; + CbObject PutBuildResult = Storage.PutBuild(BuildId, MetaData); + Result.PrepareBuildTimeMs = PutBuildTimer.GetElapsedTimeMs(); + Result.PreferredMultipartChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(Result.PreferredMultipartChunkSize); + Result.PayloadSize = MetaData.GetSize(); + } + else + { + Stopwatch GetBuildTimer; + CbObject Build = Storage.GetBuild(BuildId); + Result.PrepareBuildTimeMs = GetBuildTimer.GetElapsedTimeMs(); + Result.PayloadSize = Build.GetSize(); + if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + Result.PreferredMultipartChunkSize = ChunkSize; + } + else if (AllowMultiparts) + { + ZEN_WARN("PreferredMultipartChunkSize is unknown. Defaulting to '{}'", + NiceBytes(Result.PreferredMultipartChunkSize)); + } + } + + if (!IgnoreExistingBlocks) + { + Stopwatch KnownBlocksTimer; + Result.KnownBlocks = Storage.FindBlocks(BuildId); + FindBlocksStats.FindBlockTimeMS = KnownBlocksTimer.GetElapsedTimeMs(); + FindBlocksStats.FoundBlockCount = Result.KnownBlocks.size(); + Result.FindBlocksTimeMs = KnownBlocksTimer.GetElapsedTimeMs(); + } + Result.ElapsedTimeMs = Timer.GetElapsedTimeMs(); + return Result; + }}); + ChunkedFolderContent LocalContent; GetFolderContentStatistics LocalFolderScanStats; @@ -2517,43 +2574,29 @@ namespace { NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); } - const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); - std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; - - if (CreateBuild) - { - Stopwatch PutBuildTimer; - CbObject PutBuildResult = Storage.PutBuild(BuildId, MetaData); - ZEN_CONSOLE("PutBuild took {}. Payload size: {}", - NiceTimeSpanMs(PutBuildTimer.GetElapsedTimeMs()), - NiceBytes(MetaData.GetSize())); - PreferredMultipartChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(PreferredMultipartChunkSize); - } - else - { - Stopwatch GetBuildTimer; - CbObject Build = Storage.GetBuild(BuildId); - ZEN_CONSOLE("GetBuild took {}. Payload size: {}", NiceTimeSpanMs(GetBuildTimer.GetElapsedTimeMs()), NiceBytes(Build.GetSize())); - if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) - { - PreferredMultipartChunkSize = ChunkSize; - } - else if (AllowMultiparts) - { - ZEN_WARN("PreferredMultipartChunkSize is unknown. Defaulting to '{}'", NiceBytes(PreferredMultipartChunkSize)); - } - } - - const std::uint64_t LargeAttachmentSize = AllowMultiparts ? PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); - FindBlocksStatistics FindBlocksStats; GenerateBlocksStatistics GenerateBlocksStats; LooseChunksStatistics LooseChunksStats; - std::vector KnownBlocks; - std::vector ReuseBlockIndexes; - std::vector NewBlockChunkIndexes; - Stopwatch BlockArrangeTimer; + std::vector ReuseBlockIndexes; + std::vector NewBlockChunkIndexes; + + PrepareBuildResult PrepBuildResult = PrepBuildResultFuture.get(); + + ZEN_CONSOLE("Build prepare took {}. {} took {}, payload size {}{}", + NiceTimeSpanMs(PrepBuildResult.ElapsedTimeMs), + CreateBuild ? "PutBuild" : "GetBuild", + NiceTimeSpanMs(PrepBuildResult.PrepareBuildTimeMs), + NiceBytes(PrepBuildResult.PayloadSize), + IgnoreExistingBlocks ? "" + : fmt::format(". Found {} blocks in {}", + PrepBuildResult.KnownBlocks.size(), + NiceTimeSpanMs(PrepBuildResult.FindBlocksTimeMs))); + + const std::uint64_t LargeAttachmentSize = AllowMultiparts ? PrepBuildResult.PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + + Stopwatch BlockArrangeTimer; std::vector LooseChunkIndexes; { @@ -2583,12 +2626,7 @@ namespace { } else { - Stopwatch KnownBlocksTimer; - KnownBlocks = Storage.FindBlocks(BuildId); - FindBlocksStats.FindBlockTimeMS = KnownBlocksTimer.GetElapsedTimeMs(); - FindBlocksStats.FoundBlockCount = KnownBlocks.size(); - - ReuseBlockIndexes = FindReuseBlocks(KnownBlocks, + ReuseBlockIndexes = FindReuseBlocks(PrepBuildResult.KnownBlocks, LocalContent.ChunkedContent.ChunkHashes, BlockChunkIndexes, BlockReuseMinPercentLimit, @@ -2596,7 +2634,7 @@ namespace { FindBlocksStats); FindBlocksStats.AcceptedBlockCount = ReuseBlockIndexes.size(); - for (const ChunkBlockDescription& Description : KnownBlocks) + for (const ChunkBlockDescription& Description : PrepBuildResult.KnownBlocks) { for (uint32_t ChunkRawLength : Description.ChunkRawLengths) { @@ -2708,8 +2746,8 @@ namespace { AllChunkBlockDescriptions.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); for (size_t ReuseBlockIndex : ReuseBlockIndexes) { - AllChunkBlockDescriptions.push_back(KnownBlocks[ReuseBlockIndex]); - AllChunkBlockHashes.push_back(KnownBlocks[ReuseBlockIndex].BlockHash); + AllChunkBlockDescriptions.push_back(PrepBuildResult.KnownBlocks[ReuseBlockIndex]); + AllChunkBlockHashes.push_back(PrepBuildResult.KnownBlocks[ReuseBlockIndex].BlockHash); } AllChunkBlockDescriptions.insert(AllChunkBlockDescriptions.end(), NewBlocks.BlockDescriptions.begin(), -- cgit v1.2.3 From fb09d861fd76e459ac86bec388bd406aaca8e681 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 12 Mar 2025 10:51:57 +0100 Subject: improved block gen logic (#302) - Improvement: Reduced memory usage during upload and part upload validation - Improvement: Reduced I/O usage during upload and download - Improvement: Faster block regeneration when uploading in response to PutBuild/FinalizeBuild - Improvement: More trace scopes for build upload operations - Bugfix: Fixed crash during download when trying to write outside a file range --- src/zen/cmds/builds_cmd.cpp | 1007 ++++++++++++++------------ src/zen/zen.cpp | 3 + src/zenutil/chunkedcontent.cpp | 15 +- src/zenutil/chunkedfile.cpp | 6 + src/zenutil/chunkingcontroller.cpp | 4 + src/zenutil/filebuildstorage.cpp | 17 + src/zenutil/include/zenutil/chunkedcontent.h | 4 +- 7 files changed, 599 insertions(+), 457 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index e03175256..baa46dda8 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -191,15 +191,34 @@ namespace { return SB.ToString(); } - void CleanDirectory(const std::filesystem::path& Path, std::span ExcludeDirectories) + bool CleanDirectory(const std::filesystem::path& Path, std::span ExcludeDirectories) { ZEN_TRACE_CPU("CleanDirectory"); + bool CleanWipe = true; + DirectoryContent LocalDirectoryContent; GetDirectoryContent(Path, DirectoryContentFlags::IncludeDirs | DirectoryContentFlags::IncludeFiles, LocalDirectoryContent); for (const std::filesystem::path& LocalFilePath : LocalDirectoryContent.Files) { - std::filesystem::remove(LocalFilePath); + try + { + std::filesystem::remove(LocalFilePath); + } + catch (const std::exception&) + { + // DeleteOnClose files may be a bit slow in getting cleaned up, so pause amd retry one time + Sleep(200); + try + { + std::filesystem::remove(LocalFilePath); + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed removing file {}. Reason: {}", LocalFilePath, Ex.what()); + CleanWipe = false; + } + } } for (const std::filesystem::path& LocalDirPath : LocalDirectoryContent.Directories) @@ -215,10 +234,28 @@ namespace { } if (!Leave) { - zen::CleanDirectory(LocalDirPath); - std::filesystem::remove(LocalDirPath); + try + { + zen::CleanDirectory(LocalDirPath); + std::filesystem::remove(LocalDirPath); + } + catch (const std::exception&) + { + Sleep(200); + try + { + zen::CleanDirectory(LocalDirPath); + std::filesystem::remove(LocalDirPath); + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed removing directory {}. Reason: {}", LocalDirPath, Ex.what()); + CleanWipe = false; + } + } } } + return CleanWipe; } std::string ReadAccessTokenFromFile(const std::filesystem::path& Path) @@ -1188,6 +1225,7 @@ namespace { uint64_t& OutCompressedSize, uint64_t& OutDecompressedSize) { + ZEN_TRACE_CPU("ValidateBlob"); IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlobHash); if (!Payload) { @@ -1214,6 +1252,7 @@ namespace { const IoHash& ChunkHash, ReadFileCache& OpenFileCache) { + ZEN_TRACE_CPU("FetchChunk"); auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); ZEN_ASSERT(It != Lookup.ChunkHashToChunkIndex.end()); uint32_t ChunkIndex = It->second; @@ -1233,6 +1272,7 @@ namespace { ChunkBlockDescription& OutBlockDescription, DiskStatistics& DiskStats) { + ZEN_TRACE_CPU("GenerateBlock"); ReadFileCache OpenFileCache(DiskStats, Path, Content, Lookup, 4); std::vector> BlockContent; @@ -1255,6 +1295,103 @@ namespace { return GenerateChunkBlock(std::move(BlockContent), OutBlockDescription); }; + CompressedBuffer RebuildBlock(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + CompositeBuffer&& HeaderBuffer, + const std::vector& ChunksInBlock, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("RebuildBlock"); + ReadFileCache OpenFileCache(DiskStats, Path, Content, Lookup, 4); + + std::vector ResultBuffers; + ResultBuffers.reserve(HeaderBuffer.GetSegments().size() + ChunksInBlock.size()); + ResultBuffers.insert(ResultBuffers.end(), HeaderBuffer.GetSegments().begin(), HeaderBuffer.GetSegments().end()); + for (uint32_t ChunkIndex : ChunksInBlock) + { + std::span ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex); + ZEN_ASSERT(!ChunkLocations.empty()); + CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, + ChunkLocations[0].Offset, + Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == Content.ChunkedContent.ChunkHashes[ChunkIndex]); + CompositeBuffer CompressedChunk = + CompressedBuffer::Compress(std::move(Chunk), OodleCompressor::Mermaid, OodleCompressionLevel::None).GetCompressed(); + ResultBuffers.insert(ResultBuffers.end(), CompressedChunk.GetSegments().begin(), CompressedChunk.GetSegments().end()); + } + return CompressedBuffer::FromCompressedNoValidate(CompositeBuffer(std::move(ResultBuffers))); + }; + + void DownloadLargeBlob(BuildStorage& Storage, + const std::filesystem::path& DownloadFolder, + const Oid& BuildId, + const IoHash& ChunkHash, + const std::uint64_t PreferredMultipartChunkSize, + ParallellWork& Work, + WorkerThreadPool& NetworkPool, + std::atomic& BytesDownloaded, + std::atomic& MultipartAttachmentCount, + std::function&& OnDownloadComplete) + { + ZEN_TRACE_CPU("DownloadLargeBlob"); + + struct WorkloadData + { + TemporaryFile TempFile; + }; + std::shared_ptr Workload(std::make_shared()); + + std::error_code Ec; + Workload->TempFile.CreateTemporary(DownloadFolder, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed opening temporary file '{}': {} ({})", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); + } + std::vector> WorkItems = Storage.GetLargeBuildBlob( + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + [Workload, &BytesDownloaded, OnDownloadComplete = std::move(OnDownloadComplete)](uint64_t Offset, + const IoBuffer& Chunk, + uint64_t BytesRemaining) { + BytesDownloaded += Chunk.GetSize(); + + if (!AbortFlag.load()) + { + ZEN_TRACE_CPU("DownloadLargeBlob_Save"); + Workload->TempFile.Write(Chunk.GetView(), Offset); + if (Chunk.GetSize() == BytesRemaining) + { + uint64_t PayloadSize = Workload->TempFile.FileSize(); + void* FileHandle = Workload->TempFile.Detach(); + ZEN_ASSERT(FileHandle != nullptr); + IoBuffer Payload(IoBuffer::File, FileHandle, 0, PayloadSize, true); + Payload.SetDeleteOnClose(true); + OnDownloadComplete(std::move(Payload)); + } + } + }); + if (!WorkItems.empty()) + { + MultipartAttachmentCount++; + } + for (auto& WorkItem : WorkItems) + { + Work.ScheduleWork( + NetworkPool, // GetSyncWorkerPool(),// + [WorkItem = std::move(WorkItem)](std::atomic&) { + ZEN_TRACE_CPU("DownloadLargeBlob_Work"); + if (!AbortFlag) + { + WorkItem(); + } + }, + Work.DefaultErrorFunction()); + } + } + void ValidateBuildPart(BuildStorage& Storage, const Oid& BuildId, Oid BuildPartId, const std::string_view BuildPartName) { Stopwatch Timer; @@ -1274,6 +1411,11 @@ namespace { throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", BuildId, BuildPartName)); } } + uint64_t PreferredMultipartChunkSize = DefaultPreferredMultipartChunkSize; + if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + PreferredMultipartChunkSize = ChunkSize; + } CbObject BuildPart = Storage.GetBuildPart(BuildId, BuildPartId); ZEN_CONSOLE("Validating build part {}/{} ({})", BuildId, BuildPartId, NiceBytes(BuildPart.GetSize())); std::vector ChunkAttachments; @@ -1295,9 +1437,20 @@ namespace { } WorkerThreadPool& NetworkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // + WorkerThreadPool& ReadPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // WorkerThreadPool& VerifyPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // ParallellWork Work(AbortFlag); + const std::filesystem::path TempFolder = ".zen-tmp"; + + CreateDirectories(TempFolder); + auto __ = MakeGuard([&TempFolder]() { + if (CleanDirectory(TempFolder, {})) + { + std::filesystem::remove(TempFolder); + } + }); + ProgressBar ProgressBar(UsePlainProgress); uint64_t AttachmentsToVerifyCount = ChunkAttachments.size() + BlockAttachments.size(); @@ -1308,51 +1461,58 @@ namespace { FilteredRate FilteredDownloadedBytesPerSecond; FilteredRate FilteredVerifiedBytesPerSecond; + std::atomic MultipartAttachmentCount = 0; + for (const IoHash& ChunkAttachment : ChunkAttachments) { Work.ScheduleWork( - NetworkPool, + ReadPool, [&, ChunkAttachment](std::atomic&) { if (!AbortFlag) { - FilteredDownloadedBytesPerSecond.Start(); - IoBuffer Payload = Storage.GetBuildBlob(BuildId, ChunkAttachment); - DownloadedAttachmentCount++; - DownloadedByteCount += Payload.GetSize(); - if (DownloadedAttachmentCount.load() == AttachmentsToVerifyCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - if (!Payload) - { - throw std::runtime_error(fmt::format("Chunk attachment {} could not be found", ChunkAttachment)); - } - if (!AbortFlag) - { - Work.ScheduleWork( - VerifyPool, - [&, Payload = std::move(Payload), ChunkAttachment](std::atomic&) mutable { - if (!AbortFlag) - { - FilteredVerifiedBytesPerSecond.Start(); + ZEN_TRACE_CPU("ValidateBuildPart_GetChunk"); - uint64_t CompressedSize; - uint64_t DecompressedSize; - ValidateBlob(std::move(Payload), ChunkAttachment, CompressedSize, DecompressedSize); - ZEN_CONSOLE_VERBOSE("Chunk attachment {} ({} -> {}) is valid", - ChunkAttachment, - NiceBytes(CompressedSize), - NiceBytes(DecompressedSize)); - VerifiedAttachmentCount++; - VerifiedByteCount += DecompressedSize; - if (VerifiedAttachmentCount.load() == AttachmentsToVerifyCount) - { - FilteredVerifiedBytesPerSecond.Stop(); - } - } - }, - Work.DefaultErrorFunction()); - } + FilteredDownloadedBytesPerSecond.Start(); + DownloadLargeBlob(Storage, + TempFolder, + BuildId, + ChunkAttachment, + PreferredMultipartChunkSize, + Work, + NetworkPool, + DownloadedByteCount, + MultipartAttachmentCount, + [&, ChunkHash = ChunkAttachment](IoBuffer&& Payload) { + Payload.SetContentType(ZenContentType::kCompressedBinary); + if (!AbortFlag) + { + Work.ScheduleWork( + VerifyPool, + [&, Payload = std::move(Payload), ChunkHash](std::atomic&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_Validate"); + + FilteredVerifiedBytesPerSecond.Start(); + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateBlob(std::move(Payload), ChunkHash, CompressedSize, DecompressedSize); + ZEN_CONSOLE_VERBOSE("Chunk attachment {} ({} -> {}) is valid", + ChunkHash, + NiceBytes(CompressedSize), + NiceBytes(DecompressedSize)); + VerifiedAttachmentCount++; + VerifiedByteCount += DecompressedSize; + if (VerifiedAttachmentCount.load() == AttachmentsToVerifyCount) + { + FilteredVerifiedBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + }); } }, Work.DefaultErrorFunction()); @@ -1365,6 +1525,8 @@ namespace { [&, BlockAttachment](std::atomic&) { if (!AbortFlag) { + ZEN_TRACE_CPU("ValidateBuildPart_GetBlock"); + FilteredDownloadedBytesPerSecond.Start(); IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlockAttachment); DownloadedAttachmentCount++; @@ -1384,6 +1546,8 @@ namespace { [&, Payload = std::move(Payload), BlockAttachment](std::atomic&) mutable { if (!AbortFlag) { + ZEN_TRACE_CPU("ValidateBuildPart_ValidateBlock"); + FilteredVerifiedBytesPerSecond.Start(); uint64_t CompressedSize; @@ -1442,6 +1606,7 @@ namespace { std::vector& ChunkIndexes, std::vector>& OutBlocks) { + ZEN_TRACE_CPU("ArrangeChunksIntoBlocks"); std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&Content, &Lookup](uint32_t Lhs, uint32_t Rhs) { const ChunkedContentLookup::ChunkSequenceLocation& LhsLocation = GetChunkSequenceLocations(Lookup, Lhs)[0]; const ChunkedContentLookup::ChunkSequenceLocation& RhsLocation = GetChunkSequenceLocations(Lookup, Rhs)[0]; @@ -1535,6 +1700,7 @@ namespace { uint32_t ChunkIndex, const std::filesystem::path& TempFolderPath) { + ZEN_TRACE_CPU("CompressChunk"); ZEN_ASSERT(!TempFolderPath.empty()); const IoHash& ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; @@ -1608,7 +1774,7 @@ namespace { { std::vector BlockDescriptions; std::vector BlockSizes; - std::vector BlockBuffers; + std::vector BlockHeaders; std::vector BlockMetaDatas; std::vector MetaDataHasBeenUploaded; tsl::robin_map BlockHashToBlockIndex; @@ -1625,6 +1791,7 @@ namespace { UploadStatistics& UploadStats, GenerateBlocksStatistics& GenerateBlocksStats) { + ZEN_TRACE_CPU("GenerateBuildBlocks"); const std::size_t NewBlockCount = NewBlockChunks.size(); if (NewBlockCount > 0) { @@ -1632,22 +1799,23 @@ namespace { OutBlocks.BlockDescriptions.resize(NewBlockCount); OutBlocks.BlockSizes.resize(NewBlockCount); - OutBlocks.BlockBuffers.resize(NewBlockCount); OutBlocks.BlockMetaDatas.resize(NewBlockCount); + OutBlocks.BlockHeaders.resize(NewBlockCount); OutBlocks.MetaDataHasBeenUploaded.resize(NewBlockCount, false); OutBlocks.BlockHashToBlockIndex.reserve(NewBlockCount); RwLock Lock; - WorkerThreadPool& GenerateBlobsPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool();// - WorkerThreadPool& UploadBlocksPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool();// + WorkerThreadPool& GenerateBlobsPool = + GetMediumWorkerPool(EWorkloadType::Burst); // GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool();// + WorkerThreadPool& UploadBlocksPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool();// FilteredRate FilteredGeneratedBytesPerSecond; FilteredRate FilteredUploadedBytesPerSecond; ParallellWork Work(AbortFlag); - std::atomic PendingUploadCount(0); + std::atomic QueuedPendingBlocksForUpload = 0; for (size_t BlockIndex = 0; BlockIndex < NewBlockCount; BlockIndex++) { @@ -1661,6 +1829,8 @@ namespace { [&, BlockIndex](std::atomic&) { if (!AbortFlag) { + ZEN_TRACE_CPU("GenerateBuildBlocks_Generate"); + FilteredGeneratedBytesPerSecond.Start(); // TODO: Convert ScheduleWork body to function @@ -1672,14 +1842,6 @@ namespace { OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); OutBlocks.BlockSizes[BlockIndex] = CompressedBlock.GetCompressedSize(); - - if (!IsBufferDiskBased(CompressedBlock.GetCompressed())) - { - IoBuffer TempPayload = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), - Path / ZenTempBlockFolderName, - OutBlocks.BlockDescriptions[BlockIndex].BlockHash); - CompressedBlock = CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)); - } { CbObjectWriter Writer; Writer.AddString("createdBy", "zen"); @@ -1693,66 +1855,88 @@ namespace { BlockIndex); }); + { + std::span Segments = CompressedBlock.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) { FilteredGeneratedBytesPerSecond.Stop(); } - if (!AbortFlag) + if (QueuedPendingBlocksForUpload.load() > 16) { - PendingUploadCount++; - Work.ScheduleWork( - UploadBlocksPool, - [&, BlockIndex, Payload = std::move(CompressedBlock).GetCompressed()](std::atomic&) mutable { - if (!AbortFlag) - { - if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) - { - FilteredUploadedBytesPerSecond.Stop(); - OutBlocks.BlockBuffers[BlockIndex] = std::move(Payload); - } - else + std::span Segments = CompressedBlock.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + else + { + if (!AbortFlag) + { + QueuedPendingBlocksForUpload++; + + Work.ScheduleWork( + UploadBlocksPool, + [&, BlockIndex, Payload = std::move(CompressedBlock)](std::atomic&) mutable { + auto _ = MakeGuard([&QueuedPendingBlocksForUpload] { QueuedPendingBlocksForUpload--; }); + if (!AbortFlag) { - FilteredUploadedBytesPerSecond.Start(); - // TODO: Convert ScheduleWork body to function - - PendingUploadCount--; - - const CbObject BlockMetaData = - BuildChunkBlockDescription(OutBlocks.BlockDescriptions[BlockIndex], - OutBlocks.BlockMetaDatas[BlockIndex]); - - const IoHash& BlockHash = OutBlocks.BlockDescriptions[BlockIndex].BlockHash; - const uint64_t CompressedBlockSize = Payload.GetSize(); - Storage.PutBuildBlob(BuildId, - BlockHash, - ZenContentType::kCompressedBinary, - std::move(Payload)); - UploadStats.BlocksBytes += CompressedBlockSize; - ZEN_CONSOLE_VERBOSE("Uploaded block {} ({}) containing {} chunks", - OutBlocks.BlockDescriptions[BlockIndex].BlockHash, - NiceBytes(CompressedBlockSize), - OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); - - Storage.PutBlockMetadata(BuildId, - OutBlocks.BlockDescriptions[BlockIndex].BlockHash, - BlockMetaData); - ZEN_CONSOLE_VERBOSE("Uploaded block {} metadata ({})", - OutBlocks.BlockDescriptions[BlockIndex].BlockHash, - NiceBytes(BlockMetaData.GetSize())); - - OutBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; - - UploadStats.BlocksBytes += BlockMetaData.GetSize(); - UploadStats.BlockCount++; - if (UploadStats.BlockCount == NewBlockCount) + if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) { + ZEN_TRACE_CPU("GenerateBuildBlocks_Save"); + FilteredUploadedBytesPerSecond.Stop(); + std::span Segments = Payload.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + else + { + ZEN_TRACE_CPU("GenerateBuildBlocks_Upload"); + + FilteredUploadedBytesPerSecond.Start(); + // TODO: Convert ScheduleWork body to function + + const CbObject BlockMetaData = + BuildChunkBlockDescription(OutBlocks.BlockDescriptions[BlockIndex], + OutBlocks.BlockMetaDatas[BlockIndex]); + + const IoHash& BlockHash = OutBlocks.BlockDescriptions[BlockIndex].BlockHash; + const uint64_t CompressedBlockSize = Payload.GetCompressedSize(); + + Storage.PutBuildBlob(BuildId, + BlockHash, + ZenContentType::kCompressedBinary, + std::move(Payload).GetCompressed()); + UploadStats.BlocksBytes += CompressedBlockSize; + ZEN_CONSOLE_VERBOSE("Uploaded block {} ({}) containing {} chunks", + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(CompressedBlockSize), + OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + + Storage.PutBlockMetadata(BuildId, + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + BlockMetaData); + ZEN_CONSOLE_VERBOSE("Uploaded block {} metadata ({})", + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(BlockMetaData.GetSize())); + + OutBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + UploadStats.BlockCount++; + if (UploadStats.BlockCount == NewBlockCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } } } - } - }, - Work.DefaultErrorFunction()); + }, + Work.DefaultErrorFunction()); + } } } }, @@ -1783,6 +1967,8 @@ namespace { false); }); + ZEN_ASSERT(AbortFlag || QueuedPendingBlocksForUpload.load() == 0); + ProgressBar.Finish(); GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTimeUS(); @@ -1805,6 +1991,7 @@ namespace { GenerateBlocksStatistics& GenerateBlocksStats, LooseChunksStatistics& LooseChunksStats) { + ZEN_TRACE_CPU("UploadPartBlobs"); { ProgressBar ProgressBar(UsePlainProgress); @@ -1858,12 +2045,37 @@ namespace { const size_t UploadBlockCount = BlockIndexes.size(); const uint32_t UploadChunkCount = gsl::narrow(LooseChunkOrderIndexes.size()); - auto AsyncUploadBlock = [&](const size_t BlockIndex, const IoHash BlockHash, CompositeBuffer&& Payload) { + auto AsyncUploadBlock = [&](const size_t BlockIndex, + const IoHash BlockHash, + CompositeBuffer&& Payload, + std::atomic& QueuedPendingInMemoryBlocksForUpload) { + bool IsInMemoryBlock = true; + if (QueuedPendingInMemoryBlocksForUpload.load() > 16) + { + ZEN_TRACE_CPU("AsyncUploadBlock_WriteTempBlock"); + Payload = CompositeBuffer(WriteToTempFile(std::move(Payload), Path / ZenTempBlockFolderName, BlockHash)); + IsInMemoryBlock = false; + } + else + { + QueuedPendingInMemoryBlocksForUpload++; + } + Work.ScheduleWork( UploadChunkPool, - [&, BlockIndex, BlockHash, Payload = std::move(Payload)](std::atomic&) mutable { + [&, IsInMemoryBlock, BlockIndex, BlockHash, Payload = std::move(Payload)](std::atomic&) mutable { + auto _ = MakeGuard([IsInMemoryBlock, &QueuedPendingInMemoryBlocksForUpload] { + if (IsInMemoryBlock) + { + QueuedPendingInMemoryBlocksForUpload--; + } + }); if (!AbortFlag) { + ZEN_TRACE_CPU("AsyncUploadBlock"); + + const uint64_t PayloadSize = Payload.GetSize(); + FilteredUploadedBytesPerSecond.Start(); const CbObject BlockMetaData = BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); @@ -1871,10 +2083,10 @@ namespace { Storage.PutBuildBlob(BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); ZEN_CONSOLE_VERBOSE("Uploaded block {} ({}) containing {} chunks", NewBlocks.BlockDescriptions[BlockIndex].BlockHash, - NiceBytes(Payload.GetSize()), + NiceBytes(PayloadSize), NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); - UploadedBlockSize += Payload.GetSize(); - UploadStats.BlocksBytes += Payload.GetSize(); + UploadedBlockSize += PayloadSize; + UploadStats.BlocksBytes += PayloadSize; Storage.PutBlockMetadata(BuildId, BlockHash, BlockMetaData); ZEN_CONSOLE_VERBOSE("Uploaded block {} metadata ({})", @@ -1902,9 +2114,13 @@ namespace { [&, RawHash, RawSize, Payload = CompositeBuffer(std::move(Payload))](std::atomic&) mutable { if (!AbortFlag) { + ZEN_TRACE_CPU("AsyncUploadLooseChunk"); + const uint64_t PayloadSize = Payload.GetSize(); + ; if (PayloadSize >= LargeAttachmentSize) { + ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart"); UploadStats.MultipartAttachmentCount++; std::vector> MultipartWork = Storage.PutLargeBuildBlob( BuildId, @@ -1938,6 +2154,7 @@ namespace { Work.ScheduleWork( UploadChunkPool, [Work = std::move(WorkPart)](std::atomic&) { + ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart_Work"); if (!AbortFlag) { Work(); @@ -1949,6 +2166,7 @@ namespace { } else { + ZEN_TRACE_CPU("AsyncUploadLooseChunk_Singlepart"); Storage.PutBuildBlob(BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); ZEN_CONSOLE_VERBOSE("Uploaded chunk {} ({})", RawHash, NiceBytes(PayloadSize)); UploadStats.ChunksBytes += Payload.GetSize(); @@ -1971,26 +2189,10 @@ namespace { std::atomic GeneratedBlockCount = 0; std::atomic GeneratedBlockByteCount = 0; - // Start upload of any pre-built blocks - for (const size_t BlockIndex : BlockIndexes) - { - if (CompositeBuffer BlockPayload = std::move(NewBlocks.BlockBuffers[BlockIndex]); BlockPayload) - { - const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; - if (!AbortFlag) - { - AsyncUploadBlock(BlockIndex, BlockHash, std::move(BlockPayload)); - } - // GeneratedBlockCount++; - } - else - { - GenerateBlockIndexes.push_back(BlockIndex); - } - } - std::vector CompressLooseChunkOrderIndexes; + std::atomic QueuedPendingInMemoryBlocksForUpload = 0; + // Start upload of any pre-compressed loose chunks for (const uint32_t LooseChunkOrderIndex : LooseChunkOrderIndexes) { @@ -1998,31 +2200,43 @@ namespace { } // Start generation of any non-prebuilt blocks and schedule upload - for (const size_t BlockIndex : GenerateBlockIndexes) + for (const size_t BlockIndex : BlockIndexes) { const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; if (!AbortFlag) { Work.ScheduleWork( - ReadChunkPool, + ReadChunkPool, // GetSyncWorkerPool() [&, BlockIndex](std::atomic&) { if (!AbortFlag) { + ZEN_TRACE_CPU("UploadPartBlobs_GenerateBlock"); + FilteredGenerateBlockBytesPerSecond.Start(); - ChunkBlockDescription BlockDescription; - CompressedBuffer CompressedBlock = - GenerateBlock(Path, Content, Lookup, NewBlockChunks[BlockIndex], BlockDescription, DiskStats); - if (!CompressedBlock) + + CompositeBuffer Payload; + if (NewBlocks.BlockHeaders[BlockIndex]) { - throw std::runtime_error(fmt::format("Failed generating block {}", BlockHash)); + Payload = RebuildBlock(Path, + Content, + Lookup, + std::move(NewBlocks.BlockHeaders[BlockIndex]), + NewBlockChunks[BlockIndex], + DiskStats) + .GetCompressed(); + } + else + { + ChunkBlockDescription BlockDescription; + CompressedBuffer CompressedBlock = + GenerateBlock(Path, Content, Lookup, NewBlockChunks[BlockIndex], BlockDescription, DiskStats); + if (!CompressedBlock) + { + throw std::runtime_error(fmt::format("Failed generating block {}", BlockHash)); + } + ZEN_ASSERT(BlockDescription.BlockHash == BlockHash); + Payload = std::move(CompressedBlock).GetCompressed(); } - ZEN_ASSERT(BlockDescription.BlockHash == BlockHash); - - CompositeBuffer Payload = IsBufferDiskBased(CompressedBlock.GetCompressed()) - ? std::move(CompressedBlock).GetCompressed() - : CompositeBuffer(WriteToTempFile(std::move(CompressedBlock).GetCompressed(), - Path / ZenTempBlockFolderName, - BlockDescription.BlockHash)); GenerateBlocksStats.GeneratedBlockByteCount += NewBlocks.BlockSizes[BlockIndex]; GenerateBlocksStats.GeneratedBlockCount++; @@ -2034,11 +2248,11 @@ namespace { } if (!AbortFlag) { - AsyncUploadBlock(BlockIndex, BlockHash, std::move(Payload)); + AsyncUploadBlock(BlockIndex, BlockHash, std::move(Payload), QueuedPendingInMemoryBlocksForUpload); } ZEN_CONSOLE_VERBOSE("Regenerated block {} ({}) containing {} chunks", NewBlocks.BlockDescriptions[BlockIndex].BlockHash, - NiceBytes(CompressedBlock.GetCompressedSize()), + NiceBytes(NewBlocks.BlockSizes[BlockIndex]), NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); } }, @@ -2059,6 +2273,8 @@ namespace { [&, ChunkIndex](std::atomic&) { if (!AbortFlag) { + ZEN_TRACE_CPU("UploadPartBlobs_CompressChunk"); + FilteredCompressedBytesPerSecond.Start(); CompositeBuffer Payload = CompressChunk(Path, Content, Lookup, ChunkIndex, Path / ZenTempChunkFolderName); ZEN_CONSOLE_VERBOSE("Compressed chunk {} ({} -> {})", @@ -2117,6 +2333,8 @@ namespace { false); }); + ZEN_ASSERT(AbortFlag || QueuedPendingInMemoryBlocksForUpload.load() == 0); + ProgressBar.Finish(); UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGenerateBlockBytesPerSecond.GetElapsedTimeUS(); @@ -2131,6 +2349,8 @@ namespace { std::vector& OutUnusedChunkIndexes, FindBlocksStatistics& FindBlocksStats) { + ZEN_TRACE_CPU("FindReuseBlocks"); + // Find all blocks with a usage level higher than MinPercentLimit // Pick out the blocks with usage higher or equal to MinPercentLimit // Sort them with highest size usage - most usage first @@ -2303,8 +2523,10 @@ namespace { CreateDirectories(ZenTempFolder); CleanDirectory(ZenTempFolder, {}); auto _ = MakeGuard([&]() { - CleanDirectory(ZenTempFolder, {}); - std::filesystem::remove(ZenTempFolder); + if (CleanDirectory(ZenTempFolder, {})) + { + std::filesystem::remove(ZenTempFolder); + } }); CreateDirectories(Path / ZenTempBlockFolderName); CreateDirectories(Path / ZenTempChunkFolderName); @@ -2327,10 +2549,14 @@ namespace { GetSmallWorkerPool(EWorkloadType::Burst) .EnqueueTask(std::packaged_task{ [&Storage, BuildId, &MetaData, CreateBuild, AllowMultiparts, IgnoreExistingBlocks, &FindBlocksStats] { + ZEN_TRACE_CPU("PrepareBuild"); + PrepareBuildResult Result; Stopwatch Timer; if (CreateBuild) { + ZEN_TRACE_CPU("CreateBuild"); + Stopwatch PutBuildTimer; CbObject PutBuildResult = Storage.PutBuild(BuildId, MetaData); Result.PrepareBuildTimeMs = PutBuildTimer.GetElapsedTimeMs(); @@ -2339,6 +2565,7 @@ namespace { } else { + ZEN_TRACE_CPU("PutBuild"); Stopwatch GetBuildTimer; CbObject Build = Storage.GetBuild(BuildId); Result.PrepareBuildTimeMs = GetBuildTimer.GetElapsedTimeMs(); @@ -2356,6 +2583,7 @@ namespace { if (!IgnoreExistingBlocks) { + ZEN_TRACE_CPU("FindBlocks"); Stopwatch KnownBlocksTimer; Result.KnownBlocks = Storage.FindBlocks(BuildId); FindBlocksStats.FindBlockTimeMS = KnownBlocksTimer.GetElapsedTimeMs(); @@ -2523,11 +2751,8 @@ namespace { ChunkerParameters = ChunkParametersWriter.Save(); } - std::uint64_t TotalRawSize = 0; - for (uint64_t RawSize : Content.RawSizes) - { - TotalRawSize += RawSize; - } + std::uint64_t TotalRawSize = std::accumulate(Content.RawSizes.begin(), Content.RawSizes.end(), std::uint64_t(0)); + { ProgressBar ProgressBar(UsePlainProgress); FilteredRate FilteredBytesHashed; @@ -2956,9 +3181,9 @@ namespace { ForceUploadChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); } - for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockBuffers.size(); BlockIndex++) + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockHeaders.size(); BlockIndex++) { - if (NewBlocks.BlockBuffers[BlockIndex]) + if (NewBlocks.BlockHeaders[BlockIndex]) { // Block was not uploaded during generation ForceUploadChunkHashes.push_back(NewBlocks.BlockDescriptions[BlockIndex].BlockHash); @@ -3457,8 +3682,8 @@ namespace { std::vector ChunkBuffers; struct WriteOpData { - const ChunkedContentLookup::ChunkSequenceLocation* Target; - size_t ChunkBufferIndex; + const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; + size_t ChunkBufferIndex = (size_t)-1; }; std::vector WriteOps; }; @@ -3964,155 +4189,129 @@ namespace { } } - void DownloadLargeBlob(BuildStorage& Storage, - const std::filesystem::path& Path, - const ChunkedFolderContent& RemoteContent, - const ChunkedContentLookup& RemoteLookup, - const Oid& BuildId, - const IoHash& ChunkHash, - const std::uint64_t PreferredMultipartChunkSize, - const std::vector& ChunkTargetPtrs, - std::span> SequenceIndexChunksLeftToWriteCounters, - ParallellWork& Work, - WorkerThreadPool& WritePool, - WorkerThreadPool& NetworkPool, - std::atomic& WriteToDiskBytes, - std::atomic& BytesDownloaded, - std::atomic& MultipartAttachmentCount, - std::function&& OnDownloadComplete, - std::function&& OnWriteStart, - std::function&& OnWriteComplete) + void AsyncWriteDownloadedChunk(const std::filesystem::path& Path, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& RemoteLookup, + uint32_t RemoteChunkIndex, + std::vector&& ChunkTargetPtrs, + ParallellWork& Work, + WorkerThreadPool& WritePool, + IoBuffer&& Payload, + std::span> SequenceIndexChunksLeftToWriteCounters, + std::atomic& WriteToDiskBytes, + std::atomic& ChunkCountWritten, + std::atomic& WritePartsComplete, + std::atomic& TotalPartWriteCount, + std::atomic& LooseChunksBytes, + FilteredRate& FilteredWrittenBytesPerSecond) { - ZEN_TRACE_CPU("DownloadLargeBlob"); + ZEN_TRACE_CPU("AsyncWriteDownloadedChunk"); - struct WorkloadData + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + + uint64_t Size = Payload.GetSize(); + LooseChunksBytes += Size; + + std::filesystem::path CompressedChunkPath; + + // Check if the dowloaded chunk is file based and we can move it directly without rewriting it { - TemporaryFile TempFile; - }; - std::shared_ptr Workload(std::make_shared()); + IoBufferFileReference FileRef; + if (Payload.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && (FileRef.FileChunkSize == Size)) + { + ZEN_TRACE_CPU("MoveTempChunk"); + std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) + { + Payload.SetDeleteOnClose(false); + Payload = {}; + CompressedChunkPath = Path / ZenTempDownloadFolderName / ChunkHash.ToHexString(); + std::filesystem::rename(TempBlobPath, CompressedChunkPath, Ec); + if (Ec) + { + CompressedChunkPath = std::filesystem::path{}; - std::filesystem::path DownloadFolder = Path / ZenTempDownloadFolderName; - std::filesystem::path TargetFolder = Path / ZenTempCacheFolderName; + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + Payload = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, Size, true); + Payload.SetDeleteOnClose(true); + } + } + } + } - std::error_code Ec; - Workload->TempFile.CreateTemporary(DownloadFolder, Ec); - if (Ec) + if (CompressedChunkPath.empty() && (Size > 512u * 1024u)) { - throw std::runtime_error( - fmt::format("Failed opening temporary file '{}': {} ({})", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); + ZEN_TRACE_CPU("WriteTempChunk"); + // Could not be moved and rather large, lets store it on disk + CompressedChunkPath = Path / ZenTempDownloadFolderName / ChunkHash.ToHexString(); + TemporaryFile::SafeWriteFile(CompressedChunkPath, Payload); + Payload = {}; } - std::vector> WorkItems = Storage.GetLargeBuildBlob( - BuildId, - ChunkHash, - PreferredMultipartChunkSize, - [DownloadFolder, - TargetFolder, - &RemoteContent, - &RemoteLookup, - &Work, - &WritePool, - Workload, - ChunkHash, - &BytesDownloaded, - OnDownloadComplete = std::move(OnDownloadComplete), - OnWriteComplete = std::move(OnWriteComplete), - OnWriteStart = std::move(OnWriteStart), - &WriteToDiskBytes, + + Work.ScheduleWork( + WritePool, // GetSyncWorkerPool(),// + [&, SequenceIndexChunksLeftToWriteCounters, - ChunkTargetPtrs = std::vector( - ChunkTargetPtrs)](uint64_t Offset, const IoBuffer& Chunk, uint64_t BytesRemaining) { - BytesDownloaded += Chunk.GetSize(); + CompressedChunkPath, + RemoteChunkIndex, + ChunkTargetPtrs = std::move(ChunkTargetPtrs), + CompressedPart = std::move(Payload)](std::atomic&) mutable { + ZEN_TRACE_CPU("UpdateFolder_WriteChunk"); - if (!AbortFlag.load()) + if (!AbortFlag) { - ZEN_TRACE_CPU("DownloadLargeBlob_Save"); - Workload->TempFile.Write(Chunk.GetView(), Offset); - if (Chunk.GetSize() == BytesRemaining) + FilteredWrittenBytesPerSecond.Start(); + + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + if (CompressedChunkPath.empty()) { - OnDownloadComplete(Workload->TempFile.FileSize()); - - Work.ScheduleWork( - WritePool, // GetSyncWorkerPool(),// - [DownloadFolder, - TargetFolder, - &RemoteContent, - &RemoteLookup, - ChunkHash, - Workload, - Offset, - OnWriteComplete = std::move(OnWriteComplete), - OnWriteStart = std::move(OnWriteStart), - &WriteToDiskBytes, - SequenceIndexChunksLeftToWriteCounters, - ChunkTargetPtrs](std::atomic&) { - ZEN_TRACE_CPU("DownloadLargeBlob_Write"); + ZEN_ASSERT(CompressedPart); + } + else + { + ZEN_ASSERT(!CompressedPart); + CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (!CompressedPart) + { + throw std::runtime_error( + fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath)); + } + } - if (!AbortFlag) - { - const std::filesystem::path CompressedChunkPath = DownloadFolder / ChunkHash.ToHexString(); - std::error_code Ec; - Workload->TempFile.MoveTemporaryIntoPlace(CompressedChunkPath, Ec); - if (Ec) - { - throw std::runtime_error(fmt::format("Failed moving downloaded chunk {} file to {}. Reason: {}", - ChunkHash, - CompressedChunkPath, - Ec.message())); - } + std::filesystem::path TargetFolder = Path / ZenTempCacheFolderName; - IoBuffer CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); - if (!CompressedPart) - { - throw std::runtime_error(fmt::format("Could not open dowloaded compressed chunk {} from {}", - ChunkHash, - CompressedChunkPath)); - } + bool NeedHashVerify = WriteCompressedChunk(TargetFolder, + RemoteContent, + RemoteLookup, + ChunkHash, + ChunkTargetPtrs, + std::move(CompressedPart), + WriteToDiskBytes); - OnWriteStart(); + if (!AbortFlag) + { + ChunkCountWritten++; + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } - bool NeedHashVerify = WriteCompressedChunk(TargetFolder, - RemoteContent, - RemoteLookup, - ChunkHash, - ChunkTargetPtrs, - std::move(CompressedPart), - WriteToDiskBytes); + std::filesystem::remove(CompressedChunkPath); - if (!AbortFlag) - { - std::filesystem::remove(CompressedChunkPath); - - CompleteChunkTargets(TargetFolder, - RemoteContent, - ChunkHash, - ChunkTargetPtrs, - SequenceIndexChunksLeftToWriteCounters, - NeedHashVerify); - } - } - }, - Work.DefaultErrorFunction()); + CompleteChunkTargets(TargetFolder, + RemoteContent, + ChunkHash, + ChunkTargetPtrs, + SequenceIndexChunksLeftToWriteCounters, + NeedHashVerify); } } - }); - if (!WorkItems.empty()) - { - MultipartAttachmentCount++; - } - for (auto& WorkItem : WorkItems) - { - Work.ScheduleWork( - NetworkPool, // GetSyncWorkerPool(),// - [WorkItem = std::move(WorkItem)](std::atomic&) { - ZEN_TRACE_CPU("DownloadLargeBlob_Work"); - if (!AbortFlag) - { - WorkItem(); - } - }, - Work.DefaultErrorFunction()); - } - } + }, + Work.DefaultErrorFunction()); + }; void UpdateFolder(BuildStorage& Storage, const Oid& BuildId, @@ -4291,13 +4490,13 @@ namespace { // Pick up all chunks in current local state struct CacheCopyData { - uint32_t LocalSequenceIndex; + uint32_t LocalSequenceIndex = (uint32_t)-1; std::vector TargetChunkLocationPtrs; struct ChunkTarget { - uint32_t TargetChunkLocationCount; - uint64_t ChunkRawSize; - uint64_t CacheFileOffset; + uint32_t TargetChunkLocationCount = (uint32_t)-1; + uint64_t ChunkRawSize = (uint64_t)-1; + uint64_t CacheFileOffset = (uint64_t)-1; }; std::vector ChunkTargets; }; @@ -4419,8 +4618,8 @@ namespace { uint64_t TotalRequestCount = 0; std::atomic RequestsComplete = 0; std::atomic ChunkCountWritten = 0; - std::atomic TotalPartWriteCount = 0; - std::atomic WritePartsComplete = 0; + std::atomic TotalPartWriteCount = 0; + std::atomic WritePartsComplete = 0; { ZEN_TRACE_CPU("WriteChunks"); @@ -4441,7 +4640,7 @@ namespace { struct LooseChunkHashWorkData { std::vector ChunkTargetPtrs; - uint32_t RemoteChunkIndex; + uint32_t RemoteChunkIndex = (uint32_t)-1; }; std::vector LooseChunkHashWorks; @@ -4687,9 +4886,9 @@ namespace { struct WriteOp { - const ChunkedContentLookup::ChunkSequenceLocation* Target; - uint64_t CacheFileOffset; - uint64_t ChunkSize; + const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; + uint64_t CacheFileOffset = (uint64_t)-1; + uint64_t ChunkSize = (uint64_t)-1; }; std::vector WriteOps; @@ -4821,10 +5020,11 @@ namespace { const uint32_t RemoteChunkIndex = LooseChunkHashWork.RemoteChunkIndex; Work.ScheduleWork( - NetworkPool, // NetworkPool, // GetSyncWorkerPool(),// - [&, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) { + WritePool, // NetworkPool, // GetSyncWorkerPool(),// + [&, RemoteChunkIndex, ChunkTargetPtrs](std::atomic&) mutable { if (!AbortFlag) { + ZEN_TRACE_CPU("UpdateFolder_ReadPreDownloaded"); std::filesystem::path ExistingCompressedChunkPath; { const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; @@ -4925,39 +5125,37 @@ namespace { if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) { ZEN_TRACE_CPU("UpdateFolder_GetLargeChunk"); - DownloadLargeBlob( - Storage, - Path, - RemoteContent, - RemoteLookup, - BuildId, - ChunkHash, - PreferredMultipartChunkSize, - ChunkTargetPtrs, - SequenceIndexChunksLeftToWriteCounters, - Work, - WritePool, - NetworkPool, - WriteToDiskBytes, - BytesDownloaded, - MultipartAttachmentCount, - [&](uint64_t BytesDownloaded) { - LooseChunksBytes += BytesDownloaded; - RequestsComplete++; - if (RequestsComplete == TotalRequestCount) - { - FilteredDownloadedBytesPerSecond.Stop(); - } - }, - [&]() { FilteredWrittenBytesPerSecond.Start(); }, - [&]() { - ChunkCountWritten++; - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - }); + DownloadLargeBlob(Storage, + Path / ZenTempDownloadFolderName, + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + Work, + NetworkPool, + BytesDownloaded, + MultipartAttachmentCount, + [&, RemoteChunkIndex, ChunkTargetPtrs](IoBuffer&& Payload) mutable { + RequestsComplete++; + if (RequestsComplete == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + AsyncWriteDownloadedChunk(Path, + RemoteContent, + RemoteLookup, + RemoteChunkIndex, + std::move(ChunkTargetPtrs), + Work, + WritePool, + std::move(Payload), + SequenceIndexChunksLeftToWriteCounters, + WriteToDiskBytes, + ChunkCountWritten, + WritePartsComplete, + TotalPartWriteCount, + LooseChunksBytes, + FilteredWrittenBytesPerSecond); + }); } else { @@ -4970,129 +5168,27 @@ namespace { } uint64_t BlobSize = BuildBlob.GetSize(); BytesDownloaded += BlobSize; - LooseChunksBytes += BlobSize; + RequestsComplete++; if (RequestsComplete == TotalRequestCount) { FilteredDownloadedBytesPerSecond.Stop(); } - - std::filesystem::path CompressedChunkPath; - - // Check if the dowloaded file is file based and we can move it directly without rewriting it - { - IoBufferFileReference FileRef; - if (BuildBlob.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && - (FileRef.FileChunkSize == BlobSize)) - { - ZEN_TRACE_CPU("UpdateFolder_MoveTempChunk"); - std::error_code Ec; - std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); - if (!Ec) - { - BuildBlob.SetDeleteOnClose(false); - BuildBlob = {}; - CompressedChunkPath = Path / ZenTempDownloadFolderName / ChunkHash.ToHexString(); - std::filesystem::rename(TempBlobPath, CompressedChunkPath, Ec); - if (Ec) - { - CompressedChunkPath = std::filesystem::path{}; - - // Re-open the temp file again - BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); - BuildBlob = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlobSize, true); - BuildBlob.SetDeleteOnClose(true); - } - } - } - } - - if (CompressedChunkPath.empty() && (BlobSize > 512u * 1024u)) - { - ZEN_TRACE_CPU("UpdateFolder_WriteTempChunk"); - // Could not be moved and rather large, lets store it on disk - CompressedChunkPath = Path / ZenTempDownloadFolderName / ChunkHash.ToHexString(); - TemporaryFile::SafeWriteFile(CompressedChunkPath, BuildBlob); - BuildBlob = {}; - } - DownloadedChunks++; - - if (!AbortFlag) - { - Work.ScheduleWork( - WritePool, // WritePool, GetSyncWorkerPool() - [&Path, - &RemoteContent, - &RemoteLookup, - &CacheFolderPath, - &SequenceIndexChunksLeftToWriteCounters, - &WriteToDiskBytes, - &ChunkCountWritten, - &WritePartsComplete, - &TotalPartWriteCount, - &FilteredWrittenBytesPerSecond, - RemoteChunkIndex, - ChunkTargetPtrs, - CompressedChunkPath, - CompressedPart = std::move(BuildBlob)](std::atomic&) mutable { - if (!AbortFlag) - { - ZEN_TRACE_CPU("UpdateFolder_WriteChunk"); - - FilteredWrittenBytesPerSecond.Start(); - - const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; - if (CompressedChunkPath.empty()) - { - ZEN_ASSERT(CompressedPart); - } - else - { - ZEN_ASSERT(!CompressedPart); - CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); - if (!CompressedPart) - { - throw std::runtime_error( - fmt::format("Could not open dowloaded compressed chunk {} from {}", - ChunkHash, - CompressedChunkPath)); - } - } - - std::filesystem::path TargetFolder = Path / ZenTempCacheFolderName; - bool NeedHashVerify = WriteCompressedChunk(TargetFolder, - RemoteContent, - RemoteLookup, - ChunkHash, - ChunkTargetPtrs, - std::move(CompressedPart), - WriteToDiskBytes); - - if (!AbortFlag) - { - ChunkCountWritten++; - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - - if (!CompressedChunkPath.empty()) - { - std::filesystem::remove(CompressedChunkPath); - } - - CompleteChunkTargets(TargetFolder, - RemoteContent, - ChunkHash, - ChunkTargetPtrs, - SequenceIndexChunksLeftToWriteCounters, - NeedHashVerify); - } - } - }, - Work.DefaultErrorFunction()); - } + AsyncWriteDownloadedChunk(Path, + RemoteContent, + RemoteLookup, + RemoteChunkIndex, + std::move(ChunkTargetPtrs), + Work, + WritePool, + std::move(BuildBlob), + SequenceIndexChunksLeftToWriteCounters, + WriteToDiskBytes, + ChunkCountWritten, + WritePartsComplete, + TotalPartWriteCount, + LooseChunksBytes, + FilteredWrittenBytesPerSecond); } } } @@ -5375,7 +5471,7 @@ namespace { WritePool, // WritePool, GetSyncWorkerPool() [&RemoteContent, &RemoteLookup, - &CacheFolderPath, + CacheFolderPath, &RemoteChunkIndexNeedsCopyFromSourceFlags, &SequenceIndexChunksLeftToWriteCounters, BlockIndex, @@ -5550,7 +5646,10 @@ namespace { // Clean target folder ZEN_CONSOLE("Wiping {}", Path); - CleanDirectory(Path, DefaultExcludeFolders); + if (!CleanDirectory(Path, DefaultExcludeFolders)) + { + ZEN_WARN("Some files in {} could not be removed", Path); + } } else { @@ -6415,8 +6514,10 @@ namespace { ZEN_CONSOLE("Downloaded build in {}.", NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); } } - CleanDirectory(ZenTempFolder, {}); - std::filesystem::remove(ZenTempFolder); + if (CleanDirectory(ZenTempFolder, {})) + { + std::filesystem::remove(ZenTempFolder); + } } void DiffFolders(const std::filesystem::path& BasePath, const std::filesystem::path& ComparePath, bool OnlyChunked) @@ -7445,7 +7546,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { m_StoragePath = GetRunningExecutablePath().parent_path() / ".tmpstore"; CreateDirectories(m_StoragePath); - CleanDirectory(m_StoragePath); + CleanDirectory(m_StoragePath, {}); } auto _ = MakeGuard([&]() { if (m_BuildsUrl.empty() && m_StoragePath.empty()) diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index 4e6161e86..0fcf9d871 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -577,12 +577,15 @@ main(int argc, char** argv) GlobalOptions.PassthroughArgs = PassthroughArgs; GlobalOptions.PassthroughArgV = PassthroughArgV; + std::string MemoryOptions; + std::string SubCommand = ""; cxxopts::Options Options("zen", "Zen management tool"); Options.add_options()("d, debug", "Enable debugging", cxxopts::value(GlobalOptions.IsDebug)); Options.add_options()("v, verbose", "Enable verbose logging", cxxopts::value(GlobalOptions.IsVerbose)); + Options.add_options()("malloc", "Configure memory allocator subsystem", cxxopts::value(MemoryOptions)->default_value("mimalloc")); Options.add_options()("help", "Show command line help"); Options.add_options()("c, command", "Sub command", cxxopts::value(SubCommand)); diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index 4ca89d996..bb1ee5183 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -96,6 +96,8 @@ namespace { uint32_t PathIndex, std::atomic& AbortFlag) { + ZEN_TRACE_CPU("ChunkFolderContent"); + const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; @@ -136,6 +138,8 @@ namespace { } else { + ZEN_TRACE_CPU("HashOnly"); + IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); @@ -228,6 +232,7 @@ FolderContent::operator==(const FolderContent& Rhs) const bool FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const { + ZEN_TRACE_CPU("FolderContent::AreKnownFilesEqual"); tsl::robin_map RhsPathToIndex; const size_t RhsPathCount = Rhs.Paths.size(); RhsPathToIndex.reserve(RhsPathCount); @@ -259,6 +264,7 @@ FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const void FolderContent::UpdateState(const FolderContent& Rhs, std::vector& OutPathIndexesOufOfDate) { + ZEN_TRACE_CPU("FolderContent::UpdateState"); tsl::robin_map RhsPathToIndex; const uint32_t RhsPathCount = gsl::narrow(Rhs.Paths.size()); RhsPathToIndex.reserve(RhsPathCount); @@ -297,6 +303,7 @@ FolderContent::UpdateState(const FolderContent& Rhs, std::vector& OutP FolderContent GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector& OutDeletedPathIndexes) { + ZEN_TRACE_CPU("FolderContent::GetUpdatedContent"); FolderContent Result = {.Platform = Old.Platform}; tsl::robin_map NewPathToIndex; const uint32_t NewPathCount = gsl::narrow(New.Paths.size()); @@ -332,6 +339,7 @@ GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vecto void SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output) { + ZEN_TRACE_CPU("SaveFolderContentToCompactBinary"); Output.AddString("platform"sv, ToString(Content.Platform)); compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); @@ -342,6 +350,7 @@ SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output) FolderContent LoadFolderContentToCompactBinary(CbObjectView Input) { + ZEN_TRACE_CPU("LoadFolderContentToCompactBinary"); FolderContent Content; Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); @@ -494,6 +503,7 @@ GetFolderContent(GetFolderContentStatistics& Stats, void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output) { + ZEN_TRACE_CPU("SaveChunkedFolderContentToCompactBinary"); Output.AddString("platform"sv, ToString(Content.Platform)); compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); @@ -512,6 +522,7 @@ SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbW ChunkedFolderContent LoadChunkedFolderContentToCompactBinary(CbObjectView Input) { + ZEN_TRACE_CPU("LoadChunkedFolderContentToCompactBinary"); ChunkedFolderContent Content; Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); @@ -788,7 +799,7 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) struct ChunkLocationReference { - uint32_t ChunkIndex; + uint32_t ChunkIndex = (uint32_t)-1; ChunkedContentLookup::ChunkSequenceLocation Location; }; @@ -853,7 +864,7 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex}); uint32_t Count = 0; - while (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex) + while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex)) { Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location); Count++; diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp index 4f9344039..a2c041ffd 100644 --- a/src/zenutil/chunkedfile.cpp +++ b/src/zenutil/chunkedfile.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "chunking.h" @@ -33,6 +34,7 @@ namespace { IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info) { + ZEN_TRACE_CPU("SerializeChunkedInfo"); size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) + RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16); IoBuffer HeaderData(HeaderSize); @@ -65,6 +67,7 @@ SerializeChunkedInfo(const ChunkedInfo& Info) ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer) { + ZEN_TRACE_CPU("DeserializeChunkedInfo"); MemoryView View = Buffer.GetView(); ChunkedHeader Header; { @@ -99,6 +102,7 @@ DeserializeChunkedInfo(IoBuffer& Buffer) void Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function GetChunk) { + ZEN_TRACE_CPU("Reconstruct"); BasicFile Reconstructed; Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate); BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024); @@ -119,6 +123,8 @@ ChunkData(BasicFile& RawData, std::atomic* BytesProcessed, std::atomic* AbortFlag) { + ZEN_TRACE_CPU("ChunkData"); + ChunkedInfoWithSource Result; tsl::robin_map FoundChunks; diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp index 017d12433..2a7057a46 100644 --- a/src/zenutil/chunkingcontroller.cpp +++ b/src/zenutil/chunkingcontroller.cpp @@ -4,6 +4,7 @@ #include #include +#include ZEN_THIRD_PARTY_INCLUDES_START #include @@ -61,6 +62,7 @@ public: std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { + ZEN_TRACE_CPU("BasicChunkingController::ProcessFile"); const bool ExcludeFromChunking = std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != m_ChunkExcludeExtensions.end(); @@ -136,6 +138,7 @@ public: std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { + ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile"); if (RawSize < m_ChunkFileSizeLimit) { return false; @@ -145,6 +148,7 @@ public: if (FixedChunking) { + ZEN_TRACE_CPU("FixedChunking"); IoHashStream FullHash; IoBuffer Source = IoBufferBuilder::MakeFromFile(InputPath); uint64_t Offset = 0; diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp index e57109006..47a4e1cc4 100644 --- a/src/zenutil/filebuildstorage.cpp +++ b/src/zenutil/filebuildstorage.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace zen { @@ -36,6 +37,7 @@ public: virtual CbObject ListBuilds(CbObject Query) override { + ZEN_TRACE_CPU("FileBuildStorage::ListBuilds"); ZEN_UNUSED(Query); SimulateLatency(Query.GetSize(), 0); @@ -72,6 +74,7 @@ public: virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override { + ZEN_TRACE_CPU("FileBuildStorage::PutBuild"); SimulateLatency(MetaData.GetSize(), 0); Stopwatch ExecutionTimer; @@ -93,6 +96,7 @@ public: virtual CbObject GetBuild(const Oid& BuildId) override { + ZEN_TRACE_CPU("FileBuildStorage::GetBuild"); SimulateLatency(0, 0); Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); @@ -105,6 +109,7 @@ public: virtual void FinalizeBuild(const Oid& BuildId) override { + ZEN_TRACE_CPU("FileBuildStorage::FinalizeBuild"); SimulateLatency(0, 0); Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); @@ -119,6 +124,7 @@ public: std::string_view PartName, const CbObject& MetaData) override { + ZEN_TRACE_CPU("FileBuildStorage::PutBuildPart"); SimulateLatency(MetaData.GetSize(), 0); Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); @@ -164,6 +170,7 @@ public: virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override { + ZEN_TRACE_CPU("FileBuildStorage::GetBuildPart"); SimulateLatency(0, 0); Stopwatch ExecutionTimer; @@ -186,6 +193,7 @@ public: virtual std::vector FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override { + ZEN_TRACE_CPU("FileBuildStorage::FinalizeBuildPart"); SimulateLatency(0, 0); Stopwatch ExecutionTimer; @@ -215,6 +223,7 @@ public: ZenContentType ContentType, const CompositeBuffer& Payload) override { + ZEN_TRACE_CPU("FileBuildStorage::PutBuildBlob"); ZEN_UNUSED(BuildId); ZEN_ASSERT(ContentType == ZenContentType::kCompressedBinary); SimulateLatency(Payload.GetSize(), 0); @@ -242,6 +251,7 @@ public: std::function&& Transmitter, std::function&& OnSentBytes) override { + ZEN_TRACE_CPU("FileBuildStorage::PutLargeBuildBlob"); ZEN_UNUSED(BuildId); ZEN_UNUSED(ContentType); SimulateLatency(0, 0); @@ -281,6 +291,7 @@ public: uint64_t Size = Min(32u * 1024u * 1024u, PayloadSize - Offset); WorkItems.push_back([this, RawHash, BlockPath, Workload, Offset, Size]() { + ZEN_TRACE_CPU("FileBuildStorage::PutLargeBuildBlob_Work"); IoBuffer PartPayload = Workload->Transmitter(Offset, Size); SimulateLatency(PartPayload.GetSize(), 0); @@ -327,6 +338,7 @@ public: virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override { + ZEN_TRACE_CPU("FileBuildStorage::GetBuildBlob"); ZEN_UNUSED(BuildId); SimulateLatency(0, 0); Stopwatch ExecutionTimer; @@ -363,6 +375,7 @@ public: uint64_t ChunkSize, std::function&& Receiver) override { + ZEN_TRACE_CPU("FileBuildStorage::GetLargeBuildBlob"); ZEN_UNUSED(BuildId); SimulateLatency(0, 0); Stopwatch ExecutionTimer; @@ -392,6 +405,7 @@ public: { uint64_t Size = Min(ChunkSize, BlobSize - Offset); WorkItems.push_back([this, BlockPath, Workload, Offset, Size]() { + ZEN_TRACE_CPU("FileBuildStorage::GetLargeBuildBlob_Work"); SimulateLatency(0, 0); IoBuffer PartPayload(Size); Workload->BlobFile.Read(PartPayload.GetMutableView().GetData(), Size, Offset); @@ -411,6 +425,7 @@ public: virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override { + ZEN_TRACE_CPU("FileBuildStorage::PutBlockMetadata"); ZEN_UNUSED(BuildId); SimulateLatency(MetaData.GetSize(), 0); @@ -429,6 +444,7 @@ public: virtual std::vector FindBlocks(const Oid& BuildId) override { + ZEN_TRACE_CPU("FileBuildStorage::FindBlocks"); ZEN_UNUSED(BuildId); SimulateLatency(0, 0); Stopwatch ExecutionTimer; @@ -461,6 +477,7 @@ public: virtual std::vector GetBlockMetadata(const Oid& BuildId, std::span BlockHashes) override { + ZEN_TRACE_CPU("FileBuildStorage::GetBlockMetadata"); ZEN_UNUSED(BuildId); SimulateLatency(0, 0); Stopwatch ExecutionTimer; diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h index 309341550..57b55cb8e 100644 --- a/src/zenutil/include/zenutil/chunkedcontent.h +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -124,8 +124,8 @@ struct ChunkedContentLookup { struct ChunkSequenceLocation { - uint32_t SequenceIndex; - uint64_t Offset; + uint32_t SequenceIndex = (uint32_t)-1; + uint64_t Offset = (uint64_t)-1; }; tsl::robin_map ChunkHashToChunkIndex; tsl::robin_map RawHashToSequenceIndex; -- cgit v1.2.3 From 26f718b04bc0fba3831124d9dec705c2febd426e Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 12 Mar 2025 11:19:06 +0100 Subject: fix mac/linux builds command (#303) * fix linux/mac version of GetModificationTickFromPath and CopyFile --- src/zencore/filesystem.cpp | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp index 85feab2f7..9f3f4f7fc 100644 --- a/src/zencore/filesystem.cpp +++ b/src/zencore/filesystem.cpp @@ -590,7 +590,7 @@ CopyFile(const std::filesystem::path& FromPath, const std::filesystem::path& ToP ScopedFd $From = {FromFd}; // To file - int ToFd = open(ToPath.c_str(), O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0666); + int ToFd = open(ToPath.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0666); if (ToFd < 0) { ThrowLastError(fmt::format("failed to create file {}", ToPath)); @@ -598,9 +598,14 @@ CopyFile(const std::filesystem::path& FromPath, const std::filesystem::path& ToP fchmod(ToFd, 0666); ScopedFd $To = {ToFd}; + struct stat Stat; + fstat(FromFd, &Stat); + + size_t FileSizeBytes = Stat.st_size; + // Copy impl - static const size_t BufferSize = 64 << 10; - void* Buffer = malloc(BufferSize); + const size_t BufferSize = Min(FileSizeBytes, 64u << 10); + void* Buffer = malloc(BufferSize); while (true) { int BytesRead = read(FromFd, Buffer, BufferSize); @@ -610,7 +615,7 @@ CopyFile(const std::filesystem::path& FromPath, const std::filesystem::path& ToP break; } - if (write(ToFd, Buffer, BytesRead) != BufferSize) + if (write(ToFd, Buffer, BytesRead) != BytesRead) { Success = false; break; @@ -621,7 +626,7 @@ CopyFile(const std::filesystem::path& FromPath, const std::filesystem::path& ToP if (!Success) { - ThrowLastError("file copy failed"sv); + ThrowLastError(fmt::format("file copy from {} to {} failed", FromPath, ToPath)); } return true; @@ -1483,16 +1488,7 @@ GetModificationTickFromPath(const std::filesystem::path& Filename) { ThrowLastError(fmt::format("Failed to open file {} to check modification tick.", Filename)); } - auto _ = MakeGuard([Handle]() { CloseHandle(Handle); }); -#else - int Fd = open(Filename.c_str(), O_RDONLY | O_CLOEXEC); - if (Fd <= 9) - { - ThrowLastError(fmt::format("Failed to open file {} to check modification tick.", Filename)); - } - Handle = (void*)uintptr_t(Fd); - auto _ = MakeGuard([Handle]() { close(int(uintptr_t(Handle))); }); -#endif + auto _ = MakeGuard([Handle]() { CloseHandle(Handle); }); std::error_code Ec; uint64_t ModificatonTick = GetModificationTickFromHandle(Handle, Ec); if (Ec) @@ -1500,6 +1496,15 @@ GetModificationTickFromPath(const std::filesystem::path& Filename) ThrowSystemError(Ec.value(), Ec.message()); } return ModificatonTick; +#else + struct stat Stat; + int err = stat(Filename.native().c_str(), &Stat); + if (err) + { + ThrowLastError(fmt::format("Failed to get mode of file {}", Filename)); + } + return gsl::narrow(Stat.st_mtime); +#endif } std::filesystem::path -- cgit v1.2.3 From 7046fc9dc202307ba92d05a6386bfb52e9db0ab9 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 12 Mar 2025 13:54:16 +0100 Subject: fixes for log timestamps (#304) * add GetTimeSinceProcessStart returning time since process start. implemented using https://github.com/maxliani/GetTimeSinceProcessStart/tree/main * fix fractions when using epoch mode. Previously it would show the fraction from the absolute time stamp and not relative to epoch * used GetTimeSinceProcessStart to offset the epoch so that it represents the process spawn time --- src/zencore/include/zencore/timer.h | 4 ++++ src/zencore/timer.cpp | 11 +++++++++++ src/zencore/xmake.lua | 1 + src/zenutil/include/zenutil/logging/fullformatter.h | 7 ++++++- src/zenutil/logging.cpp | 6 ++++-- 5 files changed, 26 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/zencore/include/zencore/timer.h b/src/zencore/include/zencore/timer.h index e4ddc3505..767dc4314 100644 --- a/src/zencore/include/zencore/timer.h +++ b/src/zencore/include/zencore/timer.h @@ -21,6 +21,10 @@ ZENCORE_API uint64_t GetHifreqTimerFrequency(); ZENCORE_API double GetHifreqTimerToSeconds(); ZENCORE_API uint64_t GetHifreqTimerFrequencySafe(); // May be used during static init +// Query time since process was spawned (returns time in ms) + +uint64_t GetTimeSinceProcessStart(); + class Stopwatch { public: diff --git a/src/zencore/timer.cpp b/src/zencore/timer.cpp index 1655e912d..95536cb26 100644 --- a/src/zencore/timer.cpp +++ b/src/zencore/timer.cpp @@ -12,8 +12,19 @@ # include #endif +#define GTSPS_IMPLEMENTATION +#include "GetTimeSinceProcessStart.h" + namespace zen { +uint64_t +GetTimeSinceProcessStart() +{ + double TimeInSeconds = ::GetTimeSinceProcessStart(); + + return uint64_t(TimeInSeconds * 1000); +} + uint64_t GetHifreqTimerValue() { diff --git a/src/zencore/xmake.lua b/src/zencore/xmake.lua index b8b14084c..13611a2e9 100644 --- a/src/zencore/xmake.lua +++ b/src/zencore/xmake.lua @@ -29,6 +29,7 @@ target('zencore') end add_includedirs("include", {public=true}) + add_includedirs("$(projectdir)/thirdparty/GetTimeSinceProcessStart") add_includedirs("$(projectdir)/thirdparty/utfcpp/source") add_includedirs("$(projectdir)/thirdparty/Oodle/include") add_includedirs("$(projectdir)/thirdparty/trace", {public=true}) diff --git a/src/zenutil/include/zenutil/logging/fullformatter.h b/src/zenutil/include/zenutil/logging/fullformatter.h index 07ad408fa..0326870e5 100644 --- a/src/zenutil/include/zenutil/logging/fullformatter.h +++ b/src/zenutil/include/zenutil/logging/fullformatter.h @@ -45,6 +45,8 @@ public: std::chrono::seconds TimestampSeconds; + std::chrono::milliseconds millis; + if (m_UseFullDate) { TimestampSeconds = std::chrono::duration_cast(msg.time.time_since_epoch()); @@ -69,6 +71,8 @@ public: spdlog::details::fmt_helper::pad2(m_CachedLocalTm.tm_sec, m_CachedDatetime); m_CachedDatetime.push_back('.'); } + + millis = spdlog::details::fmt_helper::time_fraction(msg.time); } else { @@ -97,6 +101,8 @@ public: spdlog::details::fmt_helper::pad2(LogSecs, m_CachedDatetime); m_CachedDatetime.push_back('.'); } + + millis = std::chrono::duration_cast(ElapsedTime - TimestampSeconds); } { @@ -104,7 +110,6 @@ public: OutBuffer.append(m_CachedDatetime.begin(), m_CachedDatetime.end()); } - auto millis = spdlog::details::fmt_helper::time_fraction(msg.time); spdlog::details::fmt_helper::pad3(static_cast(millis.count()), OutBuffer); OutBuffer.push_back(']'); OutBuffer.push_back(' '); diff --git a/src/zenutil/logging.cpp b/src/zenutil/logging.cpp index 762b75a59..cb0fd6679 100644 --- a/src/zenutil/logging.cpp +++ b/src/zenutil/logging.cpp @@ -16,6 +16,7 @@ ZEN_THIRD_PARTY_INCLUDES_END #include #include #include +#include #include #include #include @@ -191,8 +192,9 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) logging::RefreshLogLevels(LogLevel); spdlog::flush_on(spdlog::level::err); spdlog::flush_every(std::chrono::seconds{2}); - spdlog::set_formatter( - std::make_unique(LogOptions.LogId, std::chrono::system_clock::now())); // default to duration prefix + spdlog::set_formatter(std::make_unique( + LogOptions.LogId, + std::chrono::system_clock::now() - std::chrono::milliseconds(GetTimeSinceProcessStart()))); // default to duration prefix if (g_FileSink) { -- cgit v1.2.3 From cd128dd76526ed01c9a6c125b7ecddd9e3e08e57 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 12 Mar 2025 14:48:46 +0100 Subject: ProgressBar improvements (#305) * changed ProgressBar so it doesn't use printf. printf by default is very slow on Windows due to weird buffering behaviour. During a 2 minute build download I profiled 35 CPU seconds inside printf * changed so ProgressBar uses plain output mode if stdout is not a console/tty --- src/zen/zen.cpp | 43 +++++++++++++++++++++++++++++++++++++++++-- src/zen/zen.h | 1 + 2 files changed, 42 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index 0fcf9d871..9d0eab7dc 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +57,10 @@ ZEN_THIRD_PARTY_INCLUDES_END #include +#ifndef ZEN_PLATFORM_WINDOWS +# include +#endif + ////////////////////////////////////////////////////////////////////////// namespace zen { @@ -261,8 +266,22 @@ ZenCmdBase::ResolveTargetHostSpec(const std::string& InHostSpec) return ResolveTargetHostSpec(InHostSpec, /* out */ Dummy); } +static bool +IsStdoutTty() +{ +#if ZEN_PLATFORM_WINDOWS + static HANDLE hStdOut = ::GetStdHandle(STD_OUTPUT_HANDLE); + DWORD dwMode = 0; + static bool IsConsole = ::GetConsoleMode(hStdOut, &dwMode); + return IsConsole; +#else + return isatty(fileno(stdout)); +#endif +} + ProgressBar::ProgressBar(bool PlainProgress, bool ShowDetails) -: m_PlainProgress(PlainProgress) +: m_StdoutIsTty(IsStdoutTty()) +, m_PlainProgress(PlainProgress || !m_StdoutIsTty) , m_ShowDetails(ShowDetails) , m_LastUpdateMS(m_SW.GetElapsedTimeMs() - 10000) { @@ -323,7 +342,27 @@ ProgressBar::UpdateState(const State& NewState, bool DoLinebreak) ETA, NewState.Details.empty() ? "" : fmt::format(". {}", NewState.Details)); std::string::size_type EraseLength = m_LastOutputLength > Output.length() ? (m_LastOutputLength - Output.length()) : 0; - printf("%s%s%s", Output.c_str(), std::string(EraseLength, ' ').c_str(), DoLinebreak ? "\n" : ""); + + ExtendableStringBuilder<128> LineToPrint; + LineToPrint << Output << std::string(EraseLength, ' '); + if (DoLinebreak) + LineToPrint << "\n"; + +#if ZEN_PLATFORM_WINDOWS + static HANDLE hStdOut = GetStdHandle(STD_OUTPUT_HANDLE); + + if (m_StdoutIsTty) + { + WriteConsoleA(hStdOut, LineToPrint.c_str(), (DWORD)LineToPrint.Size(), 0, 0); + } + else + { + ::WriteFile(hStdOut, (LPCVOID)LineToPrint.c_str(), (DWORD)LineToPrint.Size(), 0, 0); + } +#else + fwrite(LineToPrint.c_str(), 1, LineToPrint.Size(), stdout); +#endif + m_LastOutputLength = DoLinebreak ? 0 : Output.length(); m_State = NewState; } diff --git a/src/zen/zen.h b/src/zen/zen.h index 835c2b6ac..6765101db 100644 --- a/src/zen/zen.h +++ b/src/zen/zen.h @@ -94,6 +94,7 @@ public: bool HasActiveTask() const; private: + const bool m_StdoutIsTty = true; const bool m_PlainProgress; const bool m_ShowDetails; Stopwatch m_SW; -- cgit v1.2.3 From 9268991f2cedd999d0b0e6175b6e32c89acddf38 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 12 Mar 2025 10:40:20 +0100 Subject: Remove spurious '4' in conditional code block --- src/zen/cmds/builds_cmd.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index baa46dda8..d3536768b 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -3049,18 +3049,18 @@ namespace { std::vector OutLooseChunkHashes; std::vector OutLooseChunkRawSizes; std::vector OutBlockRawHashes; - 4 ReadBuildContentFromCompactBinary(PartManifestWriter.Save(), - VerifyFolderContent.Platform, - VerifyFolderContent.Paths, - VerifyFolderContent.RawHashes, - VerifyFolderContent.RawSizes, - VerifyFolderContent.Attributes, - VerifyFolderContent.ChunkedContent.SequenceRawHashes, - VerifyFolderContent.ChunkedContent.ChunkCounts, - OutAbsoluteChunkOrders, - OutLooseChunkHashes, - OutLooseChunkRawSizes, - OutBlockRawHashes); + ReadBuildContentFromCompactBinary(PartManifestWriter.Save(), + VerifyFolderContent.Platform, + VerifyFolderContent.Paths, + VerifyFolderContent.RawHashes, + VerifyFolderContent.RawSizes, + VerifyFolderContent.Attributes, + VerifyFolderContent.ChunkedContent.SequenceRawHashes, + VerifyFolderContent.ChunkedContent.ChunkCounts, + OutAbsoluteChunkOrders, + OutLooseChunkHashes, + OutLooseChunkRawSizes, + OutBlockRawHashes); ZEN_ASSERT(OutBlockRawHashes == AllChunkBlockHashes); for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++) -- cgit v1.2.3 From 908f99b749fbbf9754f9485d680914792034334c Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 12 Mar 2025 18:58:24 +0100 Subject: fix quoted command lines arguments (#306) Handling of quotes and quotes with leading backslash for command line parsing - UE-231677 --- src/zen/cmds/admin_cmd.cpp | 19 +++-- src/zen/cmds/admin_cmd.h | 8 +- src/zen/cmds/builds_cmd.cpp | 119 +++++++++++++++----------- src/zen/cmds/builds_cmd.h | 38 ++++----- src/zen/cmds/status_cmd.cpp | 9 +- src/zen/cmds/status_cmd.h | 6 +- src/zen/cmds/up_cmd.cpp | 43 ++++++---- src/zen/cmds/up_cmd.h | 28 +++--- src/zen/cmds/workspaces_cmd.cpp | 91 ++++++++++---------- src/zen/cmds/workspaces_cmd.h | 34 ++++---- src/zen/zen.cpp | 18 ++++ src/zencore/filesystem.cpp | 13 +++ src/zencore/include/zencore/filesystem.h | 2 + src/zencore/include/zencore/process.h | 3 + src/zencore/process.cpp | 142 +++++++++++++++++++++++++++++++ 15 files changed, 391 insertions(+), 182 deletions(-) (limited to 'src') diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp index 995ed4136..835e01151 100644 --- a/src/zen/cmds/admin_cmd.cpp +++ b/src/zen/cmds/admin_cmd.cpp @@ -714,26 +714,29 @@ CopyStateCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw OptionParseException("data path must be given"); } - if (!std::filesystem::is_directory(m_DataPath)) + std::filesystem::path DataPath = StringToPath(m_DataPath); + std::filesystem::path TargetPath = StringToPath(m_TargetPath); + + if (!std::filesystem::is_directory(DataPath)) { throw OptionParseException("data path must exist"); } - if (m_TargetPath.empty()) + if (TargetPath.empty()) { throw OptionParseException("target path must be given"); } - std::filesystem::path RootManifestPath = m_DataPath / "root_manifest"; - std::filesystem::path TargetRootManifestPath = m_TargetPath / "root_manifest"; + std::filesystem::path RootManifestPath = DataPath / "root_manifest"; + std::filesystem::path TargetRootManifestPath = TargetPath / "root_manifest"; if (!TryCopy(RootManifestPath, TargetRootManifestPath)) { throw OptionParseException("data path is invalid, missing root_manifest"); } - std::filesystem::path CachePath = m_DataPath / "cache"; - std::filesystem::path TargetCachePath = m_TargetPath / "cache"; + std::filesystem::path CachePath = DataPath / "cache"; + std::filesystem::path TargetCachePath = TargetPath / "cache"; // Copy cache state DirectoryContent CacheDirectoryContent; @@ -778,8 +781,8 @@ CopyStateCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } } - std::filesystem::path CasPath = m_DataPath / "cas"; - std::filesystem::path TargetCasPath = m_TargetPath / "cas"; + std::filesystem::path CasPath = DataPath / "cas"; + std::filesystem::path TargetCasPath = TargetPath / "cas"; { std::filesystem::path UCasRootPath = CasPath / ".ucas_root"; diff --git a/src/zen/cmds/admin_cmd.h b/src/zen/cmds/admin_cmd.h index c593b2cac..8b6d3e258 100644 --- a/src/zen/cmds/admin_cmd.h +++ b/src/zen/cmds/admin_cmd.h @@ -155,10 +155,10 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{"copy-state", "Copy zen server disk state"}; - std::filesystem::path m_DataPath; - std::filesystem::path m_TargetPath; - bool m_SkipLogs = false; + cxxopts::Options m_Options{"copy-state", "Copy zen server disk state"}; + std::string m_DataPath; + std::string m_TargetPath; + bool m_SkipLogs = false; }; } // namespace zen diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index d3536768b..f0ee4904e 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -6689,7 +6689,7 @@ BuildsCommand::BuildsCommand() m_Options.add_options()("h,help", "Print help"); auto AddAuthOptions = [this](cxxopts::Options& Ops) { - Ops.add_option("", "", "system-dir", "Specify system root", cxxopts::value(m_SystemRootDir), ""); + Ops.add_option("", "", "system-dir", "Specify system root", cxxopts::value(m_SystemRootDir), ""); // Direct access token (may expire) Ops.add_option("auth-token", @@ -7025,7 +7025,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) auto CreateAuthMgr = [&]() { if (!Auth) { - std::filesystem::path DataRoot = m_SystemRootDir.empty() ? PickDefaultSystemRootDirectory() : m_SystemRootDir; + std::filesystem::path DataRoot = m_SystemRootDir.empty() ? PickDefaultSystemRootDirectory() : StringToPath(m_SystemRootDir); if (m_EncryptionKey.empty()) { @@ -7147,8 +7147,9 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } else if (!m_StoragePath.empty()) { - ZEN_CONSOLE("Querying builds in folder '{}'.", m_StoragePath); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Querying builds in folder '{}'.", StoragePath); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); } else { @@ -7199,9 +7200,11 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } } + std::filesystem::path Path = StringToPath(m_Path); + if (m_BuildPartName.empty()) { - m_BuildPartName = m_Path.filename().string(); + m_BuildPartName = Path.filename().string(); } const bool GeneratedBuildId = m_BuildId.empty(); @@ -7241,26 +7244,27 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { ZEN_CONSOLE("Uploading '{}' from '{}' to cloud endpoint '{}'. SessionId: '{}'. Namespace '{}', Bucket '{}', {}BuildId '{}'", m_BuildPartName, - m_Path, + Path, m_BuildsUrl, Http.GetSessionId(), m_Namespace, m_Bucket, GeneratedBuildId ? "Generated " : "", BuildId); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } else if (!m_StoragePath.empty()) { + std::filesystem::path StoragePath = StringToPath(m_StoragePath); ZEN_CONSOLE("Uploading '{}' from '{}' to folder '{}'. {}BuildId '{}'", m_BuildPartName, - m_Path, - m_StoragePath, + Path, + StoragePath, GeneratedBuildId ? "Generated " : "", BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, m_WriteMetadataAsJson, DefaultLatency, DefaultDelayPerKBSec); - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, m_WriteMetadataAsJson, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); } else { @@ -7303,7 +7307,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildId, BuildPartId, m_BuildPartName, - m_Path, + Path, m_ManifestPath, m_BlockReuseMinPercentLimit, m_AllowMultiparts, @@ -7372,6 +7376,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } } + std::filesystem::path Path = StringToPath(m_Path); + BuildStorage::Statistics StorageStats; std::unique_ptr Storage; std::string StorageName; @@ -7379,20 +7385,21 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { ZEN_CONSOLE("Downloading '{}' to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", BuildId, - m_Path, + Path, m_BuildsUrl, Http.GetSessionId(), m_Namespace, m_Bucket, BuildId); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } else if (!m_StoragePath.empty()) { - ZEN_CONSOLE("Downloading '{}' to '{}' from folder {}. BuildId '{}'", BuildId, m_Path, m_StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Downloading '{}' to '{}' from folder {}. BuildId '{}'", BuildId, Path, StoragePath, BuildId); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); } else { @@ -7403,7 +7410,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildId, BuildPartIds, m_BuildPartNames, - m_Path, + Path, m_AllowMultiparts, m_AllowPartialBlockRequests, m_Clean, @@ -7442,7 +7449,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { throw zen::OptionParseException(fmt::format("compare-path is required\n{}", m_DownloadOptions.help())); } - DiffFolders(m_Path, m_DiffPath, m_OnlyChunked); + std::filesystem::path Path = StringToPath(m_Path); + DiffFolders(Path, m_DiffPath, m_OnlyChunked); return AbortFlag ? 11 : 0; } @@ -7467,6 +7475,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) // "07d3964f919d577a321a1fdd", // "07d396a6ce875004e16b9528"}; + std::filesystem::path Path = StringToPath(m_Path); + BuildStorage::Statistics StorageStats; std::unique_ptr Storage; std::string StorageName; @@ -7474,19 +7484,20 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { ZEN_CONSOLE("Downloading {} to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}'", FormatArray(m_BuildIds, " "sv), - m_Path, + Path, m_BuildsUrl, Http.GetSessionId(), m_Namespace, m_Bucket); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } else if (!m_StoragePath.empty()) { - ZEN_CONSOLE("Downloading {}'to '{}' from folder {}", FormatArray(m_BuildIds, " "sv), m_Path, m_StoragePath); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Downloading {}'to '{}' from folder {}", FormatArray(m_BuildIds, " "sv), Path, StoragePath); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); } else { @@ -7504,7 +7515,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildId, {}, {}, - m_Path, + Path, m_AllowMultiparts, m_AllowPartialBlockRequests, BuildIdString == m_BuildIds.front(), @@ -7531,8 +7542,10 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw zen::OptionParseException(fmt::format("local-path is required\n{}", m_DownloadOptions.help())); } + std::filesystem::path Path = StringToPath(m_Path); + m_BuildId = Oid::NewOid().ToString(); - m_BuildPartName = m_Path.filename().string(); + m_BuildPartName = Path.filename().string(); m_BuildPartId = Oid::NewOid().ToString(); m_CreateBuild = true; @@ -7542,16 +7555,18 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) std::unique_ptr Storage; std::string StorageName; - if (m_BuildsUrl.empty() && m_StoragePath.empty()) + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + + if (m_BuildsUrl.empty() && StoragePath.empty()) { - m_StoragePath = GetRunningExecutablePath().parent_path() / ".tmpstore"; - CreateDirectories(m_StoragePath); - CleanDirectory(m_StoragePath, {}); + m_StoragePath = (GetRunningExecutablePath().parent_path() / ".tmpstore").generic_string(); + CreateDirectories(StoragePath); + CleanDirectory(StoragePath, {}); } auto _ = MakeGuard([&]() { - if (m_BuildsUrl.empty() && m_StoragePath.empty()) + if (m_BuildsUrl.empty() && StoragePath.empty()) { - DeleteDirectories(m_StoragePath); + DeleteDirectories(StoragePath); } }); @@ -7559,24 +7574,24 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { ZEN_CONSOLE("Using '{}' to '{}' from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", m_BuildPartName.empty() ? m_BuildPartId : m_BuildPartName, - m_Path, + Path, m_BuildsUrl, Http.GetSessionId(), m_Namespace, m_Bucket, BuildId); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } - else if (!m_StoragePath.empty()) + else if (!StoragePath.empty()) { ZEN_CONSOLE("Using '{}' to '{}' from folder {}. BuildId '{}'", m_BuildPartName.empty() ? m_BuildPartId : m_BuildPartName, - m_Path, - m_StoragePath, + Path, + StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); } else { @@ -7608,7 +7623,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) BuildId, BuildPartId, m_BuildPartName, - m_Path, + Path, {}, m_BlockReuseMinPercentLimit, m_AllowMultiparts, @@ -7622,7 +7637,7 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) return 11; } - const std::filesystem::path DownloadPath = m_Path.parent_path() / (m_BuildPartName + "_download"); + const std::filesystem::path DownloadPath = Path.parent_path() / (m_BuildPartName + "_download"); ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}'", BuildId, BuildPartId, m_BuildPartName, DownloadPath); DownloadFolder(*Storage, BuildId, {BuildPartId}, {}, DownloadPath, m_AllowMultiparts, m_AllowPartialBlockRequests, true, true); if (AbortFlag) @@ -7846,6 +7861,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) std::unique_ptr Storage; std::string StorageName; + std::filesystem::path Path = StringToPath(m_Path); + if (!m_BuildsUrl.empty()) { ZEN_CONSOLE("Using from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", @@ -7854,14 +7871,15 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_Namespace, m_Bucket, BuildId); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } else if (!m_StoragePath.empty()) { - ZEN_CONSOLE("Using folder {}. BuildId '{}'", m_StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Using folder {}. BuildId '{}'", StoragePath, BuildId); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); } else { @@ -7913,6 +7931,8 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) std::unique_ptr Storage; std::string StorageName; + std::filesystem::path Path = StringToPath(m_Path); + if (!m_BuildsUrl.empty()) { ZEN_CONSOLE("Using from cloud endpoint {}. SessionId: '{}'. Namespace '{}', Bucket '{}', BuildId '{}'", @@ -7921,14 +7941,15 @@ BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_Namespace, m_Bucket, BuildId); - Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, m_Path / ZenTempStorageFolderName); + Storage = CreateJupiterBuildStorage(Log(), Http, StorageStats, m_Namespace, m_Bucket, Path / ZenTempStorageFolderName); StorageName = "Cloud DDC"; } else if (!m_StoragePath.empty()) { - ZEN_CONSOLE("Using folder {}. BuildId '{}'", m_StoragePath, BuildId); - Storage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); - StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + std::filesystem::path StoragePath = StringToPath(m_StoragePath); + ZEN_CONSOLE("Using folder {}. BuildId '{}'", StoragePath, BuildId); + Storage = CreateFileBuildStorage(StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + StorageName = fmt::format("Disk {}", StoragePath.stem()); } else { diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h index 167a5d29f..60953efad 100644 --- a/src/zen/cmds/builds_cmd.h +++ b/src/zen/cmds/builds_cmd.h @@ -25,7 +25,7 @@ public: private: cxxopts::Options m_Options{Name, Description}; - std::filesystem::path m_SystemRootDir; + std::string m_SystemRootDir; bool m_PlainProgress = false; bool m_Verbose = false; @@ -37,20 +37,20 @@ private: std::string m_Bucket; // file storage - std::filesystem::path m_StoragePath; - bool m_WriteMetadataAsJson = false; - - std::string m_BuildId; - bool m_CreateBuild = false; - std::string m_BuildMetadataPath; - std::string m_BuildMetadata; - std::string m_BuildPartName; // Defaults to name of leaf folder in m_Path - std::string m_BuildPartId; // Defaults to a generated id when creating part, looked up when downloading using m_BuildPartName - bool m_Clean = false; - uint8_t m_BlockReuseMinPercentLimit = 85; - bool m_AllowMultiparts = true; - bool m_AllowPartialBlockRequests = true; - std::filesystem::path m_ManifestPath; + std::string m_StoragePath; + bool m_WriteMetadataAsJson = false; + + std::string m_BuildId; + bool m_CreateBuild = false; + std::string m_BuildMetadataPath; + std::string m_BuildMetadata; + std::string m_BuildPartName; // Defaults to name of leaf folder in m_Path + std::string m_BuildPartId; // Defaults to a generated id when creating part, looked up when downloading using m_BuildPartName + bool m_Clean = false; + uint8_t m_BlockReuseMinPercentLimit = 85; + bool m_AllowMultiparts = true; + bool m_AllowPartialBlockRequests = true; + std::string m_ManifestPath; // Direct access token (may expire) std::string m_AccessToken; @@ -76,7 +76,7 @@ private: cxxopts::Options m_ListOptions{"list", "List available builds"}; - std::filesystem::path m_Path; + std::string m_Path; cxxopts::Options m_UploadOptions{"upload", "Upload a folder"}; bool m_PostUploadVerify = false; @@ -86,9 +86,9 @@ private: std::vector m_BuildPartIds; bool m_PostDownloadVerify = false; - cxxopts::Options m_DiffOptions{"diff", "Compare two local folders"}; - std::filesystem::path m_DiffPath; - bool m_OnlyChunked = false; + cxxopts::Options m_DiffOptions{"diff", "Compare two local folders"}; + std::string m_DiffPath; + bool m_OnlyChunked = false; cxxopts::Options m_TestOptions{"test", "Test upload and download with verify"}; diff --git a/src/zen/cmds/status_cmd.cpp b/src/zen/cmds/status_cmd.cpp index 16754e747..4d1534e05 100644 --- a/src/zen/cmds/status_cmd.cpp +++ b/src/zen/cmds/status_cmd.cpp @@ -32,16 +32,17 @@ StatusCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) uint16_t EffectivePort = 0; if (!m_DataDir.empty()) { - if (!std::filesystem::is_regular_file(m_DataDir / ".lock")) + std::filesystem::path DataDir = StringToPath(m_DataDir); + if (!std::filesystem::is_regular_file(DataDir / ".lock")) { - ZEN_CONSOLE("lock file does not exist in directory '{}'", m_DataDir); + ZEN_CONSOLE("lock file does not exist in directory '{}'", DataDir); return 1; } - LockFileInfo Info = ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(m_DataDir / ".lock"))); + LockFileInfo Info = ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(DataDir / ".lock"))); std::string Reason; if (!ValidateLockFileInfo(Info, Reason)) { - ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", m_DataDir, Reason); + ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", DataDir, Reason); return 1; } EffectivePort = Info.EffectiveListenPort; diff --git a/src/zen/cmds/status_cmd.h b/src/zen/cmds/status_cmd.h index 46bda9ee6..00ad0e758 100644 --- a/src/zen/cmds/status_cmd.h +++ b/src/zen/cmds/status_cmd.h @@ -20,9 +20,9 @@ public: private: int GetLockFileEffectivePort() const; - cxxopts::Options m_Options{"status", "Show zen status"}; - uint16_t m_Port = 0; - std::filesystem::path m_DataDir; + cxxopts::Options m_Options{"status", "Show zen status"}; + uint16_t m_Port = 0; + std::string m_DataDir; }; } // namespace zen diff --git a/src/zen/cmds/up_cmd.cpp b/src/zen/cmds/up_cmd.cpp index ac2f42a86..44a41146c 100644 --- a/src/zen/cmds/up_cmd.cpp +++ b/src/zen/cmds/up_cmd.cpp @@ -77,13 +77,15 @@ UpCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } } - if (m_ProgramBaseDir.empty()) + std::filesystem::path ProgramBaseDir = StringToPath(m_ProgramBaseDir); + + if (ProgramBaseDir.empty()) { std::filesystem::path ExePath = zen::GetRunningExecutablePath(); - m_ProgramBaseDir = ExePath.parent_path(); + ProgramBaseDir = ExePath.parent_path(); } ZenServerEnvironment ServerEnvironment; - ServerEnvironment.Initialize(m_ProgramBaseDir); + ServerEnvironment.Initialize(ProgramBaseDir); ZenServerInstance Server(ServerEnvironment); std::string ServerArguments = GlobalOptions.PassthroughCommandLine; if ((m_Port != 0) && (ServerArguments.find("--port"sv) == std::string::npos)) @@ -153,18 +155,20 @@ AttachCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) Instance.Sweep(); ZenServerState::ZenServerEntry* Entry = Instance.Lookup(m_Port); - if (!m_DataDir.empty()) + std::filesystem::path DataDir = StringToPath(m_DataDir); + + if (!DataDir.empty()) { - if (!std::filesystem::is_regular_file(m_DataDir / ".lock")) + if (!std::filesystem::is_regular_file(DataDir / ".lock")) { - ZEN_CONSOLE("lock file does not exist in directory '{}'", m_DataDir); + ZEN_CONSOLE("lock file does not exist in directory '{}'", DataDir); return 1; } - LockFileInfo Info = ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(m_DataDir / ".lock"))); + LockFileInfo Info = ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(DataDir / ".lock"))); std::string Reason; if (!ValidateLockFileInfo(Info, Reason)) { - ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", m_DataDir, Reason); + ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", DataDir, Reason); return 1; } Entry = Instance.LookupByEffectivePort(Info.EffectiveListenPort); @@ -214,24 +218,27 @@ DownCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) Instance.Initialize(); ZenServerState::ZenServerEntry* Entry = Instance.Lookup(m_Port); - if (m_ProgramBaseDir.empty()) + std::filesystem::path ProgramBaseDir = StringToPath(m_ProgramBaseDir); + if (ProgramBaseDir.empty()) { - std::filesystem::path ExePath = zen::GetRunningExecutablePath(); - m_ProgramBaseDir = ExePath.parent_path(); + std::filesystem::path ExePath = GetRunningExecutablePath(); + ProgramBaseDir = ExePath.parent_path(); } - if (!m_DataDir.empty()) + std::filesystem::path DataDir = StringToPath(m_DataDir); + + if (!DataDir.empty()) { - if (!std::filesystem::is_regular_file(m_DataDir / ".lock")) + if (!std::filesystem::is_regular_file(DataDir / ".lock")) { - ZEN_CONSOLE("lock file does not exist in directory '{}'", m_DataDir); + ZEN_CONSOLE("lock file does not exist in directory '{}'", DataDir); return 1; } - LockFileInfo Info = ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(m_DataDir / ".lock"))); + LockFileInfo Info = ReadLockFilePayload(LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(DataDir / ".lock"))); std::string Reason; if (!ValidateLockFileInfo(Info, Reason)) { - ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", m_DataDir, Reason); + ZEN_CONSOLE("lock file in directory '{}' is not valid. Reason: '{}'", DataDir, Reason); return 1; } Entry = Instance.LookupByEffectivePort(Info.EffectiveListenPort); @@ -244,7 +251,7 @@ DownCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) try { ZenServerEnvironment ServerEnvironment; - ServerEnvironment.Initialize(m_ProgramBaseDir); + ServerEnvironment.Initialize(ProgramBaseDir); ZenServerInstance Server(ServerEnvironment); Server.AttachToRunningServer(EntryPort); @@ -309,7 +316,7 @@ DownCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) if (m_ForceTerminate) { // Try to find the running executable by path name - std::filesystem::path ServerExePath = m_ProgramBaseDir / "zenserver" ZEN_EXE_SUFFIX_LITERAL; + std::filesystem::path ServerExePath = ProgramBaseDir / "zenserver" ZEN_EXE_SUFFIX_LITERAL; ProcessHandle RunningProcess; if (std::error_code Ec = FindProcess(ServerExePath, RunningProcess); !Ec) { diff --git a/src/zen/cmds/up_cmd.h b/src/zen/cmds/up_cmd.h index c9af16749..32d8ddab3 100644 --- a/src/zen/cmds/up_cmd.h +++ b/src/zen/cmds/up_cmd.h @@ -18,11 +18,11 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{"up", "Bring up zen service"}; - uint16_t m_Port = 0; - bool m_ShowConsole = false; - bool m_ShowLog = false; - std::filesystem::path m_ProgramBaseDir; + cxxopts::Options m_Options{"up", "Bring up zen service"}; + uint16_t m_Port = 0; + bool m_ShowConsole = false; + bool m_ShowLog = false; + std::string m_ProgramBaseDir; }; class AttachCommand : public ZenCmdBase @@ -35,10 +35,10 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{"attach", "Add a sponsor process to a running zen service"}; - uint16_t m_Port = 0; - int m_OwnerPid = 0; - std::filesystem::path m_DataDir; + cxxopts::Options m_Options{"attach", "Add a sponsor process to a running zen service"}; + uint16_t m_Port = 0; + int m_OwnerPid = 0; + std::string m_DataDir; }; class DownCommand : public ZenCmdBase @@ -51,11 +51,11 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{"down", "Bring down zen service"}; - uint16_t m_Port = 0; - bool m_ForceTerminate = false; - std::filesystem::path m_ProgramBaseDir; - std::filesystem::path m_DataDir; + cxxopts::Options m_Options{"down", "Bring down zen service"}; + uint16_t m_Port = 0; + bool m_ForceTerminate = false; + std::string m_ProgramBaseDir; + std::string m_DataDir; }; } // namespace zen diff --git a/src/zen/cmds/workspaces_cmd.cpp b/src/zen/cmds/workspaces_cmd.cpp index 166d4218d..5f3f8f7ca 100644 --- a/src/zen/cmds/workspaces_cmd.cpp +++ b/src/zen/cmds/workspaces_cmd.cpp @@ -25,16 +25,7 @@ namespace { if (!Path.empty()) { std::u8string PathString = Path.u8string(); - if (PathString.ends_with(std::filesystem::path::preferred_separator) || PathString.ends_with('/')) - { - PathString.pop_back(); - Path = std::filesystem::path(PathString); - } - // Special case if user gives a path with quotes and includes a backslash at the end: - // ="path\" cxxopts strips the leading quote only but not the trailing. - // As we expect paths here and we don't want trailing slashes we strip away the quote - // manually if the string does not start with a quote UE-231677 - else if (PathString[0] != '\"' && PathString[PathString.length() - 1] == '\"') + if (PathString.ends_with(std::filesystem::path::preferred_separator) || PathString.starts_with('/')) { PathString.pop_back(); Path = std::filesystem::path(PathString); @@ -96,7 +87,7 @@ WorkspaceCommand::WorkspaceCommand() { m_Options.add_options()("h,help", "Print help"); m_Options.add_option("", "u", "hosturl", "Host URL", cxxopts::value(m_HostName)->default_value(""), ""); - m_Options.add_options()("system-dir", "Specify system root", cxxopts::value(m_SystemRootDir)); + m_Options.add_options()("system-dir", "Specify system root", cxxopts::value(m_SystemRootDir)); m_Options.add_option("", "v", "verb", "Verb for workspace - create, remove, info", cxxopts::value(m_Verb), ""); m_Options.parse_positional({"verb"}); m_Options.positional_help("verb"); @@ -148,16 +139,18 @@ WorkspaceCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) m_HostName = ResolveTargetHostSpec(m_HostName); - if (m_SystemRootDir.empty()) + std::filesystem::path SystemRootDir = StringToPath(m_SystemRootDir); + + if (SystemRootDir.empty()) { - m_SystemRootDir = PickDefaultSystemRootDirectory(); - if (m_SystemRootDir.empty()) + SystemRootDir = PickDefaultSystemRootDirectory(); + if (SystemRootDir.empty()) { throw zen::OptionParseException("unable to resolve system root directory"); } } - std::filesystem::path StatePath = m_SystemRootDir / "workspaces"; + std::filesystem::path StatePath = SystemRootDir / "workspaces"; if (!ParseOptions(*SubOption, gsl::narrow(SubCommandArguments.size()), SubCommandArguments.data())) { @@ -171,12 +164,12 @@ WorkspaceCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw zen::OptionParseException(fmt::format("path is required\n{}", m_CreateOptions.help())); } - RemoveTrailingPathSeparator(m_Path); + std::filesystem::path Path = StringToPath(m_Path); if (m_Id.empty()) { - m_Id = Workspaces::PathToId(m_Path).ToString(); - ZEN_CONSOLE("Using generated workspace id {} from path '{}'", m_Id, m_Path); + m_Id = Workspaces::PathToId(Path).ToString(); + ZEN_CONSOLE("Using generated workspace id {} from path '{}'", m_Id, Path); } if (Oid::TryFromHexString(m_Id) == Oid::Zero) @@ -187,7 +180,7 @@ WorkspaceCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) if (Workspaces::AddWorkspace( Log(), StatePath, - {.Id = Oid::FromHexString(m_Id), .RootPath = m_Path, .AllowShareCreationFromHttp = m_AllowShareCreationFromHttp})) + {.Id = Oid::FromHexString(m_Id), .RootPath = Path, .AllowShareCreationFromHttp = m_AllowShareCreationFromHttp})) { if (!m_HostName.empty()) { @@ -287,7 +280,7 @@ WorkspaceShareCommand::WorkspaceShareCommand() { m_Options.add_options()("h,help", "Print help"); m_Options.add_option("", "u", "hosturl", "Host URL", cxxopts::value(m_HostName)->default_value(""), ""); - m_Options.add_options()("system-dir", "Specify system root", cxxopts::value(m_SystemRootDir)); + m_Options.add_options()("system-dir", "Specify system root", cxxopts::value(m_SystemRootDir)); m_Options.add_option("", "v", "verb", "Verb for workspace - create, remove, info", cxxopts::value(m_Verb), ""); m_Options.parse_positional({"verb"}); m_Options.positional_help("verb"); @@ -399,16 +392,18 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** m_HostName = ResolveTargetHostSpec(m_HostName); - if (m_SystemRootDir.empty()) + std::filesystem::path SystemRootDir = StringToPath(m_SystemRootDir); + + if (SystemRootDir.empty()) { - m_SystemRootDir = PickDefaultSystemRootDirectory(); - if (m_SystemRootDir.empty()) + SystemRootDir = PickDefaultSystemRootDirectory(); + if (SystemRootDir.empty()) { throw zen::OptionParseException("unable to resolve system root directory"); } } - std::filesystem::path StatePath = m_SystemRootDir / "workspaces"; + std::filesystem::path StatePath = SystemRootDir / "workspaces"; if (!ParseOptions(*SubOption, gsl::narrow(SubCommandArguments.size()), SubCommandArguments.data())) { @@ -417,7 +412,8 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** if (SubOption == &m_CreateOptions) { - if (m_WorkspaceRoot.empty()) + std::filesystem::path WorkspaceRoot = StringToPath(m_WorkspaceRoot); + if (WorkspaceRoot.empty()) { if (m_WorkspaceId.empty()) { @@ -436,15 +432,15 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace {} does not exist", m_WorkspaceId); return 0; } - m_WorkspaceRoot = WorkspaceConfig.RootPath; + WorkspaceRoot = WorkspaceConfig.RootPath; } else { - RemoveTrailingPathSeparator(m_WorkspaceRoot); + RemoveTrailingPathSeparator(WorkspaceRoot); if (m_WorkspaceId.empty()) { - m_WorkspaceId = Workspaces::PathToId(m_WorkspaceRoot).ToString(); - ZEN_CONSOLE("Using generated workspace id {} from path '{}'", m_WorkspaceId, m_WorkspaceRoot); + m_WorkspaceId = Workspaces::PathToId(WorkspaceRoot).ToString(); + ZEN_CONSOLE("Using generated workspace id {} from path '{}'", m_WorkspaceId, WorkspaceRoot); } else { @@ -453,23 +449,25 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** throw zen::OptionParseException(fmt::format("workspace id '{}' is invalid", m_WorkspaceId)); } } - if (Workspaces::AddWorkspace(Log(), StatePath, {.Id = Oid::FromHexString(m_WorkspaceId), .RootPath = m_WorkspaceRoot})) + if (Workspaces::AddWorkspace(Log(), StatePath, {.Id = Oid::FromHexString(m_WorkspaceId), .RootPath = WorkspaceRoot})) { - ZEN_CONSOLE("Created workspace {} using root path '{}'", m_WorkspaceId, m_WorkspaceRoot); + ZEN_CONSOLE("Created workspace {} using root path '{}'", m_WorkspaceId, WorkspaceRoot); } else { - ZEN_CONSOLE("Using existing workspace {} with root path '{}'", m_WorkspaceId, m_WorkspaceRoot); + ZEN_CONSOLE("Using existing workspace {} with root path '{}'", m_WorkspaceId, WorkspaceRoot); } } - RemoveTrailingPathSeparator(m_SharePath); - RemoveLeadingPathSeparator(m_SharePath); + std::filesystem::path SharePath = StringToPath(m_SharePath); + + RemoveLeadingPathSeparator(SharePath); + RemoveTrailingPathSeparator(SharePath); if (m_ShareId.empty()) { - m_ShareId = Workspaces::PathToId(m_SharePath).ToString(); - ZEN_CONSOLE("Using generated share id {}, for path '{}'", m_ShareId, m_SharePath); + m_ShareId = Workspaces::PathToId(SharePath).ToString(); + ZEN_CONSOLE("Using generated share id {}, for path '{}'", m_ShareId, SharePath); } if (Oid::TryFromHexString(m_ShareId) == Oid::Zero) @@ -478,8 +476,8 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** } if (Workspaces::AddWorkspaceShare(Log(), - m_WorkspaceRoot, - {.Id = Oid::FromHexString(m_ShareId), .SharePath = m_SharePath, .Alias = m_Alias})) + WorkspaceRoot, + {.Id = Oid::FromHexString(m_ShareId), .SharePath = SharePath, .Alias = m_Alias})) { if (!m_HostName.empty()) { @@ -531,7 +529,8 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace {} does not exist", m_WorkspaceId); return 0; } - m_WorkspaceRoot = WorkspaceConfig.RootPath; + + std::filesystem::path WorkspaceRoot = WorkspaceConfig.RootPath; if (m_ShareId.empty()) { @@ -543,8 +542,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** throw zen::OptionParseException(fmt::format("workspace id '{}' is invalid", m_ShareId)); } - Workspaces::WorkspaceShareConfiguration Share = - Workspaces::FindWorkspaceShare(Log(), m_WorkspaceRoot, Oid::FromHexString(m_ShareId)); + Workspaces::WorkspaceShareConfiguration Share = Workspaces::FindWorkspaceShare(Log(), WorkspaceRoot, Oid::FromHexString(m_ShareId)); if (Share.Id == Oid::Zero) { ZEN_CONSOLE("Workspace share {} does not exist", m_ShareId); @@ -556,6 +554,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** if (SubOption == &m_RemoveOptions) { + std::filesystem::path WorkspaceRoot; if (!m_Alias.empty()) { Workspaces::WorkspaceConfiguration WorkspaceConfig; @@ -566,9 +565,9 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace share with alias {} does not exist", m_Alias); return 0; } - m_ShareId = ShareConfig.Id.ToString(); - m_WorkspaceId = WorkspaceConfig.Id.ToString(); - m_WorkspaceRoot = WorkspaceConfig.RootPath; + m_ShareId = ShareConfig.Id.ToString(); + m_WorkspaceId = WorkspaceConfig.Id.ToString(); + WorkspaceRoot = WorkspaceConfig.RootPath; } if (m_WorkspaceId.empty()) @@ -587,7 +586,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace {} does not exist", m_WorkspaceId); return 0; } - m_WorkspaceRoot = WorkspaceConfig.RootPath; + WorkspaceRoot = WorkspaceConfig.RootPath; if (m_ShareId.empty()) { @@ -599,7 +598,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** throw zen::OptionParseException(fmt::format("workspace id '{}' is invalid", m_ShareId)); } - if (Workspaces::RemoveWorkspaceShare(Log(), m_WorkspaceRoot, Oid::FromHexString(m_ShareId))) + if (Workspaces::RemoveWorkspaceShare(Log(), WorkspaceRoot, Oid::FromHexString(m_ShareId))) { if (!m_HostName.empty()) { diff --git a/src/zen/cmds/workspaces_cmd.h b/src/zen/cmds/workspaces_cmd.h index de0edd061..86452e25e 100644 --- a/src/zen/cmds/workspaces_cmd.h +++ b/src/zen/cmds/workspaces_cmd.h @@ -21,17 +21,17 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{Name, Description}; - std::string m_HostName; - std::filesystem::path m_SystemRootDir; + cxxopts::Options m_Options{Name, Description}; + std::string m_HostName; + std::string m_SystemRootDir; std::string m_Verb; // create, info, remove std::string m_Id; - cxxopts::Options m_CreateOptions{"create", "Create a workspace"}; - std::filesystem::path m_Path; - bool m_AllowShareCreationFromHttp = false; + cxxopts::Options m_CreateOptions{"create", "Create a workspace"}; + std::string m_Path; + bool m_AllowShareCreationFromHttp = false; cxxopts::Options m_InfoOptions{"info", "Info about a workspace"}; @@ -53,17 +53,17 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{Name, Description}; - std::string m_HostName; - std::filesystem::path m_SystemRootDir; - std::string m_WorkspaceId; - std::filesystem::path m_WorkspaceRoot; - std::string m_Verb; // create, info, remove - std::string m_ShareId; - std::string m_Alias; - - cxxopts::Options m_CreateOptions{"create", "Create a workspace share"}; - std::filesystem::path m_SharePath; + cxxopts::Options m_Options{Name, Description}; + std::string m_HostName; + std::string m_SystemRootDir; + std::string m_WorkspaceId; + std::string m_WorkspaceRoot; + std::string m_Verb; // create, info, remove + std::string m_ShareId; + std::string m_Alias; + + cxxopts::Options m_CreateOptions{"create", "Create a workspace share"}; + std::string m_SharePath; bool m_Refresh = false; diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index 9d0eab7dc..6f831349b 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -414,6 +415,23 @@ ProgressBar::HasActiveTask() const int main(int argc, char** argv) { + std::vector Args; +#if ZEN_PLATFORM_WINDOWS + LPWSTR RawCommandLine = GetCommandLine(); + std::string CommandLine = zen::WideToUtf8(RawCommandLine); + Args = zen::ParseCommandLine(CommandLine); +#else + Args.reserve(argc); + for (int I = 0; I < argc; I++) + { + Args.push_back(std::string(argv[I])); + } +#endif + std::vector RawArgs = zen::StripCommandlineQuotes(Args); + + argc = gsl::narrow(RawArgs.size()); + argv = RawArgs.data(); + using namespace zen; using namespace std::literals; diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp index 9f3f4f7fc..05e2bf049 100644 --- a/src/zencore/filesystem.cpp +++ b/src/zencore/filesystem.cpp @@ -2043,6 +2043,19 @@ SetFileReadOnly(const std::filesystem::path& Filename, bool ReadOnly) return false; } +std::filesystem::path +StringToPath(const std::string_view& Path) +{ + if (Path.length() > 2 && Path.front() == '\"' && Path.back() == '\"') + { + return std::filesystem::path(Path.substr(1, Path.length() - 2)).make_preferred(); + } + else + { + return std::filesystem::path(Path).make_preferred(); + } +} + ////////////////////////////////////////////////////////////////////////// // // Testing related code follows... diff --git a/src/zencore/include/zencore/filesystem.h b/src/zencore/include/zencore/filesystem.h index e020668fc..9a2b15d1d 100644 --- a/src/zencore/include/zencore/filesystem.h +++ b/src/zencore/include/zencore/filesystem.h @@ -292,6 +292,8 @@ uint32_t MakeFileModeReadOnly(uint32_t FileMode, bool ReadOnly); bool SetFileReadOnly(const std::filesystem::path& Filename, bool ReadOnly); +std::filesystem::path StringToPath(const std::string_view& Path); + ////////////////////////////////////////////////////////////////////////// void filesystem_forcelink(); // internal diff --git a/src/zencore/include/zencore/process.h b/src/zencore/include/zencore/process.h index d1394cd9a..0c5931ba0 100644 --- a/src/zencore/include/zencore/process.h +++ b/src/zencore/include/zencore/process.h @@ -100,6 +100,9 @@ int GetProcessId(CreateProcResult ProcId); std::filesystem::path GetProcessExecutablePath(int Pid, std::error_code& OutEc); std::error_code FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHandle); +std::vector ParseCommandLine(std::string_view CommandLine); +std::vector StripCommandlineQuotes(std::vector& InOutArgs); + void process_forcelink(); // internal } // namespace zen diff --git a/src/zencore/process.cpp b/src/zencore/process.cpp index c51e8f69d..0761521dc 100644 --- a/src/zencore/process.cpp +++ b/src/zencore/process.cpp @@ -1047,6 +1047,118 @@ FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHand #endif // ZEN_PLATFORM_LINUX } +std::vector +ParseCommandLine(std::string_view CommandLine) +{ + auto IsWhitespaceOrEnd = [](std::string_view CommandLine, std::string::size_type Pos) { + if (Pos == CommandLine.length()) + { + return true; + } + if (CommandLine[Pos] == ' ') + { + return true; + } + return false; + }; + + bool IsParsingArg = false; + bool IsInQuote = false; + + std::string::size_type Pos = 0; + std::string::size_type ArgStart = 0; + std::vector Args; + while (Pos < CommandLine.length()) + { + if (IsInQuote) + { + if (CommandLine[Pos] == '"' && IsWhitespaceOrEnd(CommandLine, Pos + 1)) + { + Args.push_back(std::string(CommandLine.substr(ArgStart, Pos - ArgStart + 1))); + Pos++; + IsInQuote = false; + IsParsingArg = false; + } + else + { + Pos++; + } + } + else if (IsParsingArg) + { + ZEN_ASSERT(Pos > ArgStart); + if (CommandLine[Pos] == ' ') + { + Args.push_back(std::string(CommandLine.substr(ArgStart, Pos - ArgStart))); + Pos++; + IsParsingArg = false; + } + else if (CommandLine[Pos] == '"') + { + IsInQuote = true; + Pos++; + } + else + { + Pos++; + } + } + else if (CommandLine[Pos] == '"') + { + IsInQuote = true; + IsParsingArg = true; + ArgStart = Pos; + Pos++; + } + else if (CommandLine[Pos] != ' ') + { + IsParsingArg = true; + ArgStart = Pos; + Pos++; + } + else + { + Pos++; + } + } + if (IsParsingArg) + { + ZEN_ASSERT(Pos > ArgStart); + Args.push_back(std::string(CommandLine.substr(ArgStart))); + } + + return Args; +} + +std::vector +StripCommandlineQuotes(std::vector& InOutArgs) +{ + std::vector RawArgs; + RawArgs.reserve(InOutArgs.size()); + for (std::string& Arg : InOutArgs) + { + std::string::size_type EscapedQuotePos = Arg.find("\\\"", 1); + while (EscapedQuotePos != std::string::npos && Arg.rfind('\"', EscapedQuotePos - 1) != std::string::npos) + { + Arg.erase(EscapedQuotePos, 1); + EscapedQuotePos = Arg.find("\\\"", EscapedQuotePos); + } + + if (Arg.starts_with("\"")) + { + if (Arg.find('"', 1) == Arg.length() - 1) + { + if (Arg.find(' ', 1) == std::string::npos) + { + Arg = Arg.substr(1, Arg.length() - 2); + } + } + } + RawArgs.push_back(const_cast(Arg.c_str())); + } + return RawArgs; +} + #if ZEN_WITH_TESTS void @@ -1123,6 +1235,36 @@ TEST_CASE("BuildArgV") } } +TEST_CASE("CommandLine") +{ + std::vector v1 = ParseCommandLine("c:\\my\\exe.exe \"quoted arg\" \"one\",two,\"three\\\""); + CHECK_EQ(v1[0], "c:\\my\\exe.exe"); + CHECK_EQ(v1[1], "\"quoted arg\""); + CHECK_EQ(v1[2], "\"one\",two,\"three\\\""); + + std::vector v2 = ParseCommandLine( + "--tracehost 127.0.0.1 builds download --url=https://jupiter.devtools.epicgames.com --namespace=ue.oplog " + "--bucket=citysample.packaged-build.fortnite-main.windows \"c:\\just\\a\\path\" " + "--access-token-path=\"C:\\Users\\dan.engelbrecht\\jupiter-token.json\" \"D:\\Dev\\Spaced Folder\\Target\\\" " + "--alt-path=\"D:\\Dev\\Spaced Folder2\\Target\\\" 07dn23ifiwesnvoasjncasab --build-part-name win64,linux,ps5"); + + std::vector v2Stripped = StripCommandlineQuotes(v2); + CHECK_EQ(v2Stripped[0], std::string("--tracehost")); + CHECK_EQ(v2Stripped[1], std::string("127.0.0.1")); + CHECK_EQ(v2Stripped[2], std::string("builds")); + CHECK_EQ(v2Stripped[3], std::string("download")); + CHECK_EQ(v2Stripped[4], std::string("--url=https://jupiter.devtools.epicgames.com")); + CHECK_EQ(v2Stripped[5], std::string("--namespace=ue.oplog")); + CHECK_EQ(v2Stripped[6], std::string("--bucket=citysample.packaged-build.fortnite-main.windows")); + CHECK_EQ(v2Stripped[7], std::string("c:\\just\\a\\path")); + CHECK_EQ(v2Stripped[8], std::string("--access-token-path=\"C:\\Users\\dan.engelbrecht\\jupiter-token.json\"")); + CHECK_EQ(v2Stripped[9], std::string("\"D:\\Dev\\Spaced Folder\\Target\"")); + CHECK_EQ(v2Stripped[10], std::string("--alt-path=\"D:\\Dev\\Spaced Folder2\\Target\"")); + CHECK_EQ(v2Stripped[11], std::string("07dn23ifiwesnvoasjncasab")); + CHECK_EQ(v2Stripped[12], std::string("--build-part-name")); + CHECK_EQ(v2Stripped[13], std::string("win64,linux,ps5")); +} + TEST_SUITE_END(/* core.process */); #endif -- cgit v1.2.3