From d39724eb644cab4ec5bbf19a703cb770b34e68c4 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 12 Feb 2025 08:58:52 +0100 Subject: improved builds api interface in jupiter (#281) * multipart upload/download iterface in jupiter * review fixes --- .../include/zenutil/jupiter/jupitersession.h | 70 +++-- src/zenutil/jupiter/jupitersession.cpp | 293 +++++++++++++++++++++ 2 files changed, 340 insertions(+), 23 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/include/zenutil/jupiter/jupitersession.h b/src/zenutil/include/zenutil/jupiter/jupitersession.h index 6a80332f4..075c35b40 100644 --- a/src/zenutil/include/zenutil/jupiter/jupitersession.h +++ b/src/zenutil/include/zenutil/jupiter/jupitersession.h @@ -102,29 +102,48 @@ public: std::vector Filter(std::string_view Namespace, std::string_view BucketId, const std::vector& ChunkHashes); - JupiterResult PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload); - JupiterResult GetBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); - JupiterResult FinalizeBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); - PutBuildPartResult PutBuildPart(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - std::string_view PartName, - const IoBuffer& Payload); - JupiterResult GetBuildPart(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); - JupiterResult PutBuildBlob(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - const IoHash& Hash, - ZenContentType ContentType, - const CompositeBuffer& Payload); - JupiterResult GetBuildBlob(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - const IoHash& Hash, - std::filesystem::path TempFolderPath); + JupiterResult ListBuilds(std::string_view Namespace, std::string_view BucketId, const IoBuffer& Payload); + JupiterResult PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload); + JupiterResult GetBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + JupiterResult FinalizeBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + PutBuildPartResult PutBuildPart(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + std::string_view PartName, + const IoBuffer& Payload); + JupiterResult GetBuildPart(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); + JupiterResult PutBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + ZenContentType ContentType, + const CompositeBuffer& Payload); + JupiterResult GetBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + std::filesystem::path TempFolderPath); + + JupiterResult PutMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function&& Transmitter, + std::vector>& OutWorkItems); + JupiterResult GetMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + uint64_t ChunkSize, + std::function&& Receiver, + std::vector>& OutWorkItems); JupiterResult PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, @@ -137,6 +156,11 @@ public: const Oid& PartId, const IoHash& RawHash); JupiterResult FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); + JupiterResult GetBlockMetadata(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + IoBuffer Payload); private: inline LoggerRef Log() { return m_Log; } diff --git a/src/zenutil/jupiter/jupitersession.cpp b/src/zenutil/jupiter/jupitersession.cpp index f706a7efc..d56927b44 100644 --- a/src/zenutil/jupiter/jupitersession.cpp +++ b/src/zenutil/jupiter/jupitersession.cpp @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include @@ -354,6 +356,16 @@ JupiterSession::CacheTypeExists(std::string_view Namespace, std::string_view Typ return Result; } +JupiterResult +JupiterSession::ListBuilds(std::string_view Namespace, std::string_view BucketId, const IoBuffer& Payload) +{ + ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); + HttpClient::Response Response = m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/search", Namespace, BucketId), + Payload, + {HttpClient::Accept(ZenContentType::kCbObject)}); + return detail::ConvertResponse(Response, "JupiterSession::ListBuilds"sv); +} + JupiterResult JupiterSession::PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload) { @@ -436,6 +448,271 @@ JupiterSession::PutBuildBlob(std::string_view Namespace, return detail::ConvertResponse(Response, "JupiterSession::PutBuildBlob"sv); } +JupiterResult +JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function&& Transmitter, + std::vector>& OutWorkItems) +{ + struct MultipartUploadResponse + { + struct Part + { + uint64_t FirstByte; + uint64_t LastByte; + std::string PartId; + std::string QueryString; + }; + + std::string UploadId; + std::string BlobName; + std::vector Parts; + + static MultipartUploadResponse Parse(CbObject& Payload) + { + MultipartUploadResponse Result; + Result.UploadId = Payload["uploadId"sv].AsString(); + Result.BlobName = Payload["blobName"sv].AsString(); + CbArrayView PartsArray = Payload["parts"sv].AsArrayView(); + Result.Parts.reserve(PartsArray.Num()); + for (CbFieldView PartView : PartsArray) + { + CbObjectView PartObject = PartView.AsObjectView(); + Result.Parts.emplace_back(Part{ + .FirstByte = PartObject["firstByte"sv].AsUInt64(), + .LastByte = PartObject["lastByte"sv].AsUInt64(), + .PartId = std::string(PartObject["partId"sv].AsString()), + .QueryString = std::string(PartObject["queryString"sv].AsString()), + }); + } + return Result; + } + }; + + CbObjectWriter StartMultipartPayloadWriter; + StartMultipartPayloadWriter.AddInteger("blobLength"sv, PayloadSize); + CbObject StartMultipartPayload = StartMultipartPayloadWriter.Save(); + + std::string StartMultipartResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/startMultipartUpload", + Namespace, + BucketId, + BuildId, + PartId, + Hash.ToHexString()); + // ZEN_INFO("POST: {}", StartMultipartResponseRequestString); + HttpClient::Response StartMultipartResponse = + m_HttpClient.Post(StartMultipartResponseRequestString, StartMultipartPayload, HttpClient::Accept(ZenContentType::kCbObject)); + if (!StartMultipartResponse.IsSuccess()) + { + ZEN_WARN("{}", StartMultipartResponse.ErrorMessage("startMultipartUpload: ")); + return detail::ConvertResponse(StartMultipartResponse, "JupiterSession::PutMultipartBuildBlob"sv); + } + CbObject ResponseObject = LoadCompactBinaryObject(StartMultipartResponse.ResponsePayload); + + struct WorkloadData + { + MultipartUploadResponse PartDescription; + std::function Transmitter; + std::atomic PartsLeft; + }; + + std::shared_ptr Workload(std::make_shared()); + + Workload->PartDescription = MultipartUploadResponse::Parse(ResponseObject); + Workload->Transmitter = std::move(Transmitter); + Workload->PartsLeft = Workload->PartDescription.Parts.size(); + + for (size_t PartIndex = 0; PartIndex < Workload->PartDescription.Parts.size(); PartIndex++) + { + OutWorkItems.emplace_back([this, Namespace, BucketId, BuildId, PartId, Hash, ContentType, Workload, PartIndex]( + bool& OutIsComplete) -> JupiterResult { + const MultipartUploadResponse::Part& Part = Workload->PartDescription.Parts[PartIndex]; + IoBuffer PartPayload = Workload->Transmitter(Part.FirstByte, Part.LastByte - Part.FirstByte); + std::string MultipartUploadResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/uploadMultipart{}", + Namespace, + BucketId, + BuildId, + PartId, + Hash.ToHexString(), + Part.QueryString); + // ZEN_INFO("PUT: {}", MultipartUploadResponseRequestString); + HttpClient::Response MultipartUploadResponse = m_HttpClient.Put(MultipartUploadResponseRequestString, PartPayload); + if (!MultipartUploadResponse.IsSuccess()) + { + ZEN_WARN("{}", MultipartUploadResponse.ErrorMessage(MultipartUploadResponseRequestString)); + } + OutIsComplete = Workload->PartsLeft.fetch_sub(1) == 1; + if (OutIsComplete) + { + int64_t TotalUploadedBytes = MultipartUploadResponse.UploadedBytes; + int64_t TotalDownloadedBytes = MultipartUploadResponse.DownloadedBytes; + double TotalElapsedSeconds = MultipartUploadResponse.ElapsedSeconds; + HttpClient::Response MultipartEndResponse = MultipartUploadResponse; + while (MultipartEndResponse.IsSuccess()) + { + CbObjectWriter CompletePayloadWriter; + CompletePayloadWriter.AddString("blobName"sv, Workload->PartDescription.BlobName); + CompletePayloadWriter.AddString("uploadId"sv, Workload->PartDescription.UploadId); + CompletePayloadWriter.AddBool("isCompressed"sv, ContentType == ZenContentType::kCompressedBinary); + CompletePayloadWriter.BeginArray("partIds"sv); + std::unordered_map PartNameToIndex; + for (size_t UploadPartIndex = 0; UploadPartIndex < Workload->PartDescription.Parts.size(); UploadPartIndex++) + { + const MultipartUploadResponse::Part& PartDescription = Workload->PartDescription.Parts[UploadPartIndex]; + PartNameToIndex.insert({PartDescription.PartId, UploadPartIndex}); + CompletePayloadWriter.AddString(PartDescription.PartId); + } + CompletePayloadWriter.EndArray(); // "partIds" + CbObject CompletePayload = CompletePayloadWriter.Save(); + + std::string MultipartEndResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/completeMultipart", + Namespace, + BucketId, + BuildId, + PartId, + Hash.ToHexString()); + + MultipartEndResponse = m_HttpClient.Post(MultipartEndResponseRequestString, + CompletePayload, + HttpClient::Accept(ZenContentType::kCbObject)); + TotalUploadedBytes += MultipartEndResponse.UploadedBytes; + TotalDownloadedBytes += MultipartEndResponse.DownloadedBytes; + TotalElapsedSeconds += MultipartEndResponse.ElapsedSeconds; + if (MultipartEndResponse.IsSuccess()) + { + CbObject ResponseObject = MultipartEndResponse.AsObject(); + CbArrayView MissingPartsArrayView = ResponseObject["missingParts"sv].AsArrayView(); + if (MissingPartsArrayView.Num() == 0) + { + break; + } + else + { + for (CbFieldView PartIdView : MissingPartsArrayView) + { + std::string RetryPartId(PartIdView.AsString()); + size_t RetryPartIndex = PartNameToIndex.at(RetryPartId); + const MultipartUploadResponse::Part& RetryPart = Workload->PartDescription.Parts[RetryPartIndex]; + IoBuffer RetryPartPayload = + Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte); + std::string RetryMultipartUploadResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/uploadMultipart{}", + Namespace, + BucketId, + BuildId, + RetryPartId, + Hash.ToHexString(), + RetryPart.QueryString); + + MultipartUploadResponse = m_HttpClient.Put(RetryMultipartUploadResponseRequestString, RetryPartPayload); + TotalUploadedBytes = MultipartUploadResponse.UploadedBytes; + TotalDownloadedBytes = MultipartUploadResponse.DownloadedBytes; + TotalElapsedSeconds = MultipartUploadResponse.ElapsedSeconds; + if (!MultipartUploadResponse.IsSuccess()) + { + ZEN_WARN("{}", MultipartUploadResponse.ErrorMessage(RetryMultipartUploadResponseRequestString)); + MultipartEndResponse = MultipartUploadResponse; + } + } + } + } + else + { + ZEN_WARN("{}", MultipartEndResponse.ErrorMessage(MultipartEndResponseRequestString)); + } + } + MultipartEndResponse.UploadedBytes = TotalUploadedBytes; + MultipartEndResponse.DownloadedBytes = TotalDownloadedBytes; + MultipartEndResponse.ElapsedSeconds = TotalElapsedSeconds; + return detail::ConvertResponse(MultipartEndResponse, "JupiterSession::PutMultipartBuildBlob"sv); + } + return detail::ConvertResponse(MultipartUploadResponse, "JupiterSession::PutMultipartBuildBlob"sv); + }); + } + return detail::ConvertResponse(StartMultipartResponse, "JupiterSession::PutMultipartBuildBlob"sv); +} + +JupiterResult +JupiterSession::GetMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + const IoHash& Hash, + uint64_t ChunkSize, + std::function&& Receiver, + std::vector>& OutWorkItems) +{ + std::string RequestUrl = + fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()); + HttpClient::Response Response = + m_HttpClient.Get(RequestUrl, HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", 0, ChunkSize - 1)}})); + if (Response.IsSuccess()) + { + if (std::string_view ContentRange = Response.Header.Entries["Content-Range"]; !ContentRange.empty()) + { + if (std::string_view::size_type SizeDelimiterPos = ContentRange.find('/'); SizeDelimiterPos != std::string_view::npos) + { + if (std::optional TotalSizeMaybe = ParseInt(ContentRange.substr(SizeDelimiterPos + 1)); + TotalSizeMaybe.has_value()) + { + uint64_t TotalSize = TotalSizeMaybe.value(); + uint64_t PayloadSize = Response.ResponsePayload.GetSize(); + + Receiver(0, Response.ResponsePayload, TotalSize); + + if (TotalSize > PayloadSize) + { + struct WorkloadData + { + std::function Receiver; + std::atomic BytesRemaining; + }; + + std::shared_ptr Workload(std::make_shared()); + Workload->Receiver = std::move(Receiver); + Workload->BytesRemaining = TotalSize - PayloadSize; + + uint64_t Offset = PayloadSize; + while (Offset < TotalSize) + { + uint64_t PartSize = Min(ChunkSize, TotalSize - Offset); + OutWorkItems.emplace_back( + [this, Namespace, BucketId, BuildId, PartId, Hash, TotalSize, Workload, Offset, PartSize]() + -> JupiterResult { + std::string RequestUrl = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", + Namespace, + BucketId, + BuildId, + PartId, + Hash.ToHexString()); + HttpClient::Response Response = m_HttpClient.Get( + RequestUrl, + HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", Offset, Offset + PartSize - 1)}})); + if (Response.IsSuccess()) + { + uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Response.ResponsePayload.GetSize()); + Workload->Receiver(Offset, Response.ResponsePayload, ByteRemaning); + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); + }); + Offset += PartSize; + } + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); + } + } + } + Receiver(0, Response.ResponsePayload, Response.ResponsePayload.GetSize()); + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); +} + JupiterResult JupiterSession::GetBuildBlob(std::string_view Namespace, std::string_view BucketId, @@ -447,6 +724,7 @@ JupiterSession::GetBuildBlob(std::string_view Namespace, HttpClient::Response Response = m_HttpClient.Download( fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), TempFolderPath); + return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv); } @@ -502,4 +780,19 @@ JupiterSession::FindBlocks(std::string_view Namespace, std::string_view BucketId return detail::ConvertResponse(Response, "JupiterSession::FindBlocks"sv); } +JupiterResult +JupiterSession::GetBlockMetadata(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + IoBuffer Payload) +{ + ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); + HttpClient::Response Response = + m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/getBlockMetadata", Namespace, BucketId, BuildId, PartId), + Payload, + HttpClient::Accept(ZenContentType::kCbObject)); + return detail::ConvertResponse(Response, "JupiterSession::GetBlockMetadata"sv); +} + } // namespace zen -- cgit v1.2.3 From da9179d330a37132488f6deb8d8068783b087256 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 12 Feb 2025 09:02:35 +0100 Subject: moving and small refactor of chunk blocks to prepare for builds api (#282) --- src/zenutil/chunkblock.cpp | 166 ++++++++++ src/zenutil/chunkedfile.cpp | 510 ++++++++++++++++++++++++++++++ src/zenutil/chunking.cpp | 382 ++++++++++++++++++++++ src/zenutil/chunking.h | 56 ++++ src/zenutil/include/zenutil/chunkblock.h | 32 ++ src/zenutil/include/zenutil/chunkedfile.h | 58 ++++ src/zenutil/zenutil.cpp | 2 + 7 files changed, 1206 insertions(+) create mode 100644 src/zenutil/chunkblock.cpp create mode 100644 src/zenutil/chunkedfile.cpp create mode 100644 src/zenutil/chunking.cpp create mode 100644 src/zenutil/chunking.h create mode 100644 src/zenutil/include/zenutil/chunkblock.h create mode 100644 src/zenutil/include/zenutil/chunkedfile.h (limited to 'src/zenutil') diff --git a/src/zenutil/chunkblock.cpp b/src/zenutil/chunkblock.cpp new file mode 100644 index 000000000..6dae5af11 --- /dev/null +++ b/src/zenutil/chunkblock.cpp @@ -0,0 +1,166 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +#include +#include + +#include + +namespace zen { + +using namespace std::literals; + +ChunkBlockDescription +ParseChunkBlockDescription(const CbObjectView& BlockObject) +{ + ChunkBlockDescription Result; + Result.BlockHash = BlockObject["rawHash"sv].AsHash(); + if (Result.BlockHash != IoHash::Zero) + { + CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView(); + Result.ChunkHashes.reserve(ChunksArray.Num()); + for (CbFieldView ChunkView : ChunksArray) + { + Result.ChunkHashes.push_back(ChunkView.AsHash()); + } + + CbArrayView ChunkRawLengthsArray = BlockObject["chunkRawLengths"sv].AsArrayView(); + std::vector ChunkLengths; + Result.ChunkRawLengths.reserve(ChunkRawLengthsArray.Num()); + for (CbFieldView ChunkView : ChunkRawLengthsArray) + { + Result.ChunkRawLengths.push_back(ChunkView.AsUInt32()); + } + } + return Result; +} + +std::vector +ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject) +{ + if (!BlocksObject) + { + return {}; + } + std::vector Result; + CbArrayView Blocks = BlocksObject["blocks"].AsArrayView(); + Result.reserve(Blocks.Num()); + for (CbFieldView BlockView : Blocks) + { + CbObjectView BlockObject = BlockView.AsObjectView(); + Result.emplace_back(ParseChunkBlockDescription(BlockObject)); + } + return Result; +} + +CbObject +BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData) +{ + ZEN_ASSERT(Block.ChunkRawLengths.size() == Block.ChunkHashes.size()); + + CbObjectWriter Writer; + Writer.AddHash("rawHash"sv, Block.BlockHash); + Writer.BeginArray("rawHashes"sv); + { + for (const IoHash& ChunkHash : Block.ChunkHashes) + { + Writer.AddHash(ChunkHash); + } + } + Writer.EndArray(); + Writer.BeginArray("chunkRawLengths"); + { + for (uint32_t ChunkSize : Block.ChunkRawLengths) + { + Writer.AddInteger(ChunkSize); + } + } + Writer.EndArray(); + + Writer.AddObject("metadata", MetaData); + + return Writer.Save(); +} + +CompressedBuffer +GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock) +{ + const size_t ChunkCount = FetchChunks.size(); + + std::vector ChunkSegments; + ChunkSegments.resize(1); + ChunkSegments.reserve(1 + ChunkCount); + OutBlock.ChunkHashes.reserve(ChunkCount); + OutBlock.ChunkRawLengths.reserve(ChunkCount); + { + IoBuffer TempBuffer(ChunkCount * 9); + MutableMemoryView View = TempBuffer.GetMutableView(); + uint8_t* BufferStartPtr = reinterpret_cast(View.GetData()); + uint8_t* BufferEndPtr = BufferStartPtr; + BufferEndPtr += WriteVarUInt(gsl::narrow(ChunkCount), BufferEndPtr); + for (const auto& It : FetchChunks) + { + std::pair Chunk = It.second(It.first); + uint64_t ChunkSize = 0; + std::span Segments = Chunk.second.GetCompressed().GetSegments(); + for (const SharedBuffer& Segment : Segments) + { + ChunkSize += Segment.GetSize(); + ChunkSegments.push_back(Segment); + } + BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr); + OutBlock.ChunkHashes.push_back(It.first); + OutBlock.ChunkRawLengths.push_back(gsl::narrow(Chunk.first)); + } + ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd()); + ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr); + ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow(TempBufferLength))); + } + CompressedBuffer CompressedBlock = + CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None); + OutBlock.BlockHash = CompressedBlock.DecodeRawHash(); + return CompressedBlock; +} + +bool +IterateChunkBlock(const SharedBuffer& BlockPayload, std::function Visitor) +{ + ZEN_ASSERT(BlockPayload); + if (BlockPayload.GetSize() < 1) + { + return false; + } + + MemoryView BlockView = BlockPayload.GetView(); + const uint8_t* ReadPtr = reinterpret_cast(BlockView.GetData()); + uint32_t NumberSize; + uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize); + ReadPtr += NumberSize; + std::vector ChunkSizes; + ChunkSizes.reserve(ChunkCount); + while (ChunkCount--) + { + ChunkSizes.push_back(ReadVarUInt(ReadPtr, NumberSize)); + ReadPtr += NumberSize; + } + for (uint64_t ChunkSize : ChunkSizes) + { + IoBuffer Chunk(IoBuffer::Wrap, ReadPtr, ChunkSize); + IoHash AttachmentRawHash; + uint64_t AttachmentRawSize; + CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(Chunk), AttachmentRawHash, AttachmentRawSize); + + if (!CompressedChunk) + { + ZEN_ERROR("Invalid chunk in block"); + return false; + } + Visitor(std::move(CompressedChunk), AttachmentRawHash); + ReadPtr += ChunkSize; + ZEN_ASSERT(ReadPtr <= BlockView.GetDataEnd()); + } + return true; +}; + +} // namespace zen diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp new file mode 100644 index 000000000..c08492eb0 --- /dev/null +++ b/src/zenutil/chunkedfile.cpp @@ -0,0 +1,510 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +#include + +#include "chunking.h" + +ZEN_THIRD_PARTY_INCLUDES_START +#include +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +namespace { + struct ChunkedHeader + { + static constexpr uint32_t ExpectedMagic = 0x646b6863; // chkd + static constexpr uint32_t CurrentVersion = 1; + + uint32_t Magic = ExpectedMagic; + uint32_t Version = CurrentVersion; + uint32_t ChunkSequenceLength; + uint32_t ChunkHashCount; + uint64_t ChunkSequenceOffset; + uint64_t ChunkHashesOffset; + uint64_t RawSize = 0; + IoHash RawHash; + }; +} // namespace + +IoBuffer +SerializeChunkedInfo(const ChunkedInfo& Info) +{ + size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) + + RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16); + IoBuffer HeaderData(HeaderSize); + + ChunkedHeader Header; + Header.ChunkSequenceLength = gsl::narrow(Info.ChunkSequence.size()); + Header.ChunkHashCount = gsl::narrow(Info.ChunkHashes.size()); + Header.ChunkSequenceOffset = RoundUp(sizeof(ChunkedHeader), 16); + Header.ChunkHashesOffset = RoundUp(Header.ChunkSequenceOffset + sizeof(uint32_t) * Header.ChunkSequenceLength, 16); + Header.RawSize = Info.RawSize; + Header.RawHash = Info.RawHash; + + MutableMemoryView WriteView = HeaderData.GetMutableView(); + { + MutableMemoryView HeaderWriteView = WriteView.Left(sizeof(Header)); + HeaderWriteView.CopyFrom(MemoryView(&Header, sizeof(Header))); + } + { + MutableMemoryView ChunkSequenceWriteView = WriteView.Mid(Header.ChunkSequenceOffset, sizeof(uint32_t) * Header.ChunkSequenceLength); + ChunkSequenceWriteView.CopyFrom(MemoryView(Info.ChunkSequence.data(), ChunkSequenceWriteView.GetSize())); + } + { + MutableMemoryView ChunksWriteView = WriteView.Mid(Header.ChunkHashesOffset, sizeof(IoHash) * Header.ChunkHashCount); + ChunksWriteView.CopyFrom(MemoryView(Info.ChunkHashes.data(), ChunksWriteView.GetSize())); + } + + return HeaderData; +} + +ChunkedInfo +DeserializeChunkedInfo(IoBuffer& Buffer) +{ + MemoryView View = Buffer.GetView(); + ChunkedHeader Header; + { + MutableMemoryView HeaderWriteView(&Header, sizeof(Header)); + HeaderWriteView.CopyFrom(View.Left(sizeof(Header))); + } + if (Header.Magic != ChunkedHeader::ExpectedMagic) + { + return {}; + } + if (Header.Version != ChunkedHeader::CurrentVersion) + { + return {}; + } + ChunkedInfo Info; + Info.RawSize = Header.RawSize; + Info.RawHash = Header.RawHash; + Info.ChunkSequence.resize(Header.ChunkSequenceLength); + Info.ChunkHashes.resize(Header.ChunkHashCount); + { + MutableMemoryView ChunkSequenceWriteView(Info.ChunkSequence.data(), sizeof(uint32_t) * Header.ChunkSequenceLength); + ChunkSequenceWriteView.CopyFrom(View.Mid(Header.ChunkSequenceOffset, ChunkSequenceWriteView.GetSize())); + } + { + MutableMemoryView ChunksWriteView(Info.ChunkHashes.data(), sizeof(IoHash) * Header.ChunkHashCount); + ChunksWriteView.CopyFrom(View.Mid(Header.ChunkHashesOffset, ChunksWriteView.GetSize())); + } + + return Info; +} + +void +Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function GetChunk) +{ + BasicFile Reconstructed; + Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate); + BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024); + uint64_t Offset = 0; + for (uint32_t SequenceIndex : Info.ChunkSequence) + { + IoBuffer Chunk = GetChunk(Info.ChunkHashes[SequenceIndex]); + ReconstructedWriter.Write(Chunk.GetData(), Chunk.GetSize(), Offset); + Offset += Chunk.GetSize(); + } +} + +ChunkedInfoWithSource +ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params, std::atomic* BytesProcessed) +{ + ChunkedInfoWithSource Result; + tsl::robin_map FoundChunks; + + ZenChunkHelper Chunker; + Chunker.SetUseThreshold(Params.UseThreshold); + Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize); + size_t End = Offset + Size; + const size_t ScanBufferSize = 1u * 1024 * 1024; // (Params.MaxSize * 9) / 3;//1 * 1024 * 1024; + BasicFileBuffer RawBuffer(RawData, ScanBufferSize); + MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); + ZEN_ASSERT(!SliceView.IsEmpty()); + size_t SliceSize = SliceView.GetSize(); + IoHashStream RawHashStream; + while (Offset < End) + { + size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize); + if (ScanLength == ZenChunkHelper::kNoBoundaryFound) + { + if (Offset + SliceSize == End) + { + ScanLength = SliceSize; + } + else + { + SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); + SliceSize = SliceView.GetSize(); + Chunker.Reset(); + continue; + } + } + uint32_t ChunkLength = gsl::narrow(ScanLength); // +HashedLength); + MemoryView ChunkView = SliceView.Left(ScanLength); + RawHashStream.Append(ChunkView); + IoHash ChunkHash = IoHash::HashBuffer(ChunkView); + SliceView.RightChopInline(ScanLength); + if (auto It = FoundChunks.find(ChunkHash); It != FoundChunks.end()) + { + Result.Info.ChunkSequence.push_back(It->second); + } + else + { + uint32_t ChunkIndex = gsl::narrow(Result.Info.ChunkHashes.size()); + FoundChunks.insert_or_assign(ChunkHash, ChunkIndex); + Result.Info.ChunkHashes.push_back(ChunkHash); + Result.ChunkSources.push_back(ChunkSource{.Offset = Offset, .Size = ChunkLength}); + Result.Info.ChunkSequence.push_back(ChunkIndex); + } + + SliceSize = SliceView.GetSize(); + Offset += ChunkLength; + if (BytesProcessed != nullptr) + { + BytesProcessed->fetch_add(ChunkLength); + } + } + Result.Info.RawSize = Size; + Result.Info.RawHash = RawHashStream.GetHash(); + return Result; +} + +} // namespace zen + +#if ZEN_WITH_TESTS +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include "chunking.h" + +ZEN_THIRD_PARTY_INCLUDES_START +# include +# include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { +# if 0 +TEST_CASE("chunkedfile.findparams") +{ +# if 1 + DirectoryContent SourceContent1; + GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208", DirectoryContentFlags::IncludeFiles, SourceContent1); + const std::vector& SourceFiles1 = SourceContent1.Files; + DirectoryContent SourceContent2; + GetDirectoryContent("E:\\Temp\\ChunkingTestData\\31379208_2", DirectoryContentFlags::IncludeFiles, SourceContent2); + const std::vector& SourceFiles2 = SourceContent2.Files; +# else + std::filesystem::path SourcePath1 = + "E:\\Temp\\ChunkingTestData\\31375996\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; + std::filesystem::path SourcePath2 = + "E:\\Temp\\ChunkingTestData\\31379208\\ShaderArchive-FortniteGame_Chunk10-PCD3D_SM6-PCD3D_SM6.ushaderbytecode"; + const std::vector& SourceFiles1 = {SourcePath1}; + const std::vector& SourceFiles2 = {SourcePath2}; +# endif + ChunkedParams Params[] = {ChunkedParams{.UseThreshold = false, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}, + ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 35598}, + ChunkedParams{.UseThreshold = false, .MinSize = 16848, .MaxSize = 135168, .AvgSize = 39030}, + ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 36222}, + ChunkedParams{.UseThreshold = false, .MinSize = 15744, .MaxSize = 126976, .AvgSize = 36600}, + ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 35442}, + ChunkedParams{.UseThreshold = false, .MinSize = 16464, .MaxSize = 131072, .AvgSize = 37950}, + ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 38914}, + ChunkedParams{.UseThreshold = false, .MinSize = 15408, .MaxSize = 122880, .AvgSize = 35556}, + ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 35520}, + ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 35478}, + ChunkedParams{.UseThreshold = false, .MinSize = 16896, .MaxSize = 135168, .AvgSize = 39072}, + ChunkedParams{.UseThreshold = false, .MinSize = 15360, .MaxSize = 122880, .AvgSize = 38880}, + ChunkedParams{.UseThreshold = false, .MinSize = 15840, .MaxSize = 126976, .AvgSize = 36678}, + ChunkedParams{.UseThreshold = false, .MinSize = 16800, .MaxSize = 135168, .AvgSize = 38994}, + ChunkedParams{.UseThreshold = false, .MinSize = 15888, .MaxSize = 126976, .AvgSize = 36714}, + ChunkedParams{.UseThreshold = false, .MinSize = 15792, .MaxSize = 126976, .AvgSize = 36636}, + ChunkedParams{.UseThreshold = false, .MinSize = 14880, .MaxSize = 118784, .AvgSize = 37609}, + ChunkedParams{.UseThreshold = false, .MinSize = 15936, .MaxSize = 126976, .AvgSize = 36756}, + ChunkedParams{.UseThreshold = false, .MinSize = 15456, .MaxSize = 122880, .AvgSize = 38955}, + ChunkedParams{.UseThreshold = false, .MinSize = 15984, .MaxSize = 126976, .AvgSize = 36792}, + ChunkedParams{.UseThreshold = false, .MinSize = 14400, .MaxSize = 114688, .AvgSize = 36338}, + ChunkedParams{.UseThreshold = false, .MinSize = 14832, .MaxSize = 118784, .AvgSize = 37568}, + ChunkedParams{.UseThreshold = false, .MinSize = 16944, .MaxSize = 135168, .AvgSize = 39108}, + ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 36297}, + ChunkedParams{.UseThreshold = false, .MinSize = 14208, .MaxSize = 114688, .AvgSize = 36188}, + ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 36372}, + ChunkedParams{.UseThreshold = false, .MinSize = 13296, .MaxSize = 106496, .AvgSize = 36592}, + ChunkedParams{.UseThreshold = false, .MinSize = 15264, .MaxSize = 122880, .AvgSize = 38805}, + ChunkedParams{.UseThreshold = false, .MinSize = 14304, .MaxSize = 114688, .AvgSize = 36263}, + ChunkedParams{.UseThreshold = false, .MinSize = 14784, .MaxSize = 118784, .AvgSize = 37534}, + ChunkedParams{.UseThreshold = false, .MinSize = 15312, .MaxSize = 122880, .AvgSize = 38839}, + ChunkedParams{.UseThreshold = false, .MinSize = 14256, .MaxSize = 114688, .AvgSize = 39360}, + ChunkedParams{.UseThreshold = false, .MinSize = 13776, .MaxSize = 110592, .AvgSize = 37976}, + ChunkedParams{.UseThreshold = false, .MinSize = 14736, .MaxSize = 118784, .AvgSize = 37493}, + ChunkedParams{.UseThreshold = false, .MinSize = 14928, .MaxSize = 118784, .AvgSize = 37643}, + ChunkedParams{.UseThreshold = false, .MinSize = 14448, .MaxSize = 114688, .AvgSize = 39504}, + ChunkedParams{.UseThreshold = false, .MinSize = 13392, .MaxSize = 106496, .AvgSize = 36664}, + ChunkedParams{.UseThreshold = false, .MinSize = 13872, .MaxSize = 110592, .AvgSize = 38048}, + ChunkedParams{.UseThreshold = false, .MinSize = 14352, .MaxSize = 114688, .AvgSize = 39432}, + ChunkedParams{.UseThreshold = false, .MinSize = 13200, .MaxSize = 106496, .AvgSize = 36520}, + ChunkedParams{.UseThreshold = false, .MinSize = 17328, .MaxSize = 139264, .AvgSize = 36378}, + ChunkedParams{.UseThreshold = false, .MinSize = 17376, .MaxSize = 139264, .AvgSize = 36421}, + ChunkedParams{.UseThreshold = false, .MinSize = 17424, .MaxSize = 139264, .AvgSize = 36459}, + ChunkedParams{.UseThreshold = false, .MinSize = 17472, .MaxSize = 139264, .AvgSize = 36502}, + ChunkedParams{.UseThreshold = false, .MinSize = 17520, .MaxSize = 139264, .AvgSize = 36540}, + ChunkedParams{.UseThreshold = false, .MinSize = 17808, .MaxSize = 143360, .AvgSize = 37423}, + ChunkedParams{.UseThreshold = false, .MinSize = 17856, .MaxSize = 143360, .AvgSize = 37466}, + ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 25834}, + ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 21917}, + ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 29751}, + ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 33668}, + ChunkedParams{.UseThreshold = false, .MinSize = 17952, .MaxSize = 143360, .AvgSize = 37547}, + ChunkedParams{.UseThreshold = false, .MinSize = 17904, .MaxSize = 143360, .AvgSize = 37504}, + ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 22371}, + ChunkedParams{.UseThreshold = false, .MinSize = 18000, .MaxSize = 143360, .AvgSize = 37585}, + ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 26406}, + ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 26450}, + ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 30615}, + ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 30441}, + ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 22417}, + ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 22557}, + ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 30528}, + ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 27112}, + ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 34644}, + ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 34476}, + ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 35408}, + ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 38592}, + ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 30483}, + ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 26586}, + ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 26496}, + ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 31302}, + ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 34516}, + ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 22964}, + ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 35448}, + ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 38630}, + ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 23010}, + ChunkedParams{.UseThreshold = false, .MinSize = 18816, .MaxSize = 151552, .AvgSize = 31260}, + ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 34600}, + ChunkedParams{.UseThreshold = false, .MinSize = 18864, .MaxSize = 151552, .AvgSize = 27156}, + ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 30570}, + ChunkedParams{.UseThreshold = false, .MinSize = 18384, .MaxSize = 147456, .AvgSize = 38549}, + ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 22510}, + ChunkedParams{.UseThreshold = false, .MinSize = 18528, .MaxSize = 147456, .AvgSize = 38673}, + ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 34560}, + ChunkedParams{.UseThreshold = false, .MinSize = 18432, .MaxSize = 147456, .AvgSize = 22464}, + ChunkedParams{.UseThreshold = false, .MinSize = 18480, .MaxSize = 147456, .AvgSize = 26540}, + ChunkedParams{.UseThreshold = false, .MinSize = 18336, .MaxSize = 147456, .AvgSize = 38511}, + ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 23057}, + ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 27202}, + ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 31347}, + ChunkedParams{.UseThreshold = false, .MinSize = 18912, .MaxSize = 151552, .AvgSize = 35492}, + ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 31389}, + ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 27246}, + ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 23103}, + ChunkedParams{.UseThreshold = false, .MinSize = 18960, .MaxSize = 151552, .AvgSize = 35532}, + ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 23150}, + ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 27292}, + ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 31434}, + ChunkedParams{.UseThreshold = false, .MinSize = 19008, .MaxSize = 151552, .AvgSize = 35576}, + ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 27336}, + ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 23196}, + ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 31476}, + ChunkedParams{.UseThreshold = false, .MinSize = 19056, .MaxSize = 151552, .AvgSize = 35616}, + ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 27862}, + ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 32121}, + ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 23603}, + ChunkedParams{.UseThreshold = false, .MinSize = 19344, .MaxSize = 155648, .AvgSize = 36380}, + ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 27908}, + ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 23650}, + ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 32166}, + ChunkedParams{.UseThreshold = false, .MinSize = 19392, .MaxSize = 155648, .AvgSize = 36424}, + ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 23696}, + ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 32253}, + ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 32208}, + ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 23743}, + ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 36548}, + ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 28042}, + ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 23789}, + ChunkedParams{.UseThreshold = false, .MinSize = 19536, .MaxSize = 155648, .AvgSize = 32295}, + ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 36508}, + ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 27952}, + ChunkedParams{.UseThreshold = false, .MinSize = 19488, .MaxSize = 155648, .AvgSize = 27998}, + ChunkedParams{.UseThreshold = false, .MinSize = 19440, .MaxSize = 155648, .AvgSize = 36464}}; + + static const size_t ParamsCount = sizeof(Params) / sizeof(ChunkedParams); + std::vector Infos1(SourceFiles1.size()); + std::vector Infos2(SourceFiles2.size()); + + WorkerThreadPool WorkerPool(32); + + for (size_t I = 0; I < ParamsCount; I++) + { + for (int UseThreshold = 0; UseThreshold < 2; UseThreshold++) + { + Latch WorkLatch(1); + ChunkedParams Param = Params[I]; + Param.UseThreshold = UseThreshold == 1; + Stopwatch Timer; + for (size_t F = 0; F < SourceFiles1.size(); F++) + { + WorkLatch.AddCount(1); + WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles1, &Infos1]() { + auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); + BasicFile SourceData1; + SourceData1.Open(SourceFiles1[F], BasicFile::Mode::kRead); + Infos1[F] = ChunkData(SourceData1, 0, SourceData1.FileSize(), Param); + }); + } + for (size_t F = 0; F < SourceFiles2.size(); F++) + { + WorkLatch.AddCount(1); + WorkerPool.ScheduleWork([&WorkLatch, F, Param, &SourceFiles2, &Infos2]() { + auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); + BasicFile SourceData2; + SourceData2.Open(SourceFiles2[F], BasicFile::Mode::kRead); + Infos2[F] = ChunkData(SourceData2, 0, SourceData2.FileSize(), Param); + }); + } + WorkLatch.CountDown(); + WorkLatch.Wait(); + uint64_t ChunkTimeMS = Timer.GetElapsedTimeMs(); + + uint64_t Raw1Size = 0; + tsl::robin_set Chunks1; + size_t ChunkedSize1 = 0; + for (size_t F = 0; F < SourceFiles1.size(); F++) + { + const ChunkedInfoWithSource& Info = Infos1[F]; + Raw1Size += Info.Info.RawSize; + for (uint32_t Chunk1Index = 0; Chunk1Index < Info.Info.ChunkHashes.size(); ++Chunk1Index) + { + const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk1Index]; + if (Chunks1.insert(ChunkHash).second) + { + ChunkedSize1 += Info.ChunkSources[Chunk1Index].Size; + } + } + } + + uint64_t Raw2Size = 0; + tsl::robin_set Chunks2; + size_t ChunkedSize2 = 0; + size_t DiffSize = 0; + for (size_t F = 0; F < SourceFiles2.size(); F++) + { + const ChunkedInfoWithSource& Info = Infos2[F]; + Raw2Size += Info.Info.RawSize; + for (uint32_t Chunk2Index = 0; Chunk2Index < Info.Info.ChunkHashes.size(); ++Chunk2Index) + { + const IoHash ChunkHash = Info.Info.ChunkHashes[Chunk2Index]; + if (Chunks2.insert(ChunkHash).second) + { + ChunkedSize2 += Info.ChunkSources[Chunk2Index].Size; + if (!Chunks1.contains(ChunkHash)) + { + DiffSize += Info.ChunkSources[Chunk2Index].Size; + } + } + } + } + + ZEN_INFO( + "Diff = {}, Chunks1 = {}, Chunks2 = {}, .UseThreshold = {}, .MinSize = {}, .MaxSize = {}, .AvgSize = {}, RawSize(1) = {}, " + "RawSize(2) = {}, " + "Saved(1) = {}, Saved(2) = {} in {}", + NiceBytes(DiffSize), + Chunks1.size(), + Chunks2.size(), + Param.UseThreshold, + Param.MinSize, + Param.MaxSize, + Param.AvgSize, + NiceBytes(Raw1Size), + NiceBytes(Raw2Size), + NiceBytes(Raw1Size - ChunkedSize1), + NiceBytes(Raw2Size - ChunkedSize2), + NiceTimeSpanMs(ChunkTimeMS)); + } + } + +# if 0 + for (int64_t MinSizeBase = (12u * 1024u); MinSizeBase <= (32u * 1024u); MinSizeBase += 512) + { + for (int64_t Wiggle = -132; Wiggle < 126; Wiggle += 2) + { + // size_t MinSize = 7 * 1024 - 61; // (size_t)(MinSizeBase + Wiggle); + // size_t MaxSize = 16 * (7 * 1024); // 8 * 7 * 1024;// MinSizeBase * 6; + // size_t AvgSize = MaxSize / 2; // 4 * 7 * 1024;// MinSizeBase * 3; + size_t MinSize = (size_t)(MinSizeBase + Wiggle); + //for (size_t MaxSize = (MinSize * 4) - 768; MaxSize < (MinSize * 5) + 768; MaxSize += 64) + size_t MaxSize = 8u * MinSizeBase; + { + for (size_t AvgSize = (MaxSize - MinSize) / 32 + MinSize; AvgSize < (MaxSize - MinSize) / 4 + MinSize; AvgSize += (MaxSize - MinSize) / 32) +// size_t AvgSize = (MaxSize - MinSize) / 4 + MinSize; + { + WorkLatch.AddCount(1); + WorkerPool.ScheduleWork([&WorkLatch, MinSize, MaxSize, AvgSize, SourcePath1, SourcePath2]() + { + auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); + ChunkedParams Params{ .UseThreshold = true, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize }; + BasicFile SourceData1; + SourceData1.Open(SourcePath1, BasicFile::Mode::kRead); + BasicFile SourceData2; + SourceData2.Open(SourcePath2, BasicFile::Mode::kRead); + ChunkedInfoWithSource Info1 = ChunkData(SourceData1, Params); + ChunkedInfoWithSource Info2 = ChunkData(SourceData2, Params); + + tsl::robin_set Chunks1; + Chunks1.reserve(Info1.Info.ChunkHashes.size()); + Chunks1.insert(Info1.Info.ChunkHashes.begin(), Info1.Info.ChunkHashes.end()); + size_t ChunkedSize1 = 0; + for (uint32_t Chunk1Index = 0; Chunk1Index < Info1.Info.ChunkHashes.size(); ++Chunk1Index) + { + ChunkedSize1 += Info1.ChunkSources[Chunk1Index].Size; + } + size_t DiffSavedSize = 0; + size_t ChunkedSize2 = 0; + for (uint32_t Chunk2Index = 0; Chunk2Index < Info2.Info.ChunkHashes.size(); ++Chunk2Index) + { + ChunkedSize2 += Info2.ChunkSources[Chunk2Index].Size; + if (Chunks1.find(Info2.Info.ChunkHashes[Chunk2Index]) == Chunks1.end()) + { + DiffSavedSize += Info2.ChunkSources[Chunk2Index].Size; + } + } + ZEN_INFO("Diff {}, Chunks1: {}, Chunks2: {}, Min: {}, Max: {}, Avg: {}, Saved(1) {}, Saved(2) {}", + NiceBytes(DiffSavedSize), + Info1.Info.ChunkHashes.size(), + Info2.Info.ChunkHashes.size(), + MinSize, + MaxSize, + AvgSize, + NiceBytes(Info1.Info.RawSize - ChunkedSize1), + NiceBytes(Info2.Info.RawSize - ChunkedSize2)); + }); + } + } + } + } +# endif // 0 + + // WorkLatch.CountDown(); + // WorkLatch.Wait(); +} +# endif // 0 + +void +chunkedfile_forcelink() +{ +} + +} // namespace zen + +#endif diff --git a/src/zenutil/chunking.cpp b/src/zenutil/chunking.cpp new file mode 100644 index 000000000..30edd322a --- /dev/null +++ b/src/zenutil/chunking.cpp @@ -0,0 +1,382 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "chunking.h" + +#include + +#include + +namespace zen::detail { + +static const uint32_t BuzhashTable[] = { + 0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, 0x7ebf5191, 0x841135c7, 0x65cc53b3, + 0x280a597c, 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, 0x882bf287, 0x3116737c, + 0x05569956, 0xe8cc1f68, 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, 0x9bfd7c64, + 0x0b3e7276, 0xf2688e77, 0x8fad8abc, 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2, + 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, 0x7b7c222f, 0x2955ed16, 0x9f10ca59, + 0xe840c4c9, 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, 0xb19165cd, 0x9891c393, + 0x325384ac, 0x0308459d, 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, 0x977eb18c, + 0xd8770976, 0x9833466a, 0xc674df7f, 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4, + 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, 0xbef8f0e1, 0x21d73653, 0x4e3d977a, + 0x1e7b3929, 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, 0xf5f7be70, 0xe795248a, + 0x375a2fe9, 0x425570b6, 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, 0x1bc0dfb5, + 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c, + 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf, + 0xe0d8f8ae, 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, 0xe95effac, 0x305a47bd, 0x426ba35b, 0x1233af3f, 0x686a5b83, 0x50e072e5, + 0xd9d3bb2a, 0x8befc475, 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, 0xc0d0a81c, + 0x7fa3429b, 0xe9158a1b, 0x225ea19a, 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140, + 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, 0xa41979c2, 0xf2b61284, 0x4fac1475, 0x0adb49f9, 0x19727a23, 0x15a7e374, 0xc43a18d5, + 0x3fb1aa73, 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, 0x5388e5ee, 0xcd8a7510, + 0xf901b4fd, 0xdbc13dbc, 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, 0x32baf4a9, + 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1, + 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3, + 0xc6eb57bb, 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, 0x329e5388, 0x91dd236b, + 0x2ecb0d93, 0xf4d82a3d, 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, 0x7bf7cabc, + 0xf9c18d66, 0x593ade65, 0xd95ddf11, +}; + +// ROL operation (compiler turns this into a ROL when optimizing) +ZEN_FORCEINLINE static uint32_t +Rotate32(uint32_t Value, size_t RotateCount) +{ + RotateCount &= 31; + + return ((Value) << (RotateCount)) | ((Value) >> (32 - RotateCount)); +} + +} // namespace zen::detail + +namespace zen { + +void +ZenChunkHelper::Reset() +{ + InternalReset(); + + m_BytesScanned = 0; +} + +void +ZenChunkHelper::InternalReset() +{ + m_CurrentHash = 0; + m_CurrentChunkSize = 0; + m_WindowSize = 0; +} + +void +ZenChunkHelper::SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize) +{ + if (m_WindowSize) + return; // Already started + + static_assert(kChunkSizeLimitMin > kWindowSize); + + if (AvgSize) + { + // TODO: Validate AvgSize range + } + else + { + if (MinSize && MaxSize) + { + AvgSize = std::lrint(std::pow(2, (std::log2(MinSize) + std::log2(MaxSize)) / 2)); + } + else if (MinSize) + { + AvgSize = MinSize * 4; + } + else if (MaxSize) + { + AvgSize = MaxSize / 4; + } + else + { + AvgSize = kDefaultAverageChunkSize; + } + } + + if (MinSize) + { + // TODO: Validate MinSize range + } + else + { + MinSize = std::max(AvgSize / 4, kChunkSizeLimitMin); + } + + if (MaxSize) + { + // TODO: Validate MaxSize range + } + else + { + MaxSize = std::min(AvgSize * 4, kChunkSizeLimitMax); + } + + m_Discriminator = gsl::narrow(AvgSize - MinSize); + + if (m_Discriminator < MinSize) + { + m_Discriminator = gsl::narrow(MinSize); + } + + if (m_Discriminator > MaxSize) + { + m_Discriminator = gsl::narrow(MaxSize); + } + + m_Threshold = gsl::narrow((uint64_t(std::numeric_limits::max()) + 1) / m_Discriminator); + + m_ChunkSizeMin = MinSize; + m_ChunkSizeMax = MaxSize; + m_ChunkSizeAvg = AvgSize; +} + +size_t +ZenChunkHelper::ScanChunk(const void* DataBytesIn, size_t ByteCount) +{ + size_t Result = InternalScanChunk(DataBytesIn, ByteCount); + + if (Result == kNoBoundaryFound) + { + m_BytesScanned += ByteCount; + } + else + { + m_BytesScanned += Result; + } + + return Result; +} + +size_t +ZenChunkHelper::InternalScanChunk(const void* DataBytesIn, size_t ByteCount) +{ + size_t CurrentOffset = 0; + const uint8_t* CursorPtr = reinterpret_cast(DataBytesIn); + + // There's no point in updating the hash if we know we're not + // going to have a cut point, so just skip the data. This logic currently + // provides roughly a 20% speedup on my machine + + const size_t NeedHashOffset = m_ChunkSizeMin - kWindowSize; + + if (m_CurrentChunkSize < NeedHashOffset) + { + const uint32_t SkipBytes = gsl::narrow(std::min(ByteCount, NeedHashOffset - m_CurrentChunkSize)); + + ByteCount -= SkipBytes; + m_CurrentChunkSize += SkipBytes; + CurrentOffset += SkipBytes; + CursorPtr += SkipBytes; + + m_WindowSize = 0; + + if (ByteCount == 0) + { + return kNoBoundaryFound; + } + } + + // Fill window first + + if (m_WindowSize < kWindowSize) + { + const uint32_t FillBytes = uint32_t(std::min(ByteCount, kWindowSize - m_WindowSize)); + + memcpy(&m_Window[m_WindowSize], CursorPtr, FillBytes); + + CursorPtr += FillBytes; + + m_WindowSize += FillBytes; + m_CurrentChunkSize += FillBytes; + + CurrentOffset += FillBytes; + ByteCount -= FillBytes; + + if (m_WindowSize < kWindowSize) + { + return kNoBoundaryFound; + } + + // We have a full window, initialize hash + + uint32_t CurrentHash = 0; + + for (int i = 1; i < kWindowSize; ++i) + { + CurrentHash ^= detail::Rotate32(detail::BuzhashTable[m_Window[i - 1]], kWindowSize - i); + } + + m_CurrentHash = CurrentHash ^ detail::BuzhashTable[m_Window[kWindowSize - 1]]; + } + + // Scan for boundaries (i.e points where the hash matches the value determined by + // the discriminator) + + uint32_t CurrentHash = m_CurrentHash; + uint32_t CurrentChunkSize = m_CurrentChunkSize; + + size_t Index = CurrentChunkSize % kWindowSize; + + if (m_Threshold && m_UseThreshold) + { + // This is roughly 4x faster than the general modulo approach on my + // TR 3990X (~940MB/sec) and doesn't require any special parameters to + // achieve max performance + + while (ByteCount) + { + const uint8_t NewByte = *CursorPtr; + const uint8_t OldByte = m_Window[Index]; + + CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ + detail::BuzhashTable[NewByte]; + + CurrentChunkSize++; + CurrentOffset++; + + if (CurrentChunkSize >= m_ChunkSizeMin) + { + bool FoundBoundary; + + if (CurrentChunkSize >= m_ChunkSizeMax) + { + FoundBoundary = true; + } + else + { + FoundBoundary = CurrentHash <= m_Threshold; + } + + if (FoundBoundary) + { + // Boundary found! + InternalReset(); + + return CurrentOffset; + } + } + + m_Window[Index++] = *CursorPtr; + + if (Index == kWindowSize) + { + Index = 0; + } + + ++CursorPtr; + --ByteCount; + } + } + else if ((m_Discriminator & (m_Discriminator - 1)) == 0) + { + // This is quite a bit faster than the generic modulo path, but + // requires a very specific average chunk size to be used. If you + // pass in an even power-of-two divided by 0.75 as the average + // chunk size you'll hit this path + + const uint32_t Mask = m_Discriminator - 1; + + while (ByteCount) + { + const uint8_t NewByte = *CursorPtr; + const uint8_t OldByte = m_Window[Index]; + + CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ + detail::BuzhashTable[NewByte]; + + CurrentChunkSize++; + CurrentOffset++; + + if (CurrentChunkSize >= m_ChunkSizeMin) + { + bool FoundBoundary; + + if (CurrentChunkSize >= m_ChunkSizeMax) + { + FoundBoundary = true; + } + else + { + FoundBoundary = (CurrentHash & Mask) == Mask; + } + + if (FoundBoundary) + { + // Boundary found! + InternalReset(); + + return CurrentOffset; + } + } + + m_Window[Index++] = *CursorPtr; + + if (Index == kWindowSize) + { + Index = 0; + } + + ++CursorPtr; + --ByteCount; + } + } + else + { + // This is the slowest path, which caps out around 250MB/sec for large sizes + // on my TR3900X + + while (ByteCount) + { + const uint8_t NewByte = *CursorPtr; + const uint8_t OldByte = m_Window[Index]; + + CurrentHash = detail::Rotate32(CurrentHash, 1) ^ detail::Rotate32(detail::BuzhashTable[OldByte], m_WindowSize) ^ + detail::BuzhashTable[NewByte]; + + CurrentChunkSize++; + CurrentOffset++; + + if (CurrentChunkSize >= m_ChunkSizeMin) + { + bool FoundBoundary; + + if (CurrentChunkSize >= m_ChunkSizeMax) + { + FoundBoundary = true; + } + else + { + FoundBoundary = (CurrentHash % m_Discriminator) == (m_Discriminator - 1); + } + + if (FoundBoundary) + { + // Boundary found! + InternalReset(); + + return CurrentOffset; + } + } + + m_Window[Index++] = *CursorPtr; + + if (Index == kWindowSize) + { + Index = 0; + } + + ++CursorPtr; + --ByteCount; + } + } + + m_CurrentChunkSize = CurrentChunkSize; + m_CurrentHash = CurrentHash; + + return kNoBoundaryFound; +} + +} // namespace zen diff --git a/src/zenutil/chunking.h b/src/zenutil/chunking.h new file mode 100644 index 000000000..09c56454f --- /dev/null +++ b/src/zenutil/chunking.h @@ -0,0 +1,56 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once +#include + +namespace zen { + +/** Content-defined chunking helper + */ +class ZenChunkHelper +{ +public: + void SetChunkSize(size_t MinSize, size_t MaxSize, size_t AvgSize); + size_t ScanChunk(const void* DataBytes, size_t ByteCount); + void Reset(); + + // This controls which chunking approach is used - threshold or + // modulo based. Threshold is faster and generates similarly sized + // chunks + void SetUseThreshold(bool NewState) { m_UseThreshold = NewState; } + + inline size_t ChunkSizeMin() const { return m_ChunkSizeMin; } + inline size_t ChunkSizeMax() const { return m_ChunkSizeMax; } + inline size_t ChunkSizeAvg() const { return m_ChunkSizeAvg; } + inline uint64_t BytesScanned() const { return m_BytesScanned; } + + static constexpr size_t kNoBoundaryFound = size_t(~0ull); + +private: + size_t m_ChunkSizeMin = 0; + size_t m_ChunkSizeMax = 0; + size_t m_ChunkSizeAvg = 0; + + uint32_t m_Discriminator = 0; // Computed in SetChunkSize() + uint32_t m_Threshold = 0; // Computed in SetChunkSize() + + bool m_UseThreshold = true; + + static constexpr size_t kChunkSizeLimitMax = 64 * 1024 * 1024; + static constexpr size_t kChunkSizeLimitMin = 1024; + static constexpr size_t kDefaultAverageChunkSize = 64 * 1024; + + static constexpr int kWindowSize = 48; + uint8_t m_Window[kWindowSize]; + uint32_t m_WindowSize = 0; + + uint32_t m_CurrentHash = 0; + uint32_t m_CurrentChunkSize = 0; + + uint64_t m_BytesScanned = 0; + + size_t InternalScanChunk(const void* DataBytes, size_t ByteCount); + void InternalReset(); +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkblock.h b/src/zenutil/include/zenutil/chunkblock.h new file mode 100644 index 000000000..9b7414629 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkblock.h @@ -0,0 +1,32 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include + +#include +#include + +#include +#include + +namespace zen { + +struct ChunkBlockDescription +{ + IoHash BlockHash; + std::vector ChunkHashes; + std::vector ChunkRawLengths; +}; + +std::vector ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject); +ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject); +CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData); + +typedef std::function(const IoHash& RawHash)> FetchChunkFunc; + +CompressedBuffer GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock); +bool IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function Visitor); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkedfile.h b/src/zenutil/include/zenutil/chunkedfile.h new file mode 100644 index 000000000..7110ad317 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkedfile.h @@ -0,0 +1,58 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include +#include + +#include +#include + +namespace zen { + +class BasicFile; + +struct ChunkedInfo +{ + uint64_t RawSize = 0; + IoHash RawHash; + std::vector ChunkSequence; + std::vector ChunkHashes; +}; + +struct ChunkSource +{ + uint64_t Offset; // 8 + uint32_t Size; // 4 +}; + +struct ChunkedInfoWithSource +{ + ChunkedInfo Info; + std::vector ChunkSources; +}; + +struct ChunkedParams +{ + bool UseThreshold = true; + size_t MinSize = (2u * 1024u) - 128u; + size_t MaxSize = (16u * 1024u); + size_t AvgSize = (3u * 1024u); +}; + +static const ChunkedParams UShaderByteCodeParams = {.UseThreshold = true, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}; + +ChunkedInfoWithSource ChunkData(BasicFile& RawData, + uint64_t Offset, + uint64_t Size, + ChunkedParams Params = {}, + std::atomic* BytesProcessed = nullptr); +void Reconstruct(const ChunkedInfo& Info, + const std::filesystem::path& TargetPath, + std::function GetChunk); +IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info); +ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer); + +void chunkedfile_forcelink(); +} // namespace zen diff --git a/src/zenutil/zenutil.cpp b/src/zenutil/zenutil.cpp index c54144549..19eb63ce9 100644 --- a/src/zenutil/zenutil.cpp +++ b/src/zenutil/zenutil.cpp @@ -6,6 +6,7 @@ # include # include +# include namespace zen { @@ -15,6 +16,7 @@ zenutil_forcelinktests() cachepolicy_forcelink(); cache::rpcrecord_forcelink(); cacherequests_forcelink(); + chunkedfile_forcelink(); } } // namespace zen -- cgit v1.2.3 From 5bc5b0dd59c0f02afe553e5074dfe57951b19044 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 25 Feb 2025 15:48:43 +0100 Subject: improvements and infrastructure for upcoming builds api command line (#284) * add modification tick to filesystem traversal * add ShowDetails option to ProgressBar * log callstack if we terminate process * handle chunking if MaxSize > 1MB * BasicFile write helpers and WriteToTempFile simplifications * bugfix for CompositeBuffer::IterateRange when using DecompressToComposite for actually comrpessed data revert of earlier optimization * faster compress/decompress for large disk-based files * enable progress feedback in IoHash::HashBuffer * add payload validation in HttpClient::Get * fix range requests (range is including end byte) * remove BuildPartId for blob/block related operations in builds api --- src/zenutil/chunkedfile.cpp | 2 +- .../include/zenutil/jupiter/jupitersession.h | 13 +--- src/zenutil/jupiter/jupitersession.cpp | 85 ++++++++-------------- src/zenutil/logging.cpp | 9 ++- 4 files changed, 40 insertions(+), 69 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp index c08492eb0..3f3a6661c 100644 --- a/src/zenutil/chunkedfile.cpp +++ b/src/zenutil/chunkedfile.cpp @@ -121,7 +121,7 @@ ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Para Chunker.SetUseThreshold(Params.UseThreshold); Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize); size_t End = Offset + Size; - const size_t ScanBufferSize = 1u * 1024 * 1024; // (Params.MaxSize * 9) / 3;//1 * 1024 * 1024; + const size_t ScanBufferSize = Max(1u * 1024 * 1024, Params.MaxSize); BasicFileBuffer RawBuffer(RawData, ScanBufferSize); MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); ZEN_ASSERT(!SliceView.IsEmpty()); diff --git a/src/zenutil/include/zenutil/jupiter/jupitersession.h b/src/zenutil/include/zenutil/jupiter/jupitersession.h index 075c35b40..852271868 100644 --- a/src/zenutil/include/zenutil/jupiter/jupitersession.h +++ b/src/zenutil/include/zenutil/jupiter/jupitersession.h @@ -116,21 +116,18 @@ public: JupiterResult PutBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, ZenContentType ContentType, const CompositeBuffer& Payload); JupiterResult GetBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, std::filesystem::path TempFolderPath); JupiterResult PutMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, ZenContentType ContentType, uint64_t PayloadSize, @@ -139,7 +136,6 @@ public: JupiterResult GetMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, uint64_t ChunkSize, std::function&& Receiver, @@ -147,7 +143,6 @@ public: JupiterResult PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, const IoBuffer& Payload); FinalizeBuildPartResult FinalizeBuildPart(std::string_view Namespace, @@ -155,12 +150,8 @@ public: const Oid& BuildId, const Oid& PartId, const IoHash& RawHash); - JupiterResult FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); - JupiterResult GetBlockMetadata(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - IoBuffer Payload); + JupiterResult FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + JupiterResult GetBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, IoBuffer Payload); private: inline LoggerRef Log() { return m_Log; } diff --git a/src/zenutil/jupiter/jupitersession.cpp b/src/zenutil/jupiter/jupitersession.cpp index d56927b44..06ac6ae36 100644 --- a/src/zenutil/jupiter/jupitersession.cpp +++ b/src/zenutil/jupiter/jupitersession.cpp @@ -436,15 +436,14 @@ JupiterResult JupiterSession::PutBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, ZenContentType ContentType, const CompositeBuffer& Payload) { - HttpClient::Response Response = m_HttpClient.Upload( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - Payload, - ContentType); + HttpClient::Response Response = + m_HttpClient.Upload(fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()), + Payload, + ContentType); return detail::ConvertResponse(Response, "JupiterSession::PutBuildBlob"sv); } @@ -452,7 +451,6 @@ JupiterResult JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, ZenContentType ContentType, uint64_t PayloadSize, @@ -498,12 +496,8 @@ JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, StartMultipartPayloadWriter.AddInteger("blobLength"sv, PayloadSize); CbObject StartMultipartPayload = StartMultipartPayloadWriter.Save(); - std::string StartMultipartResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/startMultipartUpload", - Namespace, - BucketId, - BuildId, - PartId, - Hash.ToHexString()); + std::string StartMultipartResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/startMultipartUpload", Namespace, BucketId, BuildId, Hash.ToHexString()); // ZEN_INFO("POST: {}", StartMultipartResponseRequestString); HttpClient::Response StartMultipartResponse = m_HttpClient.Post(StartMultipartResponseRequestString, StartMultipartPayload, HttpClient::Accept(ZenContentType::kCbObject)); @@ -529,15 +523,14 @@ JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, for (size_t PartIndex = 0; PartIndex < Workload->PartDescription.Parts.size(); PartIndex++) { - OutWorkItems.emplace_back([this, Namespace, BucketId, BuildId, PartId, Hash, ContentType, Workload, PartIndex]( + OutWorkItems.emplace_back([this, Namespace, BucketId, BuildId, Hash, ContentType, Workload, PartIndex]( bool& OutIsComplete) -> JupiterResult { const MultipartUploadResponse::Part& Part = Workload->PartDescription.Parts[PartIndex]; IoBuffer PartPayload = Workload->Transmitter(Part.FirstByte, Part.LastByte - Part.FirstByte); - std::string MultipartUploadResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/uploadMultipart{}", + std::string MultipartUploadResponseRequestString = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/uploadMultipart{}", Namespace, BucketId, BuildId, - PartId, Hash.ToHexString(), Part.QueryString); // ZEN_INFO("PUT: {}", MultipartUploadResponseRequestString); @@ -571,12 +564,7 @@ JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, CbObject CompletePayload = CompletePayloadWriter.Save(); std::string MultipartEndResponseRequestString = - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/completeMultipart", - Namespace, - BucketId, - BuildId, - PartId, - Hash.ToHexString()); + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/completeMultipart", Namespace, BucketId, BuildId, Hash.ToHexString()); MultipartEndResponse = m_HttpClient.Post(MultipartEndResponseRequestString, CompletePayload, @@ -600,13 +588,12 @@ JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, size_t RetryPartIndex = PartNameToIndex.at(RetryPartId); const MultipartUploadResponse::Part& RetryPart = Workload->PartDescription.Parts[RetryPartIndex]; IoBuffer RetryPartPayload = - Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte); + Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte - 1); std::string RetryMultipartUploadResponseRequestString = - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}/uploadMultipart{}", + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/uploadMultipart{}", Namespace, BucketId, BuildId, - RetryPartId, Hash.ToHexString(), RetryPart.QueryString); @@ -642,14 +629,12 @@ JupiterResult JupiterSession::GetMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, uint64_t ChunkSize, std::function&& Receiver, std::vector>& OutWorkItems) { - std::string RequestUrl = - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()); + std::string RequestUrl = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()); HttpClient::Response Response = m_HttpClient.Get(RequestUrl, HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", 0, ChunkSize - 1)}})); if (Response.IsSuccess()) @@ -683,17 +668,12 @@ JupiterSession::GetMultipartBuildBlob(std::string_view Namespa { uint64_t PartSize = Min(ChunkSize, TotalSize - Offset); OutWorkItems.emplace_back( - [this, Namespace, BucketId, BuildId, PartId, Hash, TotalSize, Workload, Offset, PartSize]() - -> JupiterResult { - std::string RequestUrl = fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", - Namespace, - BucketId, - BuildId, - PartId, - Hash.ToHexString()); - HttpClient::Response Response = m_HttpClient.Get( - RequestUrl, - HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", Offset, Offset + PartSize - 1)}})); + [this, Namespace, BucketId, BuildId, Hash, TotalSize, Workload, Offset, PartSize]() -> JupiterResult { + std::string RequestUrl = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()); + HttpClient::Response Response = m_HttpClient.Get( + RequestUrl, + HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", Offset, Offset + PartSize - 1)}})); if (Response.IsSuccess()) { uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Response.ResponsePayload.GetSize()); @@ -717,13 +697,12 @@ JupiterResult JupiterSession::GetBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, std::filesystem::path TempFolderPath) { - HttpClient::Response Response = m_HttpClient.Download( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - TempFolderPath); + HttpClient::Response Response = + m_HttpClient.Download(fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()), + TempFolderPath); return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv); } @@ -732,14 +711,13 @@ JupiterResult JupiterSession::PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, const IoBuffer& Payload) { ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); - HttpClient::Response Response = m_HttpClient.Put( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/{}/metadata", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - Payload); + HttpClient::Response Response = + m_HttpClient.Put(fmt::format("/api/v2/builds/{}/{}/{}/blocks/{}/metadata", Namespace, BucketId, BuildId, Hash.ToHexString()), + Payload); return detail::ConvertResponse(Response, "JupiterSession::PutBlockMetadata"sv); } @@ -772,24 +750,19 @@ JupiterSession::FinalizeBuildPart(std::string_view Namespace, } JupiterResult -JupiterSession::FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId) +JupiterSession::FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId) { - HttpClient::Response Response = - m_HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/listBlocks", Namespace, BucketId, BuildId, PartId), - HttpClient::Accept(ZenContentType::kCbObject)); + HttpClient::Response Response = m_HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}/blocks/listBlocks", Namespace, BucketId, BuildId), + HttpClient::Accept(ZenContentType::kCbObject)); return detail::ConvertResponse(Response, "JupiterSession::FindBlocks"sv); } JupiterResult -JupiterSession::GetBlockMetadata(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - IoBuffer Payload) +JupiterSession::GetBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, IoBuffer Payload) { ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); HttpClient::Response Response = - m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/getBlockMetadata", Namespace, BucketId, BuildId, PartId), + m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/blocks/getBlockMetadata", Namespace, BucketId, BuildId), Payload, HttpClient::Accept(ZenContentType::kCbObject)); return detail::ConvertResponse(Response, "JupiterSession::GetBlockMetadata"sv); diff --git a/src/zenutil/logging.cpp b/src/zenutil/logging.cpp index 6314c407f..0444fa2c4 100644 --- a/src/zenutil/logging.cpp +++ b/src/zenutil/logging.cpp @@ -10,6 +10,7 @@ ZEN_THIRD_PARTY_INCLUDES_START #include ZEN_THIRD_PARTY_INCLUDES_END +#include #include #include #include @@ -97,7 +98,13 @@ BeginInitializeLogging(const LoggingOptions& LogOptions) } } - std::set_terminate([]() { ZEN_CRITICAL("Program exited abnormally via std::terminate()"); }); + std::set_terminate([]() { + void* Frames[8]; + uint32_t FrameCount = GetCallstack(2, 8, Frames); + CallstackFrames* Callstack = CreateCallstack(FrameCount, Frames); + ZEN_CRITICAL("Program exited abnormally via std::terminate()\n{}", CallstackToString(Callstack, " ")); + FreeCallstack(Callstack); + }); // Default -- cgit v1.2.3 From 7d8fe45af3b49d800f84f0ddce051c0b3b2e837d Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 26 Feb 2025 15:10:14 +0100 Subject: builds upload command (#278) - Feature: **EXPERIMENTAL** New `zen builds` command to list, upload and download folders to Cloud Build API - `builds list` list available builds (**INCOMPLETE - FILTERING MISSING**) - `builds upload` upload a folder to Cloud Build API - `--local-path` source folder to upload - `--create-build` creates a new parent build object (using the object id), if omitted a parent build must exist and `--build-id` must be given - `--build-id` an Oid in hex form for the Build identifier to use - omit to have the id auto generated - `--build-part-id` and Oid in hex form for the Build Part identifier for the folder - omit to have the id auto generated - `--build-part-name` name of the build part - if omitted the name of the leaf folder name give in `--local-path` - `--metadata-path` path to a json formatted file with meta data information about the build. Meta-data must be provided if `--create-build` is set - `--metadata` key-value pairs separated by ';' with build meta data for the build. (key1=value1;key2=value2). Meta-data must be provided if `--create-build` is set - `--clean` ignore any existing blocks of chunk data and upload a fresh set of blocks - `--allow-multipart` enable usage of multi-part http upload requests - `--manifest-path` path to text file listing files to include in upload. Exclude to upload everything in `--local-path` - `builds download` download a folder from Cloud Build API (**INCOMPLETE - WILL WIPE UNTRACKED DATA FROM TARGET FOLDER**) - `--local-path` target folder to download to - `--build-id` an Oid in hex form for the Build identifier to use - `--build-part-id` a comma separated list of Oid in hex for the build part identifier(s) to download - mutually exclusive to `--build-part-name` - `--build-part-name` a comma separated list of names for the build part(s) to download - if omitted the name of the leaf folder name give in `--local-path` - `--clean` deletes all data in target folder before downloading (NON-CLEAN IS NOT IMPLEMENTED YET) - `--allow-multipart` enable usage of multi-part http download reqeusts - `builds diff` download a folder from Cloud Build API - `--local-path` target folder to download to - `--compare-path` folder to compare target with - `--only-chunked` compare only files that would be chunked - `builds fetch-blob` fetch and validate a blob from remote store - `--build-id` an Oid in hex form for the Build identifier to use - `--blob-hash` an IoHash in hex form identifying the blob to download - `builds validate part` fetch a build part and validate all referenced attachments - `--build-id` an Oid in hex form for the Build identifier to use - `--build-part-id` an Oid in hex for the build part identifier to validate - mutually exclusive to `--build-part-name` - `--build-part-name` a name for the build part to validate - mutually exclusive to `--build-part-id` - `builds test` a series of operation that uploads, downloads and test various aspects of incremental operations - `--local-path` source folder to upload - Options for Cloud Build API remote store (`list`, `upload`, `download`, `fetch-blob`, `validate-part`) - `--url` Cloud Builds URL - `--assume-http2` assume that the builds endpoint is a HTTP/2 endpoint skipping HTTP/1.1 upgrade handshake - `--namespace` Builds Storage namespace - `--bucket` Builds Storage bucket - Authentication options for Cloud Build API - Auth token - `--access-token` http auth Cloud Storage access token - `--access-token-env` name of environment variable that holds the Http auth Cloud Storage access token - `--access-token-path` path to json file that holds the Http auth Cloud Storage access token - OpenId authentication - `--openid-provider-name` Open ID provider name - `--openid-provider-url` Open ID provider url - `--openid-client-id`Open ID client id - `--openid-refresh-token` Open ID refresh token - `--encryption-aes-key` 256 bit AES encryption key for storing OpenID credentials - `--encryption-aes-iv` 128 bit AES encryption initialization vector for storing OpenID credentials - OAuth authentication - `--oauth-url` OAuth provier url - `--oauth-clientid` OAuth client id - `--oauth-clientsecret` OAuth client secret - Options for file based remote store used for for testing purposes (`list`, `upload`, `download`, `fetch-blob`, `validate-part`, `test`) - `--storage-path` path to folder to store builds data - `--json-metadata` enable json output in store for all compact binary objects (off by default) - Output options for all builds commands - `--plain-progress` use plain line-by-line progress output - `--verbose` --- src/zenutil/chunkblock.cpp | 94 ++- src/zenutil/chunkedcontent.cpp | 865 +++++++++++++++++++++ src/zenutil/chunkingcontroller.cpp | 265 +++++++ src/zenutil/filebuildstorage.cpp | 616 +++++++++++++++ src/zenutil/include/zenutil/buildstorage.h | 55 ++ src/zenutil/include/zenutil/chunkblock.h | 17 +- src/zenutil/include/zenutil/chunkedcontent.h | 256 ++++++ src/zenutil/include/zenutil/chunkingcontroller.h | 55 ++ src/zenutil/include/zenutil/filebuildstorage.h | 16 + .../include/zenutil/jupiter/jupiterbuildstorage.h | 17 + src/zenutil/include/zenutil/parallellwork.h | 69 ++ src/zenutil/jupiter/jupiterbuildstorage.cpp | 371 +++++++++ 12 files changed, 2678 insertions(+), 18 deletions(-) create mode 100644 src/zenutil/chunkedcontent.cpp create mode 100644 src/zenutil/chunkingcontroller.cpp create mode 100644 src/zenutil/filebuildstorage.cpp create mode 100644 src/zenutil/include/zenutil/buildstorage.h create mode 100644 src/zenutil/include/zenutil/chunkedcontent.h create mode 100644 src/zenutil/include/zenutil/chunkingcontroller.h create mode 100644 src/zenutil/include/zenutil/filebuildstorage.h create mode 100644 src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h create mode 100644 src/zenutil/include/zenutil/parallellwork.h create mode 100644 src/zenutil/jupiter/jupiterbuildstorage.cpp (limited to 'src/zenutil') diff --git a/src/zenutil/chunkblock.cpp b/src/zenutil/chunkblock.cpp index 6dae5af11..a19cf5c1b 100644 --- a/src/zenutil/chunkblock.cpp +++ b/src/zenutil/chunkblock.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -18,20 +19,27 @@ ParseChunkBlockDescription(const CbObjectView& BlockObject) Result.BlockHash = BlockObject["rawHash"sv].AsHash(); if (Result.BlockHash != IoHash::Zero) { + Result.HeaderSize = BlockObject["headerSize"sv].AsUInt64(); CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView(); - Result.ChunkHashes.reserve(ChunksArray.Num()); + Result.ChunkRawHashes.reserve(ChunksArray.Num()); for (CbFieldView ChunkView : ChunksArray) { - Result.ChunkHashes.push_back(ChunkView.AsHash()); + Result.ChunkRawHashes.push_back(ChunkView.AsHash()); } - CbArrayView ChunkRawLengthsArray = BlockObject["chunkRawLengths"sv].AsArrayView(); - std::vector ChunkLengths; + CbArrayView ChunkRawLengthsArray = BlockObject["chunkRawLengths"sv].AsArrayView(); Result.ChunkRawLengths.reserve(ChunkRawLengthsArray.Num()); for (CbFieldView ChunkView : ChunkRawLengthsArray) { Result.ChunkRawLengths.push_back(ChunkView.AsUInt32()); } + + CbArrayView ChunkCompressedLengthsArray = BlockObject["chunkCompressedLengths"sv].AsArrayView(); + Result.ChunkCompressedLengths.reserve(ChunkCompressedLengthsArray.Num()); + for (CbFieldView ChunkView : ChunkCompressedLengthsArray) + { + Result.ChunkCompressedLengths.push_back(ChunkView.AsUInt32()); + } } return Result; } @@ -57,18 +65,23 @@ ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject) CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData) { - ZEN_ASSERT(Block.ChunkRawLengths.size() == Block.ChunkHashes.size()); + ZEN_ASSERT(Block.BlockHash != IoHash::Zero); + ZEN_ASSERT(Block.HeaderSize > 0); + ZEN_ASSERT(Block.ChunkRawLengths.size() == Block.ChunkRawHashes.size()); + ZEN_ASSERT(Block.ChunkCompressedLengths.size() == Block.ChunkRawHashes.size()); CbObjectWriter Writer; Writer.AddHash("rawHash"sv, Block.BlockHash); + Writer.AddInteger("headerSize"sv, Block.HeaderSize); Writer.BeginArray("rawHashes"sv); { - for (const IoHash& ChunkHash : Block.ChunkHashes) + for (const IoHash& ChunkHash : Block.ChunkRawHashes) { Writer.AddHash(ChunkHash); } } Writer.EndArray(); + Writer.BeginArray("chunkRawLengths"); { for (uint32_t ChunkSize : Block.ChunkRawLengths) @@ -78,11 +91,58 @@ BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView Meta } Writer.EndArray(); + Writer.BeginArray("chunkCompressedLengths"); + { + for (uint32_t ChunkSize : Block.ChunkCompressedLengths) + { + Writer.AddInteger(ChunkSize); + } + } + Writer.EndArray(); + Writer.AddObject("metadata", MetaData); return Writer.Save(); } +ChunkBlockDescription +GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash) +{ + ChunkBlockDescription BlockDescription = {{.BlockHash = IoHash::HashBuffer(BlockPayload)}}; + if (BlockDescription.BlockHash != RawHash) + { + throw std::runtime_error(fmt::format("Block {} content hash {} does not match block hash", RawHash, BlockDescription.BlockHash)); + } + if (IterateChunkBlock( + BlockPayload, + [&BlockDescription, RawHash](CompressedBuffer&& Chunk, const IoHash& AttachmentHash) { + if (CompositeBuffer Decompressed = Chunk.DecompressToComposite(); Decompressed) + { + IoHash ChunkHash = IoHash::HashBuffer(Decompressed.Flatten()); + if (ChunkHash != AttachmentHash) + { + throw std::runtime_error( + fmt::format("Chunk {} in block {} content hash {} does not match chunk", AttachmentHash, RawHash, ChunkHash)); + } + BlockDescription.ChunkRawHashes.push_back(AttachmentHash); + BlockDescription.ChunkRawLengths.push_back(gsl::narrow(Decompressed.GetSize())); + BlockDescription.ChunkCompressedLengths.push_back(gsl::narrow(Chunk.GetCompressedSize())); + } + else + { + throw std::runtime_error(fmt::format("Chunk {} in block {} is not a compressed buffer", AttachmentHash, RawHash)); + } + }, + BlockDescription.HeaderSize)) + { + return BlockDescription; + } + else + { + throw std::runtime_error(fmt::format("Block {} is malformed", RawHash)); + } +} + CompressedBuffer GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock) { @@ -91,8 +151,9 @@ GenerateChunkBlock(std::vector>&& FetchChunks, std::vector ChunkSegments; ChunkSegments.resize(1); ChunkSegments.reserve(1 + ChunkCount); - OutBlock.ChunkHashes.reserve(ChunkCount); + OutBlock.ChunkRawHashes.reserve(ChunkCount); OutBlock.ChunkRawLengths.reserve(ChunkCount); + OutBlock.ChunkCompressedLengths.reserve(ChunkCount); { IoBuffer TempBuffer(ChunkCount * 9); MutableMemoryView View = TempBuffer.GetMutableView(); @@ -106,16 +167,19 @@ GenerateChunkBlock(std::vector>&& FetchChunks, std::span Segments = Chunk.second.GetCompressed().GetSegments(); for (const SharedBuffer& Segment : Segments) { + ZEN_ASSERT(Segment.IsOwned()); ChunkSize += Segment.GetSize(); ChunkSegments.push_back(Segment); } BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr); - OutBlock.ChunkHashes.push_back(It.first); + OutBlock.ChunkRawHashes.push_back(It.first); OutBlock.ChunkRawLengths.push_back(gsl::narrow(Chunk.first)); + OutBlock.ChunkCompressedLengths.push_back(gsl::narrow(ChunkSize)); } ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd()); ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr); ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow(TempBufferLength))); + OutBlock.HeaderSize = TempBufferLength; } CompressedBuffer CompressedBlock = CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None); @@ -124,7 +188,9 @@ GenerateChunkBlock(std::vector>&& FetchChunks, } bool -IterateChunkBlock(const SharedBuffer& BlockPayload, std::function Visitor) +IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function Visitor, + uint64_t& OutHeaderSize) { ZEN_ASSERT(BlockPayload); if (BlockPayload.GetSize() < 1) @@ -144,21 +210,23 @@ IterateChunkBlock(const SharedBuffer& BlockPayload, std::function + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +namespace { + void AddCunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map& ChunkHashToChunkIndex, + const IoHash& RawHash, + std::span ChunkSequence, + std::span ChunkHashes, + std::span ChunkRawSizes) + { + ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size()); + InOutChunkedContent.ChunkCounts.push_back(gsl::narrow(ChunkSequence.size())); + InOutChunkedContent.ChunkOrders.reserve(InOutChunkedContent.ChunkOrders.size() + ChunkSequence.size()); + + for (uint32_t ChunkedSequenceIndex : ChunkSequence) + { + const IoHash& ChunkHash = ChunkHashes[ChunkedSequenceIndex]; + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + uint32_t ChunkIndex = gsl::narrow(It->second); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + } + else + { + uint32_t ChunkIndex = gsl::narrow(InOutChunkedContent.ChunkHashes.size()); + ChunkHashToChunkIndex.insert_or_assign(ChunkHash, ChunkIndex); + InOutChunkedContent.ChunkHashes.push_back(ChunkHash); + InOutChunkedContent.ChunkRawSizes.push_back(ChunkRawSizes[ChunkedSequenceIndex]); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + Stats.UniqueChunksFound++; + Stats.UniqueBytesFound += ChunkRawSizes[ChunkedSequenceIndex]; + } + } + InOutChunkedContent.SequenceRawHashes.push_back(RawHash); + Stats.UniqueSequencesFound++; + } + + void AddCunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map& ChunkHashToChunkIndex, + const IoHash& RawHash, + const uint64_t RawSize) + { + InOutChunkedContent.ChunkCounts.push_back(1); + + if (auto It = ChunkHashToChunkIndex.find(RawHash); It != ChunkHashToChunkIndex.end()) + { + uint32_t ChunkIndex = gsl::narrow(It->second); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + } + else + { + uint32_t ChunkIndex = gsl::narrow(InOutChunkedContent.ChunkHashes.size()); + ChunkHashToChunkIndex.insert_or_assign(RawHash, ChunkIndex); + InOutChunkedContent.ChunkHashes.push_back(RawHash); + InOutChunkedContent.ChunkRawSizes.push_back(RawSize); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + Stats.UniqueChunksFound++; + Stats.UniqueBytesFound += RawSize; + } + InOutChunkedContent.SequenceRawHashes.push_back(RawHash); + Stats.UniqueSequencesFound++; + } + + IoHash HashOneFile(ChunkingStatistics& Stats, + const ChunkingController& InChunkingController, + ChunkedFolderContent& OutChunkedContent, + tsl::robin_map& ChunkHashToChunkIndex, + tsl::robin_map& RawHashToSequenceRawHashIndex, + RwLock& Lock, + const std::filesystem::path& FolderPath, + uint32_t PathIndex) + { + const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; + const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + + if (RawSize == 0) + { + return IoHash::Zero; + } + else + { + ChunkedInfoWithSource Chunked; + const bool DidChunking = + InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed); + if (DidChunking) + { + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {Chunked.Info.RawHash, gsl::narrow(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + std::vector ChunkSizes; + ChunkSizes.reserve(Chunked.ChunkSources.size()); + for (const ChunkSource& Source : Chunked.ChunkSources) + { + ChunkSizes.push_back(Source.Size); + } + AddCunkSequence(Stats, + OutChunkedContent.ChunkedContent, + ChunkHashToChunkIndex, + Chunked.Info.RawHash, + Chunked.Info.ChunkSequence, + Chunked.Info.ChunkHashes, + ChunkSizes); + Stats.UniqueSequencesFound++; + } + }); + Stats.FilesChunked++; + return Chunked.Info.RawHash; + } + else + { + IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); + const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); + + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Hash)) + { + RawHashToSequenceRawHashIndex.insert( + {Hash, gsl::narrow(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + AddCunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); + Stats.UniqueSequencesFound++; + } + }); + return Hash; + } + } + } + + std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); } + +} // namespace + +std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv}; + +std::string_view +ToString(SourcePlatform Platform) +{ + return FolderContentSourcePlatformNames[(size_t)Platform]; +} + +SourcePlatform +FromString(std::string_view Platform, SourcePlatform Default) +{ + for (size_t Index = 0; Index < (size_t)SourcePlatform::_Count; Index++) + { + if (Platform == FolderContentSourcePlatformNames[Index]) + { + return (SourcePlatform)Index; + } + } + return Default; +} + +SourcePlatform +GetSourceCurrentPlatform() +{ +#if ZEN_PLATFORM_WINDOWS + return SourcePlatform::Windows; +#endif +#if ZEN_PLATFORM_MAC + return SourcePlatform::MacOS; +#endif +#if ZEN_PLATFORM_LINUX + return SourcePlatform::Linux; +#endif +} + +bool +FolderContent::AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs) +{ +#if ZEN_PLATFORM_WINDOWS + return (Lhs & 0xff) == (Rhs & 0xff); +#endif +#if ZEN_PLATFORM_MAC + return Lhs == Rhs; +#endif +#if ZEN_PLATFORM_LINUX + return Lhs == Rhs; +#endif +} + +bool +FolderContent::operator==(const FolderContent& Rhs) const +{ + if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) && + (ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size())) + { + size_t PathCount = 0; + for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string()) + { + return false; + } + } + return true; + } + return false; +} + +bool +FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const +{ + tsl::robin_map RhsPathToIndex; + const size_t RhsPathCount = Rhs.Paths.size(); + RhsPathToIndex.reserve(RhsPathCount); + for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + const size_t PathCount = Paths.size(); + for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const size_t RhsPathIndex = It->second; + if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || + (!AreFileAttributesEqual(Attributes[PathIndex], Rhs.Attributes[RhsPathIndex])) || + (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) + { + return false; + } + } + else + { + return false; + } + } + return true; +} + +void +FolderContent::UpdateState(const FolderContent& Rhs, std::vector& OutPathIndexesOufOfDate) +{ + tsl::robin_map RhsPathToIndex; + const uint32_t RhsPathCount = gsl::narrow(Rhs.Paths.size()); + RhsPathToIndex.reserve(RhsPathCount); + for (uint32_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + uint32_t PathCount = gsl::narrow(Paths.size()); + for (uint32_t PathIndex = 0; PathIndex < PathCount;) + { + if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const uint32_t RhsPathIndex = It->second; + + if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || + (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) + { + RawSizes[PathIndex] = Rhs.RawSizes[RhsPathIndex]; + ModificationTicks[PathIndex] = Rhs.ModificationTicks[RhsPathIndex]; + OutPathIndexesOufOfDate.push_back(PathIndex); + } + Attributes[PathIndex] = Rhs.Attributes[RhsPathIndex]; + PathIndex++; + } + else + { + Paths.erase(Paths.begin() + PathIndex); + RawSizes.erase(RawSizes.begin() + PathIndex); + Attributes.erase(Attributes.begin() + PathIndex); + ModificationTicks.erase(ModificationTicks.begin() + PathIndex); + PathCount--; + } + } +} + +FolderContent +GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector& OutDeletedPathIndexes) +{ + FolderContent Result = {.Platform = Old.Platform}; + tsl::robin_map NewPathToIndex; + const uint32_t NewPathCount = gsl::narrow(New.Paths.size()); + NewPathToIndex.reserve(NewPathCount); + for (uint32_t NewPathIndex = 0; NewPathIndex < NewPathCount; NewPathIndex++) + { + NewPathToIndex.insert({New.Paths[NewPathIndex].generic_string(), NewPathIndex}); + } + uint32_t OldPathCount = gsl::narrow(Old.Paths.size()); + for (uint32_t OldPathIndex = 0; OldPathIndex < OldPathCount; OldPathIndex++) + { + if (auto It = NewPathToIndex.find(Old.Paths[OldPathIndex].generic_string()); It != NewPathToIndex.end()) + { + const uint32_t NewPathIndex = It->second; + + if ((Old.RawSizes[OldPathIndex] != New.RawSizes[NewPathIndex]) || + (Old.ModificationTicks[OldPathIndex] != New.ModificationTicks[NewPathIndex])) + { + Result.Paths.push_back(New.Paths[NewPathIndex]); + Result.RawSizes.push_back(New.RawSizes[NewPathIndex]); + Result.Attributes.push_back(New.Attributes[NewPathIndex]); + Result.ModificationTicks.push_back(New.ModificationTicks[NewPathIndex]); + } + } + else + { + OutDeletedPathIndexes.push_back(Old.Paths[OldPathIndex]); + } + } + return Result; +} + +void +SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output) +{ + Output.AddString("platform"sv, ToString(Content.Platform)); + compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); + compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); + compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output); + compactbinary_helpers::WriteArray(Content.ModificationTicks, "modificationTimes"sv, Output); +} + +FolderContent +LoadFolderContentToCompactBinary(CbObjectView Input) +{ + FolderContent Content; + Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); + compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); + compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes); + compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes); + compactbinary_helpers::ReadArray("modificationTimes"sv, Input, Content.ModificationTicks); + return Content; +} + +FolderContent +GetFolderContent(GetFolderContentStatistics& Stats, + const std::filesystem::path& RootPath, + std::function&& AcceptDirectory, + std::function&& AcceptFile, + WorkerThreadPool& WorkerPool, + int32_t UpdateInteralMS, + std::function&& UpdateCallback, + std::atomic& AbortFlag) +{ + Stopwatch Timer; + auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + FolderContent Content; + struct AsyncVisitor : public GetDirectoryContentVisitor + { + AsyncVisitor(GetFolderContentStatistics& Stats, + std::atomic& AbortFlag, + FolderContent& Content, + std::function&& AcceptDirectory, + std::function&& AcceptFile) + : m_Stats(Stats) + , m_AbortFlag(AbortFlag) + , m_FoundContent(Content) + , m_AcceptDirectory(std::move(AcceptDirectory)) + , m_AcceptFile(std::move(AcceptFile)) + { + } + virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override + { + if (!m_AbortFlag) + { + m_Stats.FoundFileCount += Content.FileNames.size(); + for (uint64_t FileSize : Content.FileSizes) + { + m_Stats.FoundFileByteCount += FileSize; + } + std::string RelativeDirectoryPath = RelativeRoot.generic_string(); + if (m_AcceptDirectory(RelativeDirectoryPath)) + { + std::vector Paths; + std::vector RawSizes; + std::vector Attributes; + std::vector ModificatonTicks; + Paths.reserve(Content.FileNames.size()); + RawSizes.reserve(Content.FileNames.size()); + Attributes.reserve(Content.FileNames.size()); + ModificatonTicks.reserve(Content.FileModificationTicks.size()); + + for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++) + { + const std::filesystem::path& FileName = Content.FileNames[FileIndex]; + std::string RelativePath = (RelativeRoot / FileName).generic_string(); + std::replace(RelativePath.begin(), RelativePath.end(), '\\', '/'); + if (m_AcceptFile(RelativePath, Content.FileSizes[FileIndex], Content.FileAttributes[FileIndex])) + { + Paths.emplace_back(std::move(RelativePath)); + RawSizes.emplace_back(Content.FileSizes[FileIndex]); + Attributes.emplace_back(Content.FileAttributes[FileIndex]); + ModificatonTicks.emplace_back(Content.FileModificationTicks[FileIndex]); + + m_Stats.AcceptedFileCount++; + m_Stats.AcceptedFileByteCount += Content.FileSizes[FileIndex]; + } + } + m_Lock.WithExclusiveLock([&]() { + m_FoundContent.Paths.insert(m_FoundContent.Paths.end(), Paths.begin(), Paths.end()); + m_FoundContent.RawSizes.insert(m_FoundContent.RawSizes.end(), RawSizes.begin(), RawSizes.end()); + m_FoundContent.Attributes.insert(m_FoundContent.Attributes.end(), Attributes.begin(), Attributes.end()); + m_FoundContent.ModificationTicks.insert(m_FoundContent.ModificationTicks.end(), + ModificatonTicks.begin(), + ModificatonTicks.end()); + }); + } + } + } + + GetFolderContentStatistics& m_Stats; + std::atomic& m_AbortFlag; + RwLock m_Lock; + FolderContent& m_FoundContent; + std::function m_AcceptDirectory; + std::function m_AcceptFile; + } Visitor(Stats, AbortFlag, Content, std::move(AcceptDirectory), std::move(AcceptFile)); + + Latch PendingWork(1); + GetDirectoryContent(RootPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes | + DirectoryContentFlags::IncludeAttributes | DirectoryContentFlags::IncludeModificationTick, + Visitor, + WorkerPool, + PendingWork); + PendingWork.CountDown(); + while (!PendingWork.Wait(UpdateInteralMS)) + { + UpdateCallback(AbortFlag.load(), PendingWork.Remaining()); + } + std::vector Order; + size_t PathCount = Content.Paths.size(); + Order.resize(Content.Paths.size()); + std::vector Parents; + Parents.reserve(PathCount); + std::vector Filenames; + Filenames.reserve(PathCount); + for (size_t OrderIndex = 0; OrderIndex < PathCount; OrderIndex++) + { + Order[OrderIndex] = OrderIndex; + Parents.emplace_back(Content.Paths[OrderIndex].parent_path().generic_string()); + Filenames.emplace_back(Content.Paths[OrderIndex].filename().generic_string()); + } + std::sort(Order.begin(), Order.end(), [&Parents, &Filenames](size_t Lhs, size_t Rhs) { + const std::string& LhsParent = Parents[Lhs]; + const std::string& RhsParent = Parents[Rhs]; + if (LhsParent < RhsParent) + { + return true; + } + else if (LhsParent > RhsParent) + { + return false; + } + return Filenames[Lhs] < Filenames[Rhs]; + }); + FolderContent OrderedContent; + OrderedContent.Paths.reserve(PathCount); + OrderedContent.RawSizes.reserve(PathCount); + OrderedContent.Attributes.reserve(PathCount); + OrderedContent.ModificationTicks.reserve(PathCount); + for (size_t OrderIndex : Order) + { + OrderedContent.Paths.emplace_back(std::move(Content.Paths[OrderIndex])); + OrderedContent.RawSizes.emplace_back(Content.RawSizes[OrderIndex]); + OrderedContent.Attributes.emplace_back(Content.Attributes[OrderIndex]); + OrderedContent.ModificationTicks.emplace_back(Content.ModificationTicks[OrderIndex]); + } + return OrderedContent; +} + +void +SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output) +{ + Output.AddString("platform"sv, ToString(Content.Platform)); + compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); + compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); + compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output); + compactbinary_helpers::WriteArray(Content.RawHashes, "rawHashes"sv, Output); + + Output.BeginObject("chunkedContent"); + compactbinary_helpers::WriteArray(Content.ChunkedContent.SequenceRawHashes, "sequenceRawHashes"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkCounts, "chunkCounts"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkOrders, "chunkOrders"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkHashes, "chunkHashes"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkRawSizes, "chunkRawSizes"sv, Output); + Output.EndObject(); // chunkedContent +} + +ChunkedFolderContent +LoadChunkedFolderContentToCompactBinary(CbObjectView Input) +{ + ChunkedFolderContent Content; + Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); + compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); + compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes); + compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes); + compactbinary_helpers::ReadArray("rawHashes"sv, Input, Content.RawHashes); + + CbObjectView ChunkedContentView = Input["chunkedContent"sv].AsObjectView(); + compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkedContentView, Content.ChunkedContent.SequenceRawHashes); + compactbinary_helpers::ReadArray("chunkCounts"sv, ChunkedContentView, Content.ChunkedContent.ChunkCounts); + compactbinary_helpers::ReadArray("chunkOrders"sv, ChunkedContentView, Content.ChunkedContent.ChunkOrders); + compactbinary_helpers::ReadArray("chunkHashes"sv, ChunkedContentView, Content.ChunkedContent.ChunkHashes); + compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkedContentView, Content.ChunkedContent.ChunkRawSizes); + return Content; +} + +ChunkedFolderContent +MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span Overlays) +{ + ZEN_ASSERT(!Overlays.empty()); + + ChunkedFolderContent Result; + const size_t BasePathCount = Base.Paths.size(); + Result.Paths.reserve(BasePathCount); + Result.RawSizes.reserve(BasePathCount); + Result.Attributes.reserve(BasePathCount); + Result.RawHashes.reserve(BasePathCount); + + const size_t BaseChunkCount = Base.ChunkedContent.ChunkHashes.size(); + Result.ChunkedContent.SequenceRawHashes.reserve(Base.ChunkedContent.SequenceRawHashes.size()); + Result.ChunkedContent.ChunkCounts.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkOrders.reserve(Base.ChunkedContent.ChunkOrders.size()); + + tsl::robin_map GenericPathToActualPath; + for (const std::filesystem::path& Path : Base.Paths) + { + GenericPathToActualPath.insert({PathCompareString(Path), Path}); + } + for (const ChunkedFolderContent& Overlay : Overlays) + { + for (const std::filesystem::path& Path : Overlay.Paths) + { + GenericPathToActualPath.insert({PathCompareString(Path), Path}); + } + } + + tsl::robin_map RawHashToSequenceRawHashIndex; + + auto BuildOverlayPaths = [](std::span Overlays) -> tsl::robin_set { + tsl::robin_set Result; + for (const ChunkedFolderContent& OverlayContent : Overlays) + { + for (const std::filesystem::path& Path : OverlayContent.Paths) + { + Result.insert(PathCompareString(Path)); + } + } + return Result; + }; + + auto AddContent = [&BuildOverlayPaths](ChunkedFolderContent& Result, + const ChunkedFolderContent& OverlayContent, + tsl::robin_map& ChunkHashToChunkIndex, + tsl::robin_map& RawHashToSequenceRawHashIndex, + const tsl::robin_map& GenericPathToActualPath, + std::span Overlays) { + const ChunkedContentLookup OverlayLookup = BuildChunkedContentLookup(OverlayContent); + tsl::robin_set BaseOverlayPaths = BuildOverlayPaths(Overlays); + for (uint32_t PathIndex = 0; PathIndex < OverlayContent.Paths.size(); PathIndex++) + { + std::string GenericPath = PathCompareString(OverlayContent.Paths[PathIndex]); + if (!BaseOverlayPaths.contains(GenericPath)) + { + // This asset will not be overridden by a later layer - add it + + const std::filesystem::path OriginalPath = GenericPathToActualPath.at(GenericPath); + Result.Paths.push_back(OriginalPath); + const IoHash& RawHash = OverlayContent.RawHashes[PathIndex]; + Result.RawSizes.push_back(OverlayContent.RawSizes[PathIndex]); + Result.Attributes.push_back(OverlayContent.Attributes[PathIndex]); + Result.RawHashes.push_back(RawHash); + + if (OverlayContent.RawSizes[PathIndex] > 0) + { + if (!RawHashToSequenceRawHashIndex.contains(RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {RawHash, gsl::narrow(Result.ChunkedContent.SequenceRawHashes.size())}); + const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceRawHashIndex.at(RawHash); + const uint32_t OrderIndexOffset = OverlayLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + ChunkingStatistics Stats; + std::span OriginalChunkOrder = + std::span(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); + AddCunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + OverlayContent.ChunkedContent.ChunkHashes, + OverlayContent.ChunkedContent.ChunkRawSizes); + Stats.UniqueSequencesFound++; + } + } + } + } + }; + + tsl::robin_map MergedChunkHashToChunkIndex; + AddContent(Result, Base, MergedChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, GenericPathToActualPath, Overlays); + for (uint32_t OverlayIndex = 0; OverlayIndex < Overlays.size(); OverlayIndex++) + { + AddContent(Result, + Overlays[OverlayIndex], + MergedChunkHashToChunkIndex, + RawHashToSequenceRawHashIndex, + GenericPathToActualPath, + Overlays.subspan(OverlayIndex + 1)); + } + return Result; +} + +ChunkedFolderContent +DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span DeletedPaths) +{ + ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); + ChunkedFolderContent Result = {.Platform = BaseContent.Platform}; + if (DeletedPaths.size() < BaseContent.Paths.size()) + { + tsl::robin_set DeletedPathSet; + DeletedPathSet.reserve(DeletedPaths.size()); + for (const std::filesystem::path& DeletedPath : DeletedPaths) + { + DeletedPathSet.insert(PathCompareString(DeletedPath)); + } + const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent); + tsl::robin_map ChunkHashToChunkIndex; + + tsl::robin_map RawHashToSequenceRawHashIndex; + for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++) + { + const std::filesystem::path& Path = BaseContent.Paths[PathIndex]; + if (!DeletedPathSet.contains(PathCompareString(Path))) + { + const IoHash& RawHash = BaseContent.RawHashes[PathIndex]; + const uint64_t RawSize = BaseContent.RawSizes[PathIndex]; + Result.Paths.push_back(Path); + Result.RawSizes.push_back(RawSize); + Result.Attributes.push_back(BaseContent.Attributes[PathIndex]); + Result.RawHashes.push_back(RawHash); + if (RawSize > 0) + { + if (!RawHashToSequenceRawHashIndex.contains(RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {RawHash, gsl::narrow(Result.ChunkedContent.SequenceRawHashes.size())}); + const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceRawHashIndex.at(RawHash); + const uint32_t OrderIndexOffset = BaseLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + ChunkingStatistics Stats; + std::span OriginalChunkOrder = + std::span(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); + AddCunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + BaseContent.ChunkedContent.ChunkHashes, + BaseContent.ChunkedContent.ChunkRawSizes); + Stats.UniqueSequencesFound++; + } + } + } + } + } + return Result; +} + +ChunkedFolderContent +ChunkFolderContent(ChunkingStatistics& Stats, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& RootPath, + const FolderContent& Content, + const ChunkingController& InChunkingController, + int32_t UpdateInteralMS, + std::function&& UpdateCallback, + std::atomic& AbortFlag) +{ + Stopwatch Timer; + auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + ChunkedFolderContent Result = {.Platform = Content.Platform, + .Paths = Content.Paths, + .RawSizes = Content.RawSizes, + .Attributes = Content.Attributes}; + const size_t ItemCount = Result.Paths.size(); + Result.RawHashes.resize(ItemCount, IoHash::Zero); + Result.ChunkedContent.SequenceRawHashes.reserve(ItemCount); // Up to 1 per file, maybe less + Result.ChunkedContent.ChunkCounts.reserve(ItemCount); // Up to one per file + Result.ChunkedContent.ChunkOrders.reserve(ItemCount); // At least 1 per file, maybe more + Result.ChunkedContent.ChunkHashes.reserve(ItemCount); // At least 1 per file, maybe more + Result.ChunkedContent.ChunkRawSizes.reserve(ItemCount); // At least 1 per file, maybe more + tsl::robin_map ChunkHashToChunkIndex; + tsl::robin_map RawHashToChunkSequenceIndex; + RawHashToChunkSequenceIndex.reserve(ItemCount); + ChunkHashToChunkIndex.reserve(ItemCount); + { + std::vector Order; + Order.resize(ItemCount); + for (uint32_t I = 0; I < ItemCount; I++) + { + Order[I] = I; + } + + // Handle the biggest files first so we don't end up with one straggling large file at the end + // std::sort(Order.begin(), Order.end(), [&](uint32_t Lhs, uint32_t Rhs) { return Result.RawSizes[Lhs] > Result.RawSizes[Rhs]; + //}); + + tsl::robin_map RawHashToSequenceRawHashIndex; + RawHashToSequenceRawHashIndex.reserve(ItemCount); + + RwLock Lock; + + ParallellWork Work(AbortFlag); + + for (uint32_t PathIndex : Order) + { + if (Work.IsAborted()) + { + break; + } + Work.ScheduleWork( + WorkerPool, // GetSyncWorkerPool() + [&, PathIndex](std::atomic& AbortFlag) { + if (!AbortFlag) + { + IoHash RawHash = HashOneFile(Stats, + InChunkingController, + Result, + ChunkHashToChunkIndex, + RawHashToSequenceRawHashIndex, + Lock, + RootPath, + PathIndex); + Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; }); + Stats.FilesProcessed++; + } + }, + [&, PathIndex](const std::exception& Ex, std::atomic& AbortFlag) { + ZEN_CONSOLE("Failed scanning file {}. Reason: {}", Result.Paths[PathIndex], Ex.what()); + AbortFlag = true; + }); + } + + Work.Wait(UpdateInteralMS, [&](bool IsAborted, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted); + ZEN_UNUSED(PendingWork); + UpdateCallback(Work.IsAborted(), Work.PendingWork().Remaining()); + }); + } + return Result; +} + +ChunkedContentLookup +BuildChunkedContentLookup(const ChunkedFolderContent& Content) +{ + struct ChunkLocationReference + { + uint32_t ChunkIndex; + ChunkedContentLookup::ChunkLocation Location; + }; + + ChunkedContentLookup Result; + { + const uint32_t SequenceRawHashesCount = gsl::narrow(Content.ChunkedContent.SequenceRawHashes.size()); + Result.RawHashToSequenceRawHashIndex.reserve(SequenceRawHashesCount); + Result.SequenceRawHashIndexChunkOrderOffset.reserve(SequenceRawHashesCount); + uint32_t OrderOffset = 0; + for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size(); + SequenceRawHashIndex++) + { + Result.RawHashToSequenceRawHashIndex.insert( + {Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); + Result.SequenceRawHashIndexChunkOrderOffset.push_back(OrderOffset); + OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + } + } + + std::vector Locations; + Locations.reserve(Content.ChunkedContent.ChunkOrders.size()); + for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + { + if (Content.RawSizes[PathIndex] > 0) + { + const IoHash& RawHash = Content.RawHashes[PathIndex]; + uint32_t SequenceRawHashIndex = Result.RawHashToSequenceRawHashIndex.at(RawHash); + const uint32_t OrderOffset = Result.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + uint64_t LocationOffset = 0; + for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) + { + uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; + + Locations.push_back(ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkLocation{PathIndex, LocationOffset}}); + + LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + ZEN_ASSERT(LocationOffset == Content.RawSizes[PathIndex]); + } + } + + std::sort(Locations.begin(), Locations.end(), [](const ChunkLocationReference& Lhs, const ChunkLocationReference& Rhs) { + if (Lhs.ChunkIndex < Rhs.ChunkIndex) + { + return true; + } + if (Lhs.ChunkIndex > Rhs.ChunkIndex) + { + return false; + } + if (Lhs.Location.PathIndex < Rhs.Location.PathIndex) + { + return true; + } + if (Lhs.Location.PathIndex > Rhs.Location.PathIndex) + { + return false; + } + return Lhs.Location.Offset < Rhs.Location.Offset; + }); + + Result.ChunkLocations.reserve(Locations.size()); + const uint32_t ChunkCount = gsl::narrow(Content.ChunkedContent.ChunkHashes.size()); + Result.ChunkHashToChunkIndex.reserve(ChunkCount); + size_t RangeOffset = 0; + for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) + { + Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex}); + uint32_t Count = 0; + while (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex) + { + Result.ChunkLocations.push_back(Locations[RangeOffset + Count].Location); + Count++; + } + Result.ChunkLocationOffset.push_back(RangeOffset); + Result.ChunkLocationCounts.push_back(Count); + RangeOffset += Count; + } + + return Result; +} + +} // namespace zen diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp new file mode 100644 index 000000000..bc0e57b14 --- /dev/null +++ b/src/zenutil/chunkingcontroller.cpp @@ -0,0 +1,265 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +#include +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { +using namespace std::literals; + +namespace { + std::vector ReadStringArray(CbArrayView StringArray) + { + std::vector Result; + Result.reserve(StringArray.Num()); + for (CbFieldView FieldView : StringArray) + { + Result.emplace_back(FieldView.AsString()); + } + return Result; + } + + ChunkedParams ReadChunkParams(CbObjectView Params) + { + bool UseThreshold = Params["UseThreshold"sv].AsBool(true); + size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize); + size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize); + size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize); + + return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize}; + } + +} // namespace + +class BasicChunkingController : public ChunkingController +{ +public: + BasicChunkingController(std::span ExcludeExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams) + : m_ChunkExcludeExtensions(ExcludeExtensions.begin(), ExcludeExtensions.end()) + , m_ChunkFileSizeLimit(ChunkFileSizeLimit) + , m_ChunkingParams(ChunkingParams) + { + } + + BasicChunkingController(CbObjectView Parameters) + : m_ChunkExcludeExtensions(ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView())) + , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit)) + , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())) + { + } + + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic& BytesProcessed) const override + { + const bool ExcludeFromChunking = + std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != + m_ChunkExcludeExtensions.end(); + + if (ExcludeFromChunking || (RawSize < m_ChunkFileSizeLimit)) + { + return false; + } + + BasicFile Buffer(InputPath, BasicFile::Mode::kRead); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + return true; + } + + virtual std::string_view GetName() const override { return Name; } + + virtual CbObject GetParameters() const override + { + CbObjectWriter Writer; + Writer.BeginArray("ChunkExcludeExtensions"sv); + { + for (const std::string& Extension : m_ChunkExcludeExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit); + Writer.BeginObject("ChunkingParams"sv); + { + Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold); + + Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize); + Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize); + Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize); + } + Writer.EndObject(); // ChunkingParams + return Writer.Save(); + } + static constexpr std::string_view Name = "BasicChunkingController"sv; + +protected: + const std::vector m_ChunkExcludeExtensions; + const uint64_t m_ChunkFileSizeLimit; + const ChunkedParams m_ChunkingParams; +}; + +class ChunkingControllerWithFixedChunking : public ChunkingController +{ +public: + ChunkingControllerWithFixedChunking(std::span FixedChunkingExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams, + uint32_t FixedChunkingChunkSize) + : m_FixedChunkingExtensions(FixedChunkingExtensions.begin(), FixedChunkingExtensions.end()) + , m_ChunkFileSizeLimit(ChunkFileSizeLimit) + , m_ChunkingParams(ChunkingParams) + , m_FixedChunkingChunkSize(FixedChunkingChunkSize) + { + } + + ChunkingControllerWithFixedChunking(CbObjectView Parameters) + : m_FixedChunkingExtensions(ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView())) + , m_ChunkFileSizeLimit(Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit)) + , m_ChunkingParams(ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())) + , m_FixedChunkingChunkSize(Parameters["FixedChunkingChunkSize"sv].AsUInt32(16u * 1024u * 1024u)) + { + } + + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic& BytesProcessed) const override + { + if (RawSize < m_ChunkFileSizeLimit) + { + return false; + } + const bool FixedChunking = std::find(m_FixedChunkingExtensions.begin(), m_FixedChunkingExtensions.end(), InputPath.extension()) != + m_FixedChunkingExtensions.end(); + + if (FixedChunking) + { + IoHashStream FullHash; + IoBuffer Source = IoBufferBuilder::MakeFromFile(InputPath); + uint64_t Offset = 0; + tsl::robin_map ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(1 + (RawSize / m_FixedChunkingChunkSize)); + while (Offset < RawSize) + { + uint64_t ChunkSize = std::min(RawSize - Offset, m_FixedChunkingChunkSize); + IoBuffer Chunk(Source, Offset, ChunkSize); + MemoryView ChunkData = Chunk.GetView(); + FullHash.Append(ChunkData); + + IoHash ChunkHash = IoHash::HashBuffer(ChunkData); + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + OutChunked.Info.ChunkSequence.push_back(It->second); + } + else + { + uint32_t ChunkIndex = gsl::narrow(OutChunked.Info.ChunkHashes.size()); + OutChunked.Info.ChunkHashes.push_back(ChunkHash); + OutChunked.Info.ChunkSequence.push_back(ChunkIndex); + OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow(ChunkSize)}); + } + Offset += ChunkSize; + BytesProcessed.fetch_add(ChunkSize); + } + OutChunked.Info.RawSize = RawSize; + OutChunked.Info.RawHash = FullHash.GetHash(); + return true; + } + else + { + BasicFile Buffer(InputPath, BasicFile::Mode::kRead); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + return true; + } + } + + virtual std::string_view GetName() const override { return Name; } + + virtual CbObject GetParameters() const override + { + CbObjectWriter Writer; + Writer.BeginArray("FixedChunkingExtensions"); + { + for (const std::string& Extension : m_FixedChunkingExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + Writer.AddInteger("ChunkFileSizeLimit"sv, m_ChunkFileSizeLimit); + Writer.BeginObject("ChunkingParams"sv); + { + Writer.AddBool("UseThreshold"sv, m_ChunkingParams.UseThreshold); + + Writer.AddInteger("MinSize"sv, (uint64_t)m_ChunkingParams.MinSize); + Writer.AddInteger("MaxSize"sv, (uint64_t)m_ChunkingParams.MaxSize); + Writer.AddInteger("AvgSize"sv, (uint64_t)m_ChunkingParams.AvgSize); + } + Writer.EndObject(); // ChunkingParams + Writer.AddInteger("FixedChunkingChunkSize"sv, m_FixedChunkingChunkSize); + return Writer.Save(); + } + + static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv; + +protected: + const std::vector m_FixedChunkingExtensions; + const uint64_t m_ChunkFileSizeLimit; + const ChunkedParams m_ChunkingParams; + const uint32_t m_FixedChunkingChunkSize; +}; + +std::unique_ptr +CreateBasicChunkingController(std::span ExcludeExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams) +{ + return std::make_unique(ExcludeExtensions, ChunkFileSizeLimit, ChunkingParams); +} +std::unique_ptr +CreateBasicChunkingController(CbObjectView Parameters) +{ + return std::make_unique(Parameters); +} + +std::unique_ptr +CreateChunkingControllerWithFixedChunking(std::span FixedChunkingExtensions, + uint64_t ChunkFileSizeLimit, + const ChunkedParams& ChunkingParams, + uint32_t FixedChunkingChunkSize) +{ + return std::make_unique(FixedChunkingExtensions, + ChunkFileSizeLimit, + ChunkingParams, + FixedChunkingChunkSize); +} +std::unique_ptr +CreateChunkingControllerWithFixedChunking(CbObjectView Parameters) +{ + return std::make_unique(Parameters); +} + +std::unique_ptr +CreateChunkingController(std::string_view Name, CbObjectView Parameters) +{ + if (Name == BasicChunkingController::Name) + { + return CreateBasicChunkingController(Parameters); + } + else if (Name == ChunkingControllerWithFixedChunking::Name) + { + return CreateChunkingControllerWithFixedChunking(Parameters); + } + return {}; +} + +} // namespace zen diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp new file mode 100644 index 000000000..78ebcdd55 --- /dev/null +++ b/src/zenutil/filebuildstorage.cpp @@ -0,0 +1,616 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +#include +#include +#include +#include +#include +#include + +namespace zen { + +using namespace std::literals; + +class FileBuildStorage : public BuildStorage +{ +public: + explicit FileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec, + double DelayPerKBSec) + : m_StoragePath(StoragePath) + , m_Stats(Stats) + , m_EnableJsonOutput(EnableJsonOutput) + , m_LatencySec(LatencySec) + , m_DelayPerKBSec(DelayPerKBSec) + { + CreateDirectories(GetBuildsFolder()); + CreateDirectories(GetBlobsFolder()); + CreateDirectories(GetBlobsMetadataFolder()); + } + + virtual ~FileBuildStorage() {} + + virtual CbObject ListBuilds(CbObject Query) override + { + ZEN_UNUSED(Query); + + SimulateLatency(Query.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildFolder = GetBuildsFolder(); + DirectoryContent Content; + GetDirectoryContent(BuildFolder, DirectoryContentFlags::IncludeDirs, Content); + CbObjectWriter Writer; + Writer.BeginArray("results"); + { + for (const std::filesystem::path& BuildPath : Content.Directories) + { + Oid BuildId = Oid::TryFromHexString(BuildPath.stem().string()); + if (BuildId != Oid::Zero) + { + Writer.BeginObject(); + { + Writer.AddObjectId("buildId", BuildId); + Writer.AddObject("metadata", ReadBuild(BuildId)["metadata"sv].AsObjectView()); + } + Writer.EndObject(); + } + } + } + Writer.EndArray(); // builds + Writer.Save(); + SimulateLatency(Writer.GetSaveSize(), 0); + return Writer.Save(); + } + + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override + { + SimulateLatency(MetaData.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + CbObjectWriter BuildObject; + BuildObject.AddObject("metadata", MetaData); + BuildObject.AddInteger("chunkSize"sv, 32u * 1024u * 1024u); + WriteBuild(BuildId, BuildObject.Save()); + + CbObjectWriter BuildResponse; + BuildResponse.AddInteger("chunkSize"sv, 32u * 1024u * 1024u); + BuildResponse.Save(); + + SimulateLatency(0, BuildResponse.GetSaveSize()); + return BuildResponse.Save(); + } + + virtual CbObject GetBuild(const Oid& BuildId) override + { + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + CbObject Build = ReadBuild(BuildId); + SimulateLatency(0, Build.GetSize()); + return Build; + } + + virtual void FinalizeBuild(const Oid& BuildId) override + { + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + } + + virtual std::pair> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) override + { + SimulateLatency(MetaData.GetSize(), 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + CreateDirectories(BuildPartDataPath.parent_path()); + + TemporaryFile::SafeWriteFile(BuildPartDataPath, MetaData.GetView()); + m_WrittenBytes += MetaData.GetSize(); + WriteAsJson(BuildPartDataPath, MetaData); + + IoHash RawHash = IoHash::HashBuffer(MetaData.GetView()); + + CbObjectWriter Writer; + { + CbObject BuildObject = ReadBuild(BuildId); + CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); + CbObjectView MetaDataView = BuildObject["metadata"sv].AsObjectView(); + + Writer.AddObject("metadata"sv, MetaDataView); + Writer.BeginObject("parts"sv); + { + for (CbFieldView PartView : PartsObject) + { + if (PartView.GetName() != PartName) + { + Writer.AddObjectId(PartView.GetName(), PartView.AsObjectId()); + } + } + Writer.AddObjectId(PartName, BuildPartId); + } + Writer.EndObject(); // parts + } + WriteBuild(BuildId, Writer.Save()); + + std::vector NeededAttachments = GetNeededAttachments(MetaData); + + SimulateLatency(0, sizeof(IoHash) * NeededAttachments.size()); + + return std::make_pair(RawHash, std::move(NeededAttachments)); + } + + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override + { + SimulateLatency(0, 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + + IoBuffer Payload = ReadFile(BuildPartDataPath).Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + + ZEN_ASSERT(ValidateCompactBinary(Payload.GetView(), CbValidateMode::Default) == CbValidateError::None); + + CbObject BuildPartObject = CbObject(SharedBuffer(Payload)); + + SimulateLatency(0, BuildPartObject.GetSize()); + + return BuildPartObject; + } + + virtual std::vector FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override + { + SimulateLatency(0, 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + IoBuffer Payload = ReadFile(BuildPartDataPath).Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + IoHash RawHash = IoHash::HashBuffer(Payload.GetView()); + if (RawHash != PartHash) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part {}: Expected hash {}, got {}", BuildPartId, PartHash, RawHash)); + } + + CbObject BuildPartObject = CbObject(SharedBuffer(Payload)); + std::vector NeededAttachments(GetNeededAttachments(BuildPartObject)); + + SimulateLatency(0, NeededAttachments.size() * sizeof(IoHash)); + + return NeededAttachments; + } + + virtual void PutBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + const CompositeBuffer& Payload) override + { + ZEN_UNUSED(BuildId); + ZEN_ASSERT(ContentType == ZenContentType::kCompressedBinary); + SimulateLatency(Payload.GetSize(), 0); + + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, Payload)); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (!std::filesystem::is_regular_file(BlockPath)) + { + CreateDirectories(BlockPath.parent_path()); + TemporaryFile::SafeWriteFile(BlockPath, Payload.Flatten().GetView()); + } + m_Stats.TotalBytesWritten += Payload.GetSize(); + SimulateLatency(0, 0); + } + + virtual std::vector> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function&& Transmitter, + std::function&& OnSentBytes) override + { + ZEN_UNUSED(BuildId); + ZEN_UNUSED(ContentType); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (!std::filesystem::is_regular_file(BlockPath)) + { + CreateDirectories(BlockPath.parent_path()); + + struct WorkloadData + { + std::function Transmitter; + std::function OnSentBytes; + TemporaryFile TempFile; + std::atomic PartsLeft; + }; + + std::shared_ptr Workload(std::make_shared()); + Workload->Transmitter = std::move(Transmitter); + Workload->OnSentBytes = std::move(OnSentBytes); + std::error_code Ec; + Workload->TempFile.CreateTemporary(BlockPath.parent_path(), Ec); + + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed opening temporary file '{}': {} ({})", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); + } + + std::vector> WorkItems; + uint64_t Offset = 0; + while (Offset < PayloadSize) + { + uint64_t Size = Min(32u * 1024u * 1024u, PayloadSize - Offset); + + WorkItems.push_back([this, RawHash, BlockPath, Workload, Offset, Size]() { + IoBuffer PartPayload = Workload->Transmitter(Offset, Size); + SimulateLatency(PartPayload.GetSize(), 0); + + std::error_code Ec; + Workload->TempFile.Write(PartPayload, Offset, Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Failed writing to temporary file '{}': {} ({})", + Workload->TempFile.GetPath(), + Ec.message(), + Ec.value())); + } + uint64_t BytesWritten = PartPayload.GetSize(); + m_Stats.TotalBytesWritten += BytesWritten; + const bool IsLastPart = Workload->PartsLeft.fetch_sub(1) == 1; + if (IsLastPart) + { + Workload->TempFile.Flush(); + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(Workload->TempFile.ReadAll()))); + Workload->TempFile.MoveTemporaryIntoPlace(BlockPath, Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Failed moving temporary file '{}' to '{}': {} ({})", + Workload->TempFile.GetPath(), + BlockPath, + Ec.message(), + Ec.value())); + } + } + Workload->OnSentBytes(BytesWritten, IsLastPart); + SimulateLatency(0, 0); + }); + + Offset += Size; + } + Workload->PartsLeft.store(WorkItems.size()); + + SimulateLatency(0, 0); + return WorkItems; + } + SimulateLatency(0, 0); + return {}; + } + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) override + { + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (std::filesystem::is_regular_file(BlockPath)) + { + IoBuffer Payload = ReadFile(BlockPath).Flatten(); + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(SharedBuffer(Payload)))); + m_Stats.TotalBytesRead += Payload.GetSize(); + Payload.SetContentType(ZenContentType::kCompressedBinary); + SimulateLatency(0, Payload.GetSize()); + return Payload; + } + SimulateLatency(0, 0); + return IoBuffer{}; + } + + virtual std::vector> GetLargeBuildBlob( + const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function&& Receiver) override + { + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (std::filesystem::is_regular_file(BlockPath)) + { + struct WorkloadData + { + std::atomic BytesRemaining; + IoBuffer BlobFile; + std::function Receiver; + }; + + std::shared_ptr Workload(std::make_shared()); + Workload->BlobFile = IoBufferBuilder::MakeFromFile(BlockPath); + const uint64_t BlobSize = Workload->BlobFile.GetSize(); + + Workload->Receiver = std::move(Receiver); + Workload->BytesRemaining = BlobSize; + + std::vector> WorkItems; + uint64_t Offset = 0; + while (Offset < BlobSize) + { + uint64_t Size = Min(ChunkSize, BlobSize - Offset); + WorkItems.push_back([this, BlockPath, Workload, Offset, Size]() { + SimulateLatency(0, 0); + IoBuffer PartPayload(Workload->BlobFile, Offset, Size); + m_Stats.TotalBytesRead += PartPayload.GetSize(); + uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Size); + Workload->Receiver(Offset, PartPayload, ByteRemaning); + SimulateLatency(Size, PartPayload.GetSize()); + }); + + Offset += Size; + } + SimulateLatency(0, 0); + return WorkItems; + } + return {}; + } + + virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override + { + ZEN_UNUSED(BuildId); + + SimulateLatency(MetaData.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockMetaDataPath = GetBlobMetadataPath(BlockRawHash); + CreateDirectories(BlockMetaDataPath.parent_path()); + TemporaryFile::SafeWriteFile(BlockMetaDataPath, MetaData.GetView()); + m_Stats.TotalBytesWritten += MetaData.GetSize(); + WriteAsJson(BlockMetaDataPath, MetaData); + SimulateLatency(0, 0); + } + + virtual std::vector FindBlocks(const Oid& BuildId) override + { + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + DirectoryContent Content; + GetDirectoryContent(GetBlobsMetadataFolder(), DirectoryContentFlags::IncludeFiles, Content); + std::vector Result; + for (const std::filesystem::path& MetaDataFile : Content.Files) + { + IoHash ChunkHash; + if (IoHash::TryParse(MetaDataFile.stem().string(), ChunkHash)) + { + std::filesystem::path BlockPath = GetBlobPayloadPath(ChunkHash); + if (std::filesystem::is_regular_file(BlockPath)) + { + IoBuffer BlockMetaDataPayload = ReadFile(MetaDataFile).Flatten(); + + m_Stats.TotalBytesRead += BlockMetaDataPayload.GetSize(); + + CbObject BlockObject = CbObject(SharedBuffer(BlockMetaDataPayload)); + Result.emplace_back(ParseChunkBlockDescription(BlockObject)); + } + } + } + SimulateLatency(0, sizeof(IoHash) * Result.size()); + return Result; + } + + virtual std::vector GetBlockMetadata(const Oid& BuildId, std::span BlockHashes) override + { + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + std::vector Result; + for (const IoHash& BlockHash : BlockHashes) + { + std::filesystem::path MetaDataFile = GetBlobMetadataPath(BlockHash); + if (std::filesystem::is_regular_file(MetaDataFile)) + { + IoBuffer BlockMetaDataPayload = ReadFile(MetaDataFile).Flatten(); + + m_Stats.TotalBytesRead += BlockMetaDataPayload.GetSize(); + + CbObject BlockObject = CbObject(SharedBuffer(BlockMetaDataPayload)); + Result.emplace_back(ParseChunkBlockDescription(BlockObject)); + } + } + SimulateLatency(sizeof(BlockHashes) * BlockHashes.size(), sizeof(ChunkBlockDescription) * Result.size()); + return Result; + } + +protected: + std::filesystem::path GetBuildsFolder() const { return m_StoragePath / "builds"; } + std::filesystem::path GetBlobsFolder() const { return m_StoragePath / "blobs"; } + std::filesystem::path GetBlobsMetadataFolder() const { return m_StoragePath / "blocks"; } + std::filesystem::path GetBuildFolder(const Oid& BuildId) const { return GetBuildsFolder() / BuildId.ToString(); } + + std::filesystem::path GetBuildPath(const Oid& BuildId) const { return GetBuildFolder(BuildId) / "metadata.cb"; } + + std::filesystem::path GetBuildPartFolder(const Oid& BuildId, const Oid& BuildPartId) const + { + return GetBuildFolder(BuildId) / "parts" / BuildPartId.ToString(); + } + + std::filesystem::path GetBuildPartPath(const Oid& BuildId, const Oid& BuildPartId) const + { + return GetBuildPartFolder(BuildId, BuildPartId) / "metadata.cb"; + } + + std::filesystem::path GetBlobPayloadPath(const IoHash& RawHash) const { return GetBlobsFolder() / fmt::format("{}.cbz", RawHash); } + + std::filesystem::path GetBlobMetadataPath(const IoHash& RawHash) const + { + return GetBlobsMetadataFolder() / fmt::format("{}.cb", RawHash); + } + + void SimulateLatency(uint64_t ReceiveSize, uint64_t SendSize) + { + double SleepSec = m_LatencySec; + if (m_DelayPerKBSec > 0.0) + { + SleepSec += m_DelayPerKBSec * (double(SendSize + ReceiveSize) / 1024u); + } + if (SleepSec > 0) + { + Sleep(int(SleepSec * 1000)); + } + } + + void WriteAsJson(const std::filesystem::path& OriginalPath, CbObjectView Data) const + { + if (m_EnableJsonOutput) + { + ExtendableStringBuilder<128> SB; + CompactBinaryToJson(Data, SB); + std::filesystem::path JsonPath = OriginalPath; + JsonPath.replace_extension(".json"); + std::string_view JsonMetaData = SB.ToView(); + TemporaryFile::SafeWriteFile(JsonPath, MemoryView(JsonMetaData.data(), JsonMetaData.length())); + } + } + + void WriteBuild(const Oid& BuildId, CbObjectView Data) + { + const std::filesystem::path BuildDataPath = GetBuildPath(BuildId); + CreateDirectories(BuildDataPath.parent_path()); + TemporaryFile::SafeWriteFile(BuildDataPath, Data.GetView()); + m_Stats.TotalBytesWritten += Data.GetSize(); + WriteAsJson(BuildDataPath, Data); + } + + CbObject ReadBuild(const Oid& BuildId) + { + const std::filesystem::path BuildDataPath = GetBuildPath(BuildId); + FileContents Content = ReadFile(BuildDataPath); + if (Content.ErrorCode) + { + throw std::runtime_error(fmt::format("Failed reading build '{}' from '{}': {} ({})", + BuildId, + BuildDataPath, + Content.ErrorCode.message(), + Content.ErrorCode.value())); + } + IoBuffer Payload = Content.Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + ZEN_ASSERT(ValidateCompactBinary(Payload.GetView(), CbValidateMode::Default) == CbValidateError::None); + CbObject BuildObject = CbObject(SharedBuffer(Payload)); + return BuildObject; + } + + std::vector GetNeededAttachments(CbObjectView BuildPartObject) + { + std::vector NeededAttachments; + BuildPartObject.IterateAttachments([&](CbFieldView FieldView) { + const IoHash AttachmentHash = FieldView.AsBinaryAttachment(); + const std::filesystem::path BlockPath = GetBlobPayloadPath(AttachmentHash); + if (!std::filesystem::is_regular_file(BlockPath)) + { + NeededAttachments.push_back(AttachmentHash); + } + }); + return NeededAttachments; + } + + bool ValidateCompressedBuffer(const IoHash& RawHash, const CompositeBuffer& Payload) + { + IoHash VerifyHash; + uint64_t VerifySize; + CompressedBuffer ValidateBuffer = CompressedBuffer::FromCompressed(Payload, VerifyHash, VerifySize); + if (!ValidateBuffer) + { + return false; + } + if (VerifyHash != RawHash) + { + return false; + } + CompositeBuffer Decompressed = ValidateBuffer.DecompressToComposite(); + if (!Decompressed) + { + return false; + } + IoHash Hash = IoHash::HashBuffer(Decompressed); + if (Hash != RawHash) + { + return false; + } + return true; + } + +private: + const std::filesystem::path m_StoragePath; + BuildStorage::Statistics& m_Stats; + const bool m_EnableJsonOutput = false; + std::atomic m_WrittenBytes; + + const double m_LatencySec = 0.0; + const double m_DelayPerKBSec = 0.0; +}; + +std::unique_ptr +CreateFileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec, + double DelayPerKBSec) +{ + return std::make_unique(StoragePath, Stats, EnableJsonOutput, LatencySec, DelayPerKBSec); +} + +} // namespace zen diff --git a/src/zenutil/include/zenutil/buildstorage.h b/src/zenutil/include/zenutil/buildstorage.h new file mode 100644 index 000000000..9c236310f --- /dev/null +++ b/src/zenutil/include/zenutil/buildstorage.h @@ -0,0 +1,55 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +namespace zen { + +class BuildStorage +{ +public: + struct Statistics + { + std::atomic TotalBytesRead = 0; + std::atomic TotalBytesWritten = 0; + std::atomic TotalRequestCount = 0; + std::atomic TotalRequestTimeUs = 0; + std::atomic TotalExecutionTimeUs = 0; + }; + + virtual ~BuildStorage() {} + + virtual CbObject ListBuilds(CbObject Query) = 0; + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) = 0; + virtual CbObject GetBuild(const Oid& BuildId) = 0; + virtual void FinalizeBuild(const Oid& BuildId) = 0; + + virtual std::pair> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) = 0; + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) = 0; + virtual std::vector FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) = 0; + virtual void PutBuildBlob(const Oid& BuildId, const IoHash& RawHash, ZenContentType ContentType, const CompositeBuffer& Payload) = 0; + virtual std::vector> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function&& Transmitter, + std::function&& OnSentBytes) = 0; + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) = 0; + virtual std::vector> GetLargeBuildBlob( + const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function&& Receiver) = 0; + + virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) = 0; + virtual std::vector FindBlocks(const Oid& BuildId) = 0; + virtual std::vector GetBlockMetadata(const Oid& BuildId, std::span BlockHashes) = 0; +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkblock.h b/src/zenutil/include/zenutil/chunkblock.h index 9b7414629..21107fb7c 100644 --- a/src/zenutil/include/zenutil/chunkblock.h +++ b/src/zenutil/include/zenutil/chunkblock.h @@ -12,21 +12,28 @@ namespace zen { -struct ChunkBlockDescription +struct ThinChunkBlockDescription { - IoHash BlockHash; - std::vector ChunkHashes; + IoHash BlockHash; + std::vector ChunkRawHashes; +}; + +struct ChunkBlockDescription : public ThinChunkBlockDescription +{ + uint64_t HeaderSize; std::vector ChunkRawLengths; + std::vector ChunkCompressedLengths; }; std::vector ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject); ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject); CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData); - +ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash); typedef std::function(const IoHash& RawHash)> FetchChunkFunc; CompressedBuffer GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock); bool IterateChunkBlock(const SharedBuffer& BlockPayload, - std::function Visitor); + std::function Visitor, + uint64_t& OutHeaderSize); } // namespace zen diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h new file mode 100644 index 000000000..15c687462 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -0,0 +1,256 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include +#include + +#include +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class CbWriter; +class ChunkingController; +class WorkerThreadPool; + +enum class SourcePlatform +{ + Windows = 0, + Linux = 1, + MacOS = 2, + _Count +}; + +std::string_view ToString(SourcePlatform Platform); +SourcePlatform FromString(std::string_view Platform, SourcePlatform Default); +SourcePlatform GetSourceCurrentPlatform(); + +struct FolderContent +{ + SourcePlatform Platform = GetSourceCurrentPlatform(); + std::vector Paths; + std::vector RawSizes; + std::vector Attributes; + std::vector ModificationTicks; + + bool operator==(const FolderContent& Rhs) const; + + bool AreKnownFilesEqual(const FolderContent& Rhs) const; + void UpdateState(const FolderContent& Rhs, std::vector& PathIndexesOufOfDate); + static bool AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs); +}; + +FolderContent GetUpdatedContent(const FolderContent& Old, + const FolderContent& New, + std::vector& OutDeletedPathIndexes); + +void SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output); +FolderContent LoadFolderContentToCompactBinary(CbObjectView Input); + +struct GetFolderContentStatistics +{ + std::atomic FoundFileCount = 0; + std::atomic FoundFileByteCount = 0; + std::atomic AcceptedFileCount = 0; + std::atomic AcceptedFileByteCount = 0; + uint64_t ElapsedWallTimeUS = 0; +}; + +FolderContent GetFolderContent(GetFolderContentStatistics& Stats, + const std::filesystem::path& RootPath, + std::function&& AcceptDirectory, + std::function&& AcceptFile, + WorkerThreadPool& WorkerPool, + int32_t UpdateInteralMS, + std::function&& UpdateCallback, + std::atomic& AbortFlag); + +struct ChunkedContentData +{ + // To describe one asset with a particular RawHash, find the index of the hash in SequenceRawHashes + // ChunkCounts for that index will be the number of indexes in ChunkOrders that describe + // the sequence of chunks required to reconstruct the asset. + // Offset into ChunkOrders is based on how many entries in ChunkOrders the previous [n - 1] SequenceRawHashes uses + std::vector SequenceRawHashes; // Raw hash for Chunk sequence + std::vector ChunkCounts; // Chunk count of ChunkOrder for SequenceRawHashes[n] + std::vector ChunkOrders; // Chunk sequence indexed into ChunkHashes, ChunkCounts[n] indexes per SequenceRawHashes[n] + std::vector ChunkHashes; // Unique chunk hashes + std::vector ChunkRawSizes; // Unique chunk raw size for ChunkHash[n] +}; + +struct ChunkedFolderContent +{ + SourcePlatform Platform = GetSourceCurrentPlatform(); + std::vector Paths; + std::vector RawSizes; + std::vector Attributes; + std::vector RawHashes; + ChunkedContentData ChunkedContent; +}; + +void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output); +ChunkedFolderContent LoadChunkedFolderContentToCompactBinary(CbObjectView Input); + +ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span Overlays); +ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, std::span DeletedPaths); + +struct ChunkingStatistics +{ + std::atomic FilesProcessed = 0; + std::atomic FilesChunked = 0; + std::atomic BytesHashed = 0; + std::atomic UniqueChunksFound = 0; + std::atomic UniqueSequencesFound = 0; + std::atomic UniqueBytesFound = 0; + uint64_t ElapsedWallTimeUS = 0; +}; + +ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& RootPath, + const FolderContent& Content, + const ChunkingController& InChunkingController, + int32_t UpdateInteralMS, + std::function&& UpdateCallback, + std::atomic& AbortFlag); + +struct ChunkedContentLookup +{ + struct ChunkLocation + { + uint32_t PathIndex; + uint64_t Offset; + }; + tsl::robin_map ChunkHashToChunkIndex; + tsl::robin_map RawHashToSequenceRawHashIndex; + std::vector SequenceRawHashIndexChunkOrderOffset; + std::vector ChunkLocations; + std::vector ChunkLocationOffset; // ChunkLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex + std::vector ChunkLocationCounts; // ChunkLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex +}; + +ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); + +inline std::pair +GetChunkLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +{ + return std::make_pair(Lookup.ChunkLocationOffset[ChunkIndex], Lookup.ChunkLocationCounts[ChunkIndex]); +} + +inline std::span +GetChunkLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +{ + std::pair Range = GetChunkLocationRange(Lookup, ChunkIndex); + return std::span(Lookup.ChunkLocations).subspan(Range.first, Range.second); +} + +namespace compactbinary_helpers { + template + void WriteArray(std::span Values, std::string_view ArrayName, CbWriter& Output) + { + Output.BeginArray(ArrayName); + for (const Type Value : Values) + { + Output << Value; + } + Output.EndArray(); + } + + template + void WriteArray(const std::vector& Values, std::string_view ArrayName, CbWriter& Output) + { + WriteArray(std::span(Values), ArrayName, Output); + } + + template<> + inline void WriteArray(std::span Values, std::string_view ArrayName, CbWriter& Output) + { + Output.BeginArray(ArrayName); + for (const std::filesystem::path& Path : Values) + { + Output.AddString((const char*)Path.generic_u8string().c_str()); + } + Output.EndArray(); + } + + template<> + inline void WriteArray(const std::vector& Values, std::string_view ArrayName, CbWriter& Output) + { + WriteArray(std::span(Values), ArrayName, Output); + } + + inline void WriteBinaryAttachmentArray(std::span Values, std::string_view ArrayName, CbWriter& Output) + { + Output.BeginArray(ArrayName); + for (const IoHash& Hash : Values) + { + Output.AddBinaryAttachment(Hash); + } + Output.EndArray(); + } + + inline void WriteBinaryAttachmentArray(const std::vector& Values, std::string_view ArrayName, CbWriter& Output) + { + WriteArray(std::span(Values), ArrayName, Output); + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsUInt32()); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsUInt64()); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + std::u8string_view U8Path = ItemView.AsU8String(); + Result.push_back(std::filesystem::path(U8Path)); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsHash()); + } + } + + inline void ReadBinaryAttachmentArray(std::string_view ArrayName, CbObjectView Input, std::vector& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsBinaryAttachment()); + } + } + +} // namespace compactbinary_helpers + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h new file mode 100644 index 000000000..fe4fc1bb5 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkingcontroller.h @@ -0,0 +1,55 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include + +#include + +#include +#include + +namespace zen { + +const std::vector DefaultChunkingExcludeExtensions = {".exe", ".dll", ".pdb", ".self"}; + +const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u, + .MaxSize = 128u * 1024u, + .AvgSize = ((8u * 4u) * 1024u) + 128u}; + +const size_t DefaultChunkingFileSizeLimit = DefaultChunkedParams.MaxSize; + +const uint32_t DefaultFixedChunkingChunkSize = 16u * 1024u * 1024u; + +struct ChunkedInfoWithSource; + +class ChunkingController +{ +public: + virtual ~ChunkingController() {} + + // Return true if the input file was processed. If true is returned OutChunked will contain the chunked info + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic& BytesProcessed) const = 0; + virtual std::string_view GetName() const = 0; + virtual CbObject GetParameters() const = 0; +}; + +std::unique_ptr CreateBasicChunkingController( + std::span ExcludeExtensions = DefaultChunkingExcludeExtensions, + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit, + const ChunkedParams& ChunkingParams = DefaultChunkedParams); +std::unique_ptr CreateBasicChunkingController(CbObjectView Parameters); + +std::unique_ptr CreateChunkingControllerWithFixedChunking( + std::span ExcludeExtensions = DefaultChunkingExcludeExtensions, + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit, + const ChunkedParams& ChunkingParams = DefaultChunkedParams, + uint32_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize); +std::unique_ptr CreateChunkingControllerWithFixedChunking(CbObjectView Parameters); + +std::unique_ptr CreateChunkingController(std::string_view Name, CbObjectView Parameters); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/filebuildstorage.h b/src/zenutil/include/zenutil/filebuildstorage.h new file mode 100644 index 000000000..c95fb32e6 --- /dev/null +++ b/src/zenutil/include/zenutil/filebuildstorage.h @@ -0,0 +1,16 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +namespace zen { +class HttpClient; + +std::unique_ptr CreateFileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec = 0.0, + double DelayPerKBSec = 0.0); +} // namespace zen diff --git a/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h b/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h new file mode 100644 index 000000000..89fc70140 --- /dev/null +++ b/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h @@ -0,0 +1,17 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +namespace zen { +class HttpClient; + +std::unique_ptr CreateJupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + BuildStorage::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath); +} // namespace zen diff --git a/src/zenutil/include/zenutil/parallellwork.h b/src/zenutil/include/zenutil/parallellwork.h new file mode 100644 index 000000000..7a8218c51 --- /dev/null +++ b/src/zenutil/include/zenutil/parallellwork.h @@ -0,0 +1,69 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +#include + +namespace zen { + +class ParallellWork +{ +public: + ParallellWork(std::atomic& AbortFlag) : m_AbortFlag(AbortFlag), m_PendingWork(1) {} + + ~ParallellWork() + { + // Make sure to call Wait before destroying + ZEN_ASSERT(m_PendingWork.Remaining() == 0); + } + + void ScheduleWork(WorkerThreadPool& WorkerPool, + std::function& AbortFlag)>&& Work, + std::function& AbortFlag)>&& OnError) + { + m_PendingWork.AddCount(1); + try + { + WorkerPool.ScheduleWork([this, Work = std::move(Work), OnError = std::move(OnError)] { + try + { + Work(m_AbortFlag); + } + catch (const std::exception& Ex) + { + OnError(Ex, m_AbortFlag); + } + m_PendingWork.CountDown(); + }); + } + catch (const std::exception&) + { + m_PendingWork.CountDown(); + throw; + } + } + + void Abort() { m_AbortFlag = true; } + + bool IsAborted() const { return m_AbortFlag.load(); } + + void Wait(int32_t UpdateInteralMS, std::function&& UpdateCallback) + { + ZEN_ASSERT(m_PendingWork.Remaining() > 0); + m_PendingWork.CountDown(); + while (!m_PendingWork.Wait(UpdateInteralMS)) + { + UpdateCallback(m_AbortFlag.load(), m_PendingWork.Remaining()); + } + } + Latch& PendingWork() { return m_PendingWork; } + +private: + std::atomic& m_AbortFlag; + Latch m_PendingWork; +}; + +} // namespace zen diff --git a/src/zenutil/jupiter/jupiterbuildstorage.cpp b/src/zenutil/jupiter/jupiterbuildstorage.cpp new file mode 100644 index 000000000..481e9146f --- /dev/null +++ b/src/zenutil/jupiter/jupiterbuildstorage.cpp @@ -0,0 +1,371 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include + +#include +#include +#include +#include +#include + +ZEN_THIRD_PARTY_INCLUDES_START +#include +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +class JupiterBuildStorage : public BuildStorage +{ +public: + JupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath) + : m_Session(InLog, InHttpClient) + , m_Stats(Stats) + , m_Namespace(Namespace) + , m_Bucket(Bucket) + , m_TempFolderPath(TempFolderPath) + { + } + virtual ~JupiterBuildStorage() {} + + virtual CbObject ListBuilds(CbObject Query) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = Query.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult ListResult = m_Session.ListBuilds(m_Namespace, m_Bucket, Payload); + AddStatistic(ListResult); + if (!ListResult.Success) + { + throw std::runtime_error(fmt::format("Failed listing builds: {} ({})", ListResult.Reason, ListResult.ErrorCode)); + } + return PayloadToJson("Failed listing builds"sv, ListResult.Response); + } + + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult PutResult = m_Session.PutBuild(m_Namespace, m_Bucket, BuildId, Payload); + AddStatistic(PutResult); + if (!PutResult.Success) + { + throw std::runtime_error(fmt::format("Failed creating build: {} ({})", PutResult.Reason, PutResult.ErrorCode)); + } + return PayloadToJson(fmt::format("Failed creating build: {}", BuildId), PutResult.Response); + } + + virtual CbObject GetBuild(const Oid& BuildId) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult GetBuildResult = m_Session.GetBuild(m_Namespace, m_Bucket, BuildId); + AddStatistic(GetBuildResult); + if (!GetBuildResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching build: {} ({})", GetBuildResult.Reason, GetBuildResult.ErrorCode)); + } + return PayloadToJson(fmt::format("Failed fetching build {}:", BuildId), GetBuildResult.Response); + } + + virtual void FinalizeBuild(const Oid& BuildId) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult FinalizeBuildResult = m_Session.FinalizeBuild(m_Namespace, m_Bucket, BuildId); + AddStatistic(FinalizeBuildResult); + if (!FinalizeBuildResult.Success) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part: {} ({})", FinalizeBuildResult.Reason, FinalizeBuildResult.ErrorCode)); + } + } + + virtual std::pair> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + PutBuildPartResult PutPartResult = m_Session.PutBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId, PartName, Payload); + AddStatistic(PutPartResult); + if (!PutPartResult.Success) + { + throw std::runtime_error(fmt::format("Failed creating build part: {} ({})", PutPartResult.Reason, PutPartResult.ErrorCode)); + } + return std::make_pair(PutPartResult.RawHash, std::move(PutPartResult.Needs)); + } + + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult GetBuildPartResult = m_Session.GetBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId); + AddStatistic(GetBuildPartResult); + if (!GetBuildPartResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching build part {}: {} ({})", + BuildPartId, + GetBuildPartResult.Reason, + GetBuildPartResult.ErrorCode)); + } + return PayloadToJson(fmt::format("Failed fetching build part {}:", BuildPartId), GetBuildPartResult.Response); + } + + virtual std::vector FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + FinalizeBuildPartResult FinalizePartResult = m_Session.FinalizeBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId, PartHash); + AddStatistic(FinalizePartResult); + if (!FinalizePartResult.Success) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part: {} ({})", FinalizePartResult.Reason, FinalizePartResult.ErrorCode)); + } + return std::move(FinalizePartResult.Needs); + } + + virtual void PutBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + const CompositeBuffer& Payload) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult PutBlobResult = m_Session.PutBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, ContentType, Payload); + AddStatistic(PutBlobResult); + if (!PutBlobResult.Success) + { + throw std::runtime_error(fmt::format("Failed putting build part: {} ({})", PutBlobResult.Reason, PutBlobResult.ErrorCode)); + } + } + + virtual std::vector> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function&& Transmitter, + std::function&& OnSentBytes) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + std::vector> WorkItems; + JupiterResult PutMultipartBlobResult = m_Session.PutMultipartBuildBlob(m_Namespace, + m_Bucket, + BuildId, + RawHash, + ContentType, + PayloadSize, + std::move(Transmitter), + WorkItems); + AddStatistic(PutMultipartBlobResult); + if (!PutMultipartBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed putting build part: {} ({})", PutMultipartBlobResult.Reason, PutMultipartBlobResult.ErrorCode)); + } + OnSentBytes(PutMultipartBlobResult.SentBytes, WorkItems.empty()); + + std::vector> WorkList; + for (auto& WorkItem : WorkItems) + { + WorkList.emplace_back([this, WorkItem = std::move(WorkItem), OnSentBytes]() { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + bool IsComplete = false; + JupiterResult PartResult = WorkItem(IsComplete); + AddStatistic(PartResult); + if (!PartResult.Success) + { + throw std::runtime_error(fmt::format("Failed putting build part: {} ({})", PartResult.Reason, PartResult.ErrorCode)); + } + OnSentBytes(PartResult.SentBytes, IsComplete); + }); + } + return WorkList; + } + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult GetBuildBlobResult = m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath); + AddStatistic(GetBuildBlobResult); + if (!GetBuildBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed fetching build blob {}: {} ({})", RawHash, GetBuildBlobResult.Reason, GetBuildBlobResult.ErrorCode)); + } + return std::move(GetBuildBlobResult.Response); + } + + virtual std::vector> GetLargeBuildBlob( + const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function&& Receiver) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + std::vector> WorkItems; + JupiterResult GetMultipartBlobResult = + m_Session.GetMultipartBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, ChunkSize, std::move(Receiver), WorkItems); + + AddStatistic(GetMultipartBlobResult); + if (!GetMultipartBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed getting build part: {} ({})", GetMultipartBlobResult.Reason, GetMultipartBlobResult.ErrorCode)); + } + std::vector> WorkList; + for (auto& WorkItem : WorkItems) + { + WorkList.emplace_back([this, WorkItem = std::move(WorkItem)]() { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult PartResult = WorkItem(); + AddStatistic(PartResult); + if (!PartResult.Success) + { + throw std::runtime_error(fmt::format("Failed getting build part: {} ({})", PartResult.Reason, PartResult.ErrorCode)); + } + }); + } + return WorkList; + } + + virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult PutMetaResult = m_Session.PutBlockMetadata(m_Namespace, m_Bucket, BuildId, BlockRawHash, Payload); + AddStatistic(PutMetaResult); + if (!PutMetaResult.Success) + { + throw std::runtime_error( + fmt::format("Failed putting build block metadata: {} ({})", PutMetaResult.Reason, PutMetaResult.ErrorCode)); + } + } + + virtual std::vector FindBlocks(const Oid& BuildId) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult FindResult = m_Session.FindBlocks(m_Namespace, m_Bucket, BuildId); + AddStatistic(FindResult); + if (!FindResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching known blocks: {} ({})", FindResult.Reason, FindResult.ErrorCode)); + } + return ParseChunkBlockDescriptionList(PayloadToJson("Failed fetching known blocks"sv, FindResult.Response)); + } + + virtual std::vector GetBlockMetadata(const Oid& BuildId, std::span BlockHashes) override + { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + CbObjectWriter Request; + + Request.BeginArray("blocks"sv); + for (const IoHash& BlockHash : BlockHashes) + { + Request.AddHash(BlockHash); + } + Request.EndArray(); + + IoBuffer Payload = Request.Save().GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult GetBlockMetadataResult = m_Session.GetBlockMetadata(m_Namespace, m_Bucket, BuildId, Payload); + AddStatistic(GetBlockMetadataResult); + if (!GetBlockMetadataResult.Success) + { + throw std::runtime_error( + fmt::format("Failed fetching block metadatas: {} ({})", GetBlockMetadataResult.Reason, GetBlockMetadataResult.ErrorCode)); + } + std::vector UnorderedList = + ParseChunkBlockDescriptionList(PayloadToJson("Failed fetching block metadatas", GetBlockMetadataResult.Response)); + tsl::robin_map BlockDescriptionLookup; + for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++) + { + const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex]; + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex); + } + std::vector SortedBlockDescriptions; + SortedBlockDescriptions.reserve(BlockDescriptionLookup.size()); + for (const IoHash& BlockHash : BlockHashes) + { + if (auto It = BlockDescriptionLookup.find(BlockHash); It != BlockDescriptionLookup.end()) + { + SortedBlockDescriptions.push_back(std::move(UnorderedList[It->second])); + } + } + return SortedBlockDescriptions; + } + +private: + static CbObject PayloadToJson(std::string_view Context, const IoBuffer& Payload) + { + if (Payload.GetContentType() == ZenContentType::kJSON) + { + std::string_view Json(reinterpret_cast(Payload.GetData()), Payload.GetSize()); + return LoadCompactBinaryFromJson(Json).AsObject(); + } + else if (Payload.GetContentType() == ZenContentType::kCbObject) + { + return LoadCompactBinaryObject(Payload); + } + else if (Payload.GetContentType() == ZenContentType::kCompressedBinary) + { + IoHash RawHash; + uint64_t RawSize; + return LoadCompactBinaryObject(CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize)); + } + else + { + throw std::runtime_error( + fmt::format("{}: {} ({})", "Unsupported response format", Context, ToString(Payload.GetContentType()))); + } + } + + void AddStatistic(const JupiterResult& Result) + { + m_Stats.TotalBytesWritten += Result.SentBytes; + m_Stats.TotalBytesRead += Result.ReceivedBytes; + m_Stats.TotalRequestTimeUs += uint64_t(Result.ElapsedSeconds * 1000000.0); + m_Stats.TotalRequestCount++; + } + + JupiterSession m_Session; + Statistics& m_Stats; + const std::string m_Namespace; + const std::string m_Bucket; + const std::filesystem::path m_TempFolderPath; +}; + +std::unique_ptr +CreateJupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + BuildStorage::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath) +{ + return std::make_unique(InLog, InHttpClient, Stats, Namespace, Bucket, TempFolderPath); +} + +} // namespace zen -- cgit v1.2.3 From 38a58059214bacc18c8a6406acf7f46c57f51e86 Mon Sep 17 00:00:00 2001 From: Zousar Shaker Date: Thu, 27 Feb 2025 01:27:34 -0700 Subject: Zs/auth bad function fix (#287) * Describe fix in changelog * remove JupiterClient::m_TokenProvider --------- Co-authored-by: zousar <2936246+zousar@users.noreply.github.com> Co-authored-by: Dan Engelbrecht --- src/zenutil/include/zenutil/jupiter/jupiterclient.h | 11 +++++------ src/zenutil/jupiter/jupiterclient.cpp | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/include/zenutil/jupiter/jupiterclient.h b/src/zenutil/include/zenutil/jupiter/jupiterclient.h index defe50edc..8a51bd60a 100644 --- a/src/zenutil/include/zenutil/jupiter/jupiterclient.h +++ b/src/zenutil/include/zenutil/jupiter/jupiterclient.h @@ -44,12 +44,11 @@ public: HttpClient& Client() { return m_HttpClient; } private: - LoggerRef m_Log; - const std::string m_DefaultDdcNamespace; - const std::string m_DefaultBlobStoreNamespace; - const std::string m_ComputeCluster; - std::function m_TokenProvider; - HttpClient m_HttpClient; + LoggerRef m_Log; + const std::string m_DefaultDdcNamespace; + const std::string m_DefaultBlobStoreNamespace; + const std::string m_ComputeCluster; + HttpClient m_HttpClient; friend class JupiterSession; }; diff --git a/src/zenutil/jupiter/jupiterclient.cpp b/src/zenutil/jupiter/jupiterclient.cpp index 5e5da3750..dbac218a4 100644 --- a/src/zenutil/jupiter/jupiterclient.cpp +++ b/src/zenutil/jupiter/jupiterclient.cpp @@ -11,7 +11,6 @@ JupiterClient::JupiterClient(const JupiterClientOptions& Options, std::function< , m_DefaultDdcNamespace(Options.DdcNamespace) , m_DefaultBlobStoreNamespace(Options.BlobStoreNamespace) , m_ComputeCluster(Options.ComputeCluster) -, m_TokenProvider(std::move(TokenProvider)) , m_HttpClient(Options.ServiceUrl, HttpClientSettings{.ConnectTimeout = Options.ConnectTimeout, .Timeout = Options.Timeout, -- cgit v1.2.3 From 5791f51cccea1d4e5365456c8da89dbac0dd3ec0 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 28 Feb 2025 12:39:48 +0100 Subject: improve error handling (#289) * clearer errors * quicker abort * handle deleted local files * simplify parallellwork error handling * don't finish progress on destructor - gives wrong impression * graceful ctrl-c handling --- src/zenutil/chunkedcontent.cpp | 13 +++---- src/zenutil/chunkedfile.cpp | 11 +++++- src/zenutil/chunkingcontroller.cpp | 12 ++++-- src/zenutil/include/zenutil/chunkedfile.h | 3 +- src/zenutil/include/zenutil/chunkingcontroller.h | 7 ++-- src/zenutil/include/zenutil/parallellwork.h | 48 ++++++++++++++++++++++++ 6 files changed, 79 insertions(+), 15 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index a41b71972..6dc2a20d8 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -92,7 +92,8 @@ namespace { tsl::robin_map& RawHashToSequenceRawHashIndex, RwLock& Lock, const std::filesystem::path& FolderPath, - uint32_t PathIndex) + uint32_t PathIndex, + std::atomic& AbortFlag) { const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; @@ -105,7 +106,7 @@ namespace { { ChunkedInfoWithSource Chunked; const bool DidChunking = - InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed); + InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag); if (DidChunking) { Lock.WithExclusiveLock([&]() { @@ -753,15 +754,13 @@ ChunkFolderContent(ChunkingStatistics& Stats, RawHashToSequenceRawHashIndex, Lock, RootPath, - PathIndex); + PathIndex, + AbortFlag); Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; }); Stats.FilesProcessed++; } }, - [&, PathIndex](const std::exception& Ex, std::atomic& AbortFlag) { - ZEN_CONSOLE("Failed scanning file {}. Reason: {}", Result.Paths[PathIndex], Ex.what()); - AbortFlag = true; - }); + Work.DefaultErrorFunction()); } Work.Wait(UpdateInteralMS, [&](bool IsAborted, std::ptrdiff_t PendingWork) { diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp index 3f3a6661c..4f9344039 100644 --- a/src/zenutil/chunkedfile.cpp +++ b/src/zenutil/chunkedfile.cpp @@ -112,7 +112,12 @@ Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, st } ChunkedInfoWithSource -ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params, std::atomic* BytesProcessed) +ChunkData(BasicFile& RawData, + uint64_t Offset, + uint64_t Size, + ChunkedParams Params, + std::atomic* BytesProcessed, + std::atomic* AbortFlag) { ChunkedInfoWithSource Result; tsl::robin_map FoundChunks; @@ -129,6 +134,10 @@ ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Para IoHashStream RawHashStream; while (Offset < End) { + if (AbortFlag != nullptr && AbortFlag->load()) + { + return {}; + } size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize); if (ScanLength == ZenChunkHelper::kNoBoundaryFound) { diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp index bc0e57b14..017d12433 100644 --- a/src/zenutil/chunkingcontroller.cpp +++ b/src/zenutil/chunkingcontroller.cpp @@ -58,7 +58,8 @@ public: virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, - std::atomic& BytesProcessed) const override + std::atomic& BytesProcessed, + std::atomic& AbortFlag) const override { const bool ExcludeFromChunking = std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != @@ -70,7 +71,7 @@ public: } BasicFile Buffer(InputPath, BasicFile::Mode::kRead); - OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed); + OutChunked = ChunkData(Buffer, 0, RawSize, m_ChunkingParams, &BytesProcessed, &AbortFlag); return true; } @@ -132,7 +133,8 @@ public: virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, - std::atomic& BytesProcessed) const override + std::atomic& BytesProcessed, + std::atomic& AbortFlag) const override { if (RawSize < m_ChunkFileSizeLimit) { @@ -150,6 +152,10 @@ public: ChunkHashToChunkIndex.reserve(1 + (RawSize / m_FixedChunkingChunkSize)); while (Offset < RawSize) { + if (AbortFlag) + { + return false; + } uint64_t ChunkSize = std::min(RawSize - Offset, m_FixedChunkingChunkSize); IoBuffer Chunk(Source, Offset, ChunkSize); MemoryView ChunkData = Chunk.GetView(); diff --git a/src/zenutil/include/zenutil/chunkedfile.h b/src/zenutil/include/zenutil/chunkedfile.h index 7110ad317..4cec80fdb 100644 --- a/src/zenutil/include/zenutil/chunkedfile.h +++ b/src/zenutil/include/zenutil/chunkedfile.h @@ -47,7 +47,8 @@ ChunkedInfoWithSource ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params = {}, - std::atomic* BytesProcessed = nullptr); + std::atomic* BytesProcessed = nullptr, + std::atomic* AbortFlag = nullptr); void Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function GetChunk); diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h index fe4fc1bb5..ebc80e207 100644 --- a/src/zenutil/include/zenutil/chunkingcontroller.h +++ b/src/zenutil/include/zenutil/chunkingcontroller.h @@ -32,9 +32,10 @@ public: virtual bool ProcessFile(const std::filesystem::path& InputPath, uint64_t RawSize, ChunkedInfoWithSource& OutChunked, - std::atomic& BytesProcessed) const = 0; - virtual std::string_view GetName() const = 0; - virtual CbObject GetParameters() const = 0; + std::atomic& BytesProcessed, + std::atomic& AbortFlag) const = 0; + virtual std::string_view GetName() const = 0; + virtual CbObject GetParameters() const = 0; }; std::unique_ptr CreateBasicChunkingController( diff --git a/src/zenutil/include/zenutil/parallellwork.h b/src/zenutil/include/zenutil/parallellwork.h index 7a8218c51..79798fc8d 100644 --- a/src/zenutil/include/zenutil/parallellwork.h +++ b/src/zenutil/include/zenutil/parallellwork.h @@ -2,6 +2,8 @@ #pragma once +#include +#include #include #include @@ -20,6 +22,14 @@ public: ZEN_ASSERT(m_PendingWork.Remaining() == 0); } + std::function& AbortFlag)> DefaultErrorFunction() + { + return [&](const std::exception& Ex, std::atomic& AbortFlag) { + m_ErrorLock.WithExclusiveLock([&]() { m_Errors.push_back(Ex.what()); }); + AbortFlag = true; + }; + } + void ScheduleWork(WorkerThreadPool& WorkerPool, std::function& AbortFlag)>&& Work, std::function& AbortFlag)>&& OnError) @@ -32,6 +42,27 @@ public: { Work(m_AbortFlag); } + catch (const AssertException& AssertEx) + { + OnError( + std::runtime_error(fmt::format("Caught assert exception while handling request: {}", AssertEx.FullDescription())), + m_AbortFlag); + } + catch (const std::system_error& SystemError) + { + if (IsOOM(SystemError.code())) + { + OnError(std::runtime_error(fmt::format("Out of memory. Reason: {}", SystemError.what())), m_AbortFlag); + } + else if (IsOOD(SystemError.code())) + { + OnError(std::runtime_error(fmt::format("Out of disk. Reason: {}", SystemError.what())), m_AbortFlag); + } + else + { + OnError(std::runtime_error(fmt::format("System error. Reason: {}", SystemError.what())), m_AbortFlag); + } + } catch (const std::exception& Ex) { OnError(Ex, m_AbortFlag); @@ -58,12 +89,29 @@ public: { UpdateCallback(m_AbortFlag.load(), m_PendingWork.Remaining()); } + if (m_Errors.size() == 1) + { + throw std::runtime_error(m_Errors.front()); + } + else if (m_Errors.size() > 1) + { + ExtendableStringBuilder<128> SB; + SB.Append("Multiple errors:"); + for (const std::string& Error : m_Errors) + { + SB.Append(fmt::format("\n {}", Error)); + } + throw std::runtime_error(SB.ToString()); + } } Latch& PendingWork() { return m_PendingWork; } private: std::atomic& m_AbortFlag; Latch m_PendingWork; + + RwLock m_ErrorLock; + std::vector m_Errors; }; } // namespace zen -- cgit v1.2.3 From 19b3c492dcc0fc3f8879ecb60124ca64dea9b7ef Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Sat, 1 Mar 2025 10:10:53 +0100 Subject: builds download incremental (#290) * incremental download * merge rebuild state and output state building * fix writing when > 1 zero size file --- src/zenutil/filebuildstorage.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp index 78ebcdd55..a4bb759e7 100644 --- a/src/zenutil/filebuildstorage.cpp +++ b/src/zenutil/filebuildstorage.cpp @@ -336,7 +336,8 @@ public: const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); if (std::filesystem::is_regular_file(BlockPath)) { - IoBuffer Payload = ReadFile(BlockPath).Flatten(); + BasicFile File(BlockPath, BasicFile::Mode::kRead); + IoBuffer Payload = File.ReadAll(); ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(SharedBuffer(Payload)))); m_Stats.TotalBytesRead += Payload.GetSize(); Payload.SetContentType(ZenContentType::kCompressedBinary); @@ -365,13 +366,13 @@ public: struct WorkloadData { std::atomic BytesRemaining; - IoBuffer BlobFile; + BasicFile BlobFile; std::function Receiver; }; std::shared_ptr Workload(std::make_shared()); - Workload->BlobFile = IoBufferBuilder::MakeFromFile(BlockPath); - const uint64_t BlobSize = Workload->BlobFile.GetSize(); + Workload->BlobFile.Open(BlockPath, BasicFile::Mode::kRead); + const uint64_t BlobSize = Workload->BlobFile.FileSize(); Workload->Receiver = std::move(Receiver); Workload->BytesRemaining = BlobSize; @@ -383,7 +384,8 @@ public: uint64_t Size = Min(ChunkSize, BlobSize - Offset); WorkItems.push_back([this, BlockPath, Workload, Offset, Size]() { SimulateLatency(0, 0); - IoBuffer PartPayload(Workload->BlobFile, Offset, Size); + IoBuffer PartPayload(Size); + Workload->BlobFile.Read(PartPayload.GetMutableView().GetData(), Size, Offset); m_Stats.TotalBytesRead += PartPayload.GetSize(); uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Size); Workload->Receiver(Offset, PartPayload, ByteRemaning); -- cgit v1.2.3 From 1270bfeffbc81b1e4940c5c454ee6acde43e696a Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 3 Mar 2025 17:53:11 +0100 Subject: refactor use chunk sequence download (#291) * work on chunk sequences on download, not paths * write chunksequences to .tmp file and move when complete * cleanup * Added on the fly validation `zen builds download` of files built from smaller chunks as each file is completed Added `--verify` option to `zen builds upload` to verify all uploaded data once entire upload is complete Added `--verify` option to `zen builds download` to verify all files in target folder once entire download is complete Fixed/improved progress updated Multithreaded part validation * added rates to Write Chunks task * b/s -> bits/s * dont validate partial content as complete payload * handle legacy c# builds --- src/zenutil/chunkedcontent.cpp | 78 ++++++++++++++++------------ src/zenutil/include/zenutil/chunkedcontent.h | 53 ++++++++++++++----- 2 files changed, 84 insertions(+), 47 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index 6dc2a20d8..1552ea823 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -599,10 +599,10 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceRawHashIndex.at(RawHash); - const uint32_t OrderIndexOffset = OverlayLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - ChunkingStatistics Stats; + const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + ChunkingStatistics Stats; std::span OriginalChunkOrder = std::span(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); AddCunkSequence(Stats, @@ -667,9 +667,9 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span { RawHashToSequenceRawHashIndex.insert( {RawHash, gsl::narrow(Result.ChunkedContent.SequenceRawHashes.size())}); - const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceRawHashIndex.at(RawHash); - const uint32_t OrderIndexOffset = BaseLookup.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + const uint32_t SequenceRawHashIndex = BaseLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = BaseLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; ChunkingStatistics Stats; std::span OriginalChunkOrder = std::span(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); @@ -777,46 +777,40 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { struct ChunkLocationReference { - uint32_t ChunkIndex; - ChunkedContentLookup::ChunkLocation Location; + uint32_t ChunkIndex; + ChunkedContentLookup::ChunkSequenceLocation Location; }; ChunkedContentLookup Result; { const uint32_t SequenceRawHashesCount = gsl::narrow(Content.ChunkedContent.SequenceRawHashes.size()); - Result.RawHashToSequenceRawHashIndex.reserve(SequenceRawHashesCount); - Result.SequenceRawHashIndexChunkOrderOffset.reserve(SequenceRawHashesCount); + Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount); + Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount); uint32_t OrderOffset = 0; for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceRawHashIndex++) { - Result.RawHashToSequenceRawHashIndex.insert( - {Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); - Result.SequenceRawHashIndexChunkOrderOffset.push_back(OrderOffset); + Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); + Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset); OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; } } std::vector Locations; Locations.reserve(Content.ChunkedContent.ChunkOrders.size()); - for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + for (uint32_t SequenceIndex = 0; SequenceIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceIndex++) { - if (Content.RawSizes[PathIndex] > 0) + const uint32_t OrderOffset = Result.SequenceIndexChunkOrderOffset[SequenceIndex]; + const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceIndex]; + uint64_t LocationOffset = 0; + for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) { - const IoHash& RawHash = Content.RawHashes[PathIndex]; - uint32_t SequenceRawHashIndex = Result.RawHashToSequenceRawHashIndex.at(RawHash); - const uint32_t OrderOffset = Result.SequenceRawHashIndexChunkOrderOffset[SequenceRawHashIndex]; - const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; - uint64_t LocationOffset = 0; - for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) - { - uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; + uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; - Locations.push_back(ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkLocation{PathIndex, LocationOffset}}); + Locations.push_back( + ChunkLocationReference{ChunkIndex, ChunkedContentLookup::ChunkSequenceLocation{SequenceIndex, LocationOffset}}); - LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; - } - ZEN_ASSERT(LocationOffset == Content.RawSizes[PathIndex]); + LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; } } @@ -829,18 +823,18 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { return false; } - if (Lhs.Location.PathIndex < Rhs.Location.PathIndex) + if (Lhs.Location.SequenceIndex < Rhs.Location.SequenceIndex) { return true; } - if (Lhs.Location.PathIndex > Rhs.Location.PathIndex) + if (Lhs.Location.SequenceIndex > Rhs.Location.SequenceIndex) { return false; } return Lhs.Location.Offset < Rhs.Location.Offset; }); - Result.ChunkLocations.reserve(Locations.size()); + Result.ChunkSequenceLocations.reserve(Locations.size()); const uint32_t ChunkCount = gsl::narrow(Content.ChunkedContent.ChunkHashes.size()); Result.ChunkHashToChunkIndex.reserve(ChunkCount); size_t RangeOffset = 0; @@ -850,14 +844,30 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) uint32_t Count = 0; while (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex) { - Result.ChunkLocations.push_back(Locations[RangeOffset + Count].Location); + Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location); Count++; } - Result.ChunkLocationOffset.push_back(RangeOffset); - Result.ChunkLocationCounts.push_back(Count); + Result.ChunkSequenceLocationOffset.push_back(RangeOffset); + Result.ChunkSequenceLocationCounts.push_back(Count); RangeOffset += Count; } + Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1); + for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + { + if (Content.RawSizes[PathIndex] > 0) + { + const IoHash& RawHash = Content.RawHashes[PathIndex]; + auto SequenceIndexIt = Result.RawHashToSequenceIndex.find(RawHash); + ZEN_ASSERT(SequenceIndexIt != Result.RawHashToSequenceIndex.end()); + const uint32_t SequenceIndex = SequenceIndexIt->second; + if (Result.SequenceIndexFirstPathIndex[SequenceIndex] == (uint32_t)-1) + { + Result.SequenceIndexFirstPathIndex[SequenceIndex] = PathIndex; + } + } + } + return Result; } diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h index 15c687462..309341550 100644 --- a/src/zenutil/include/zenutil/chunkedcontent.h +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -122,32 +122,59 @@ ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, struct ChunkedContentLookup { - struct ChunkLocation + struct ChunkSequenceLocation { - uint32_t PathIndex; + uint32_t SequenceIndex; uint64_t Offset; }; tsl::robin_map ChunkHashToChunkIndex; - tsl::robin_map RawHashToSequenceRawHashIndex; - std::vector SequenceRawHashIndexChunkOrderOffset; - std::vector ChunkLocations; - std::vector ChunkLocationOffset; // ChunkLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex - std::vector ChunkLocationCounts; // ChunkLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex + tsl::robin_map RawHashToSequenceIndex; + std::vector SequenceIndexChunkOrderOffset; + std::vector ChunkSequenceLocations; + std::vector + ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex + std::vector ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex + std::vector SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash }; ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); inline std::pair -GetChunkLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +GetChunkSequenceLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) { - return std::make_pair(Lookup.ChunkLocationOffset[ChunkIndex], Lookup.ChunkLocationCounts[ChunkIndex]); + return std::make_pair(Lookup.ChunkSequenceLocationOffset[ChunkIndex], Lookup.ChunkSequenceLocationCounts[ChunkIndex]); } -inline std::span -GetChunkLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +inline std::span +GetChunkSequenceLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) { - std::pair Range = GetChunkLocationRange(Lookup, ChunkIndex); - return std::span(Lookup.ChunkLocations).subspan(Range.first, Range.second); + std::pair Range = GetChunkSequenceLocationRange(Lookup, ChunkIndex); + return std::span(Lookup.ChunkSequenceLocations).subspan(Range.first, Range.second); +} + +inline uint32_t +GetSequenceIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + return Lookup.RawHashToSequenceIndex.at(RawHash); +} + +inline uint32_t +GetChunkIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + return Lookup.RawHashToSequenceIndex.at(RawHash); +} + +inline uint32_t +GetFirstPathIndexForSeqeuenceIndex(const ChunkedContentLookup& Lookup, const uint32_t SequenceIndex) +{ + return Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; +} + +inline uint32_t +GetFirstPathIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + const uint32_t SequenceIndex = GetSequenceIndexForRawHash(Lookup, RawHash); + return GetFirstPathIndexForSeqeuenceIndex(Lookup, SequenceIndex); } namespace compactbinary_helpers { -- cgit v1.2.3 From 7b1c99f53da3a08b844cc7d7ce99530758e34be2 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 5 Mar 2025 12:25:51 +0100 Subject: Add trace support for zen CLI command (#296) - This change adds support for `--trace`, `--tracehost` and `--tracefile` command arguments to enable and control tracing to Insights - It also adds profiling scopes primarily to build download command related code --- src/zenutil/chunkedcontent.cpp | 79 ++++++++++++++++------------- src/zenutil/jupiter/jupiterbuildstorage.cpp | 31 +++++++++++ 2 files changed, 76 insertions(+), 34 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index 1552ea823..4ca89d996 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -23,13 +24,13 @@ namespace zen { using namespace std::literals; namespace { - void AddCunkSequence(ChunkingStatistics& Stats, - ChunkedContentData& InOutChunkedContent, - tsl::robin_map& ChunkHashToChunkIndex, - const IoHash& RawHash, - std::span ChunkSequence, - std::span ChunkHashes, - std::span ChunkRawSizes) + void AddChunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map& ChunkHashToChunkIndex, + const IoHash& RawHash, + std::span ChunkSequence, + std::span ChunkHashes, + std::span ChunkRawSizes) { ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size()); InOutChunkedContent.ChunkCounts.push_back(gsl::narrow(ChunkSequence.size())); @@ -58,11 +59,11 @@ namespace { Stats.UniqueSequencesFound++; } - void AddCunkSequence(ChunkingStatistics& Stats, - ChunkedContentData& InOutChunkedContent, - tsl::robin_map& ChunkHashToChunkIndex, - const IoHash& RawHash, - const uint64_t RawSize) + void AddChunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map& ChunkHashToChunkIndex, + const IoHash& RawHash, + const uint64_t RawSize) { InOutChunkedContent.ChunkCounts.push_back(1); @@ -120,13 +121,13 @@ namespace { { ChunkSizes.push_back(Source.Size); } - AddCunkSequence(Stats, - OutChunkedContent.ChunkedContent, - ChunkHashToChunkIndex, - Chunked.Info.RawHash, - Chunked.Info.ChunkSequence, - Chunked.Info.ChunkHashes, - ChunkSizes); + AddChunkSequence(Stats, + OutChunkedContent.ChunkedContent, + ChunkHashToChunkIndex, + Chunked.Info.RawHash, + Chunked.Info.ChunkSequence, + Chunked.Info.ChunkHashes, + ChunkSizes); Stats.UniqueSequencesFound++; } }); @@ -143,7 +144,7 @@ namespace { { RawHashToSequenceRawHashIndex.insert( {Hash, gsl::narrow(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); - AddCunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); + AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); Stats.UniqueSequencesFound++; } }); @@ -360,6 +361,8 @@ GetFolderContent(GetFolderContentStatistics& Stats, std::function&& UpdateCallback, std::atomic& AbortFlag) { + ZEN_TRACE_CPU("GetFolderContent"); + Stopwatch Timer; auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); @@ -528,6 +531,8 @@ LoadChunkedFolderContentToCompactBinary(CbObjectView Input) ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span Overlays) { + ZEN_TRACE_CPU("MergeChunkedFolderContents"); + ZEN_ASSERT(!Overlays.empty()); ChunkedFolderContent Result; @@ -605,13 +610,13 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span OriginalChunkOrder = std::span(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); - AddCunkSequence(Stats, - Result.ChunkedContent, - ChunkHashToChunkIndex, - RawHash, - OriginalChunkOrder, - OverlayContent.ChunkedContent.ChunkHashes, - OverlayContent.ChunkedContent.ChunkRawSizes); + AddChunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + OverlayContent.ChunkedContent.ChunkHashes, + OverlayContent.ChunkedContent.ChunkRawSizes); Stats.UniqueSequencesFound++; } } @@ -636,6 +641,8 @@ MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span DeletedPaths) { + ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); + ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); ChunkedFolderContent Result = {.Platform = BaseContent.Platform}; if (DeletedPaths.size() < BaseContent.Paths.size()) @@ -673,13 +680,13 @@ DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span ChunkingStatistics Stats; std::span OriginalChunkOrder = std::span(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); - AddCunkSequence(Stats, - Result.ChunkedContent, - ChunkHashToChunkIndex, - RawHash, - OriginalChunkOrder, - BaseContent.ChunkedContent.ChunkHashes, - BaseContent.ChunkedContent.ChunkRawSizes); + AddChunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + BaseContent.ChunkedContent.ChunkHashes, + BaseContent.ChunkedContent.ChunkRawSizes); Stats.UniqueSequencesFound++; } } @@ -699,6 +706,8 @@ ChunkFolderContent(ChunkingStatistics& Stats, std::function&& UpdateCallback, std::atomic& AbortFlag) { + ZEN_TRACE_CPU("ChunkFolderContent"); + Stopwatch Timer; auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); @@ -775,6 +784,8 @@ ChunkFolderContent(ChunkingStatistics& Stats, ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content) { + ZEN_TRACE_CPU("BuildChunkedContentLookup"); + struct ChunkLocationReference { uint32_t ChunkIndex; diff --git a/src/zenutil/jupiter/jupiterbuildstorage.cpp b/src/zenutil/jupiter/jupiterbuildstorage.cpp index 481e9146f..309885b05 100644 --- a/src/zenutil/jupiter/jupiterbuildstorage.cpp +++ b/src/zenutil/jupiter/jupiterbuildstorage.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include ZEN_THIRD_PARTY_INCLUDES_START @@ -36,6 +37,8 @@ public: virtual CbObject ListBuilds(CbObject Query) override { + ZEN_TRACE_CPU("Jupiter::ListBuilds"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); IoBuffer Payload = Query.GetBuffer().AsIoBuffer(); @@ -51,6 +54,8 @@ public: virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override { + ZEN_TRACE_CPU("Jupiter::PutBuild"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); @@ -66,6 +71,8 @@ public: virtual CbObject GetBuild(const Oid& BuildId) override { + ZEN_TRACE_CPU("Jupiter::GetBuild"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult GetBuildResult = m_Session.GetBuild(m_Namespace, m_Bucket, BuildId); @@ -79,6 +86,8 @@ public: virtual void FinalizeBuild(const Oid& BuildId) override { + ZEN_TRACE_CPU("Jupiter::FinalizeBuild"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult FinalizeBuildResult = m_Session.FinalizeBuild(m_Namespace, m_Bucket, BuildId); @@ -95,6 +104,8 @@ public: std::string_view PartName, const CbObject& MetaData) override { + ZEN_TRACE_CPU("Jupiter::PutBuildPart"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); @@ -110,6 +121,8 @@ public: virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override { + ZEN_TRACE_CPU("Jupiter::GetBuildPart"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult GetBuildPartResult = m_Session.GetBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId); @@ -126,6 +139,8 @@ public: virtual std::vector FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override { + ZEN_TRACE_CPU("Jupiter::FinalizeBuildPart"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); FinalizeBuildPartResult FinalizePartResult = m_Session.FinalizeBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId, PartHash); @@ -143,6 +158,8 @@ public: ZenContentType ContentType, const CompositeBuffer& Payload) override { + ZEN_TRACE_CPU("Jupiter::PutBuildBlob"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult PutBlobResult = m_Session.PutBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, ContentType, Payload); @@ -160,6 +177,8 @@ public: std::function&& Transmitter, std::function&& OnSentBytes) override { + ZEN_TRACE_CPU("Jupiter::PutLargeBuildBlob"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); std::vector> WorkItems; @@ -200,6 +219,8 @@ public: virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) override { + ZEN_TRACE_CPU("Jupiter::GetBuildBlob"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult GetBuildBlobResult = m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath); @@ -218,6 +239,8 @@ public: uint64_t ChunkSize, std::function&& Receiver) override { + ZEN_TRACE_CPU("Jupiter::GetLargeBuildBlob"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); std::vector> WorkItems; @@ -249,6 +272,8 @@ public: virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override { + ZEN_TRACE_CPU("Jupiter::PutBlockMetadata"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); @@ -264,6 +289,8 @@ public: virtual std::vector FindBlocks(const Oid& BuildId) override { + ZEN_TRACE_CPU("Jupiter::FindBlocks"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); JupiterResult FindResult = m_Session.FindBlocks(m_Namespace, m_Bucket, BuildId); @@ -277,6 +304,8 @@ public: virtual std::vector GetBlockMetadata(const Oid& BuildId, std::span BlockHashes) override { + ZEN_TRACE_CPU("Jupiter::GetBlockMetadata"); + Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); CbObjectWriter Request; @@ -365,6 +394,8 @@ CreateJupiterBuildStorage(LoggerRef InLog, std::string_view Bucket, const std::filesystem::path& TempFolderPath) { + ZEN_TRACE_CPU("CreateJupiterBuildStorage"); + return std::make_unique(InLog, InHttpClient, Stats, Namespace, Bucket, TempFolderPath); } -- cgit v1.2.3 From 920120bbcec9f91df3336f62970b3e010a4fa6c2 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Thu, 6 Mar 2025 16:18:32 +0100 Subject: reduced memory churn using fixed_xxx containers (#236) * Added EASTL to help with eliminating memory allocations * Applied EASTL to eliminate memory allocations, primarily by using `fixed_vector` et al to use stack allocations / inline struct allocations Reduces memory events in traces by close to a factor of 10 in test scenario (starting editor for project F) --- src/zenutil/include/zenutil/cache/cachekey.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/zenutil') diff --git a/src/zenutil/include/zenutil/cache/cachekey.h b/src/zenutil/include/zenutil/cache/cachekey.h index 741375946..0ab05f4f1 100644 --- a/src/zenutil/include/zenutil/cache/cachekey.h +++ b/src/zenutil/include/zenutil/cache/cachekey.h @@ -17,6 +17,12 @@ struct CacheKey static CacheKey Create(std::string_view Bucket, const IoHash& Hash) { return {.Bucket = ToLower(Bucket), .Hash = Hash}; } + // This should be used whenever the bucket name has already been validated to avoid redundant ToLower calls + static CacheKey CreateValidated(std::string&& BucketValidated, const IoHash& Hash) + { + return {.Bucket = std::move(BucketValidated), .Hash = Hash}; + } + auto operator<=>(const CacheKey& that) const { if (auto b = caseSensitiveCompareStrings(Bucket, that.Bucket); b != std::strong_ordering::equal) -- cgit v1.2.3 From 9b24647facccc9c7848a52f1f4c5e32055bf2f01 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 7 Mar 2025 09:58:20 +0100 Subject: partial block fetch (#298) - Improvement: Do partial requests of blocks if not all of the block is needed - Improvement: Better progress/statistics on download - Bugfix: Ensure that temporary folder for Jupiter downloads exists during verify phase --- src/zenutil/filebuildstorage.cpp | 17 +++++++++++++---- src/zenutil/include/zenutil/buildstorage.h | 5 ++++- src/zenutil/include/zenutil/jupiter/jupitersession.h | 4 +++- src/zenutil/jupiter/jupiterbuildstorage.cpp | 10 ++++++---- src/zenutil/jupiter/jupitersession.cpp | 12 ++++++++++-- 5 files changed, 36 insertions(+), 12 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp index a4bb759e7..e57109006 100644 --- a/src/zenutil/filebuildstorage.cpp +++ b/src/zenutil/filebuildstorage.cpp @@ -325,7 +325,7 @@ public: return {}; } - virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) override + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override { ZEN_UNUSED(BuildId); SimulateLatency(0, 0); @@ -337,10 +337,19 @@ public: if (std::filesystem::is_regular_file(BlockPath)) { BasicFile File(BlockPath, BasicFile::Mode::kRead); - IoBuffer Payload = File.ReadAll(); - ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(SharedBuffer(Payload)))); - m_Stats.TotalBytesRead += Payload.GetSize(); + IoBuffer Payload; + if (RangeOffset != 0 || RangeBytes != (uint64_t)-1) + { + Payload = IoBuffer(RangeBytes); + File.Read(Payload.GetMutableView().GetData(), RangeBytes, RangeOffset); + } + else + { + Payload = File.ReadAll(); + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(SharedBuffer(Payload)))); + } Payload.SetContentType(ZenContentType::kCompressedBinary); + m_Stats.TotalBytesRead += Payload.GetSize(); SimulateLatency(0, Payload.GetSize()); return Payload; } diff --git a/src/zenutil/include/zenutil/buildstorage.h b/src/zenutil/include/zenutil/buildstorage.h index 9c236310f..9d2bab170 100644 --- a/src/zenutil/include/zenutil/buildstorage.h +++ b/src/zenutil/include/zenutil/buildstorage.h @@ -40,7 +40,10 @@ public: std::function&& Transmitter, std::function&& OnSentBytes) = 0; - virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) = 0; + virtual IoBuffer GetBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + uint64_t RangeOffset = 0, + uint64_t RangeBytes = (uint64_t)-1) = 0; virtual std::vector> GetLargeBuildBlob( const Oid& BuildId, const IoHash& RawHash, diff --git a/src/zenutil/include/zenutil/jupiter/jupitersession.h b/src/zenutil/include/zenutil/jupiter/jupitersession.h index 852271868..2c5fc73b8 100644 --- a/src/zenutil/include/zenutil/jupiter/jupitersession.h +++ b/src/zenutil/include/zenutil/jupiter/jupitersession.h @@ -123,7 +123,9 @@ public: std::string_view BucketId, const Oid& BuildId, const IoHash& Hash, - std::filesystem::path TempFolderPath); + std::filesystem::path TempFolderPath, + uint64_t Offset = 0, + uint64_t Size = (uint64_t)-1); JupiterResult PutMultipartBuildBlob(std::string_view Namespace, std::string_view BucketId, diff --git a/src/zenutil/jupiter/jupiterbuildstorage.cpp b/src/zenutil/jupiter/jupiterbuildstorage.cpp index 309885b05..bf89ce785 100644 --- a/src/zenutil/jupiter/jupiterbuildstorage.cpp +++ b/src/zenutil/jupiter/jupiterbuildstorage.cpp @@ -217,13 +217,15 @@ public: return WorkList; } - virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash) override + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override { ZEN_TRACE_CPU("Jupiter::GetBuildBlob"); - Stopwatch ExecutionTimer; - auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); - JupiterResult GetBuildBlobResult = m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + CreateDirectories(m_TempFolderPath); + JupiterResult GetBuildBlobResult = + m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath, RangeOffset, RangeBytes); AddStatistic(GetBuildBlobResult); if (!GetBuildBlobResult.Success) { diff --git a/src/zenutil/jupiter/jupitersession.cpp b/src/zenutil/jupiter/jupitersession.cpp index 06ac6ae36..68f214c06 100644 --- a/src/zenutil/jupiter/jupitersession.cpp +++ b/src/zenutil/jupiter/jupitersession.cpp @@ -698,11 +698,19 @@ JupiterSession::GetBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoHash& Hash, - std::filesystem::path TempFolderPath) + std::filesystem::path TempFolderPath, + uint64_t Offset, + uint64_t Size) { + HttpClient::KeyValueMap Headers; + if (Offset != 0 || Size != (uint64_t)-1) + { + Headers.Entries.insert({"Range", fmt::format("bytes={}-{}", Offset, Offset + Size - 1)}); + } HttpClient::Response Response = m_HttpClient.Download(fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()), - TempFolderPath); + TempFolderPath, + Headers); return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv); } -- cgit v1.2.3 From 7de3d4218ee5969af6147f9ab20bda538a136d9a Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 10 Mar 2025 18:33:24 +0100 Subject: pick up existing cache (#299) - Improvement: Scavenge .zen temp folders for existing data (downloaded, decompressed or written) from previous failed run - Improvement: Faster abort during stream compression - Improvement: Try to move downloaded blobs with rename if possible avoiding an extra disk write - Improvement: Only clean temp folders on successful or cancelled build - keep it if download fails --- src/zenutil/chunkblock.cpp | 51 +++++++++++++++++++++++--------- src/zenutil/include/zenutil/chunkblock.h | 9 +++--- 2 files changed, 42 insertions(+), 18 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/chunkblock.cpp b/src/zenutil/chunkblock.cpp index a19cf5c1b..f3c14edc4 100644 --- a/src/zenutil/chunkblock.cpp +++ b/src/zenutil/chunkblock.cpp @@ -187,31 +187,54 @@ GenerateChunkBlock(std::vector>&& FetchChunks, return CompressedBlock; } -bool -IterateChunkBlock(const SharedBuffer& BlockPayload, - std::function Visitor, - uint64_t& OutHeaderSize) +std::vector +ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize) { - ZEN_ASSERT(BlockPayload); - if (BlockPayload.GetSize() < 1) - { - return false; - } - - MemoryView BlockView = BlockPayload.GetView(); - const uint8_t* ReadPtr = reinterpret_cast(BlockView.GetData()); + const uint8_t* ReadPtr = reinterpret_cast(BlockView.GetData()); uint32_t NumberSize; uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize); ReadPtr += NumberSize; - std::vector ChunkSizes; + std::vector ChunkSizes; ChunkSizes.reserve(ChunkCount); while (ChunkCount--) { - ChunkSizes.push_back(ReadVarUInt(ReadPtr, NumberSize)); + if (ReadPtr >= BlockView.GetDataEnd()) + { + throw std::runtime_error("Invalid block header, block data ended unexpectedly"); + } + uint64_t ChunkSize = ReadVarUInt(ReadPtr, NumberSize); + if (ChunkSize > std::numeric_limits::max()) + { + throw std::runtime_error("Invalid block header, header data is corrupt"); + } + if (ChunkSize < 1) + { + throw std::runtime_error("Invalid block header, header data is corrupt"); + } + ChunkSizes.push_back(gsl::narrow(ChunkSize)); ReadPtr += NumberSize; } uint64_t Offset = std::distance((const uint8_t*)BlockView.GetData(), ReadPtr); OutHeaderSize = Offset; + return ChunkSizes; +} + +bool +IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function Visitor, + uint64_t& OutHeaderSize) +{ + ZEN_ASSERT(BlockPayload); + if (BlockPayload.GetSize() < 1) + { + return false; + } + + MemoryView BlockView = BlockPayload.GetView(); + + std::vector ChunkSizes = ReadChunkBlockHeader(BlockView, OutHeaderSize); + uint64_t Offset = OutHeaderSize; + OutHeaderSize = Offset; for (uint64_t ChunkSize : ChunkSizes) { IoBuffer Chunk(BlockPayload.AsIoBuffer(), Offset, ChunkSize); diff --git a/src/zenutil/include/zenutil/chunkblock.h b/src/zenutil/include/zenutil/chunkblock.h index 21107fb7c..277580c74 100644 --- a/src/zenutil/include/zenutil/chunkblock.h +++ b/src/zenutil/include/zenutil/chunkblock.h @@ -31,9 +31,10 @@ CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash); typedef std::function(const IoHash& RawHash)> FetchChunkFunc; -CompressedBuffer GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock); -bool IterateChunkBlock(const SharedBuffer& BlockPayload, - std::function Visitor, - uint64_t& OutHeaderSize); +CompressedBuffer GenerateChunkBlock(std::vector>&& FetchChunks, ChunkBlockDescription& OutBlock); +bool IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function Visitor, + uint64_t& OutHeaderSize); +std::vector ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize); } // namespace zen -- cgit v1.2.3 From 90db3ced033d4e06da2739e5d97cdeff2b0ba3b9 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Tue, 11 Mar 2025 09:58:07 +0100 Subject: Build command tweaks (#301) - Improvement: Don't chunk up .mp4 files as they generally won't benefit from deduplication or partial in-place-updates - Improvement: Emit build name to console output when downloading a build - Improvement: Added some debug logging - Bugfix: Logging setup would previously not function correctly when not logging to file --- src/zenutil/include/zenutil/chunkingcontroller.h | 2 +- src/zenutil/include/zenutil/logging.h | 1 + src/zenutil/logging.cpp | 27 +++++++++++++----------- 3 files changed, 17 insertions(+), 13 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h index ebc80e207..246f4498a 100644 --- a/src/zenutil/include/zenutil/chunkingcontroller.h +++ b/src/zenutil/include/zenutil/chunkingcontroller.h @@ -11,7 +11,7 @@ namespace zen { -const std::vector DefaultChunkingExcludeExtensions = {".exe", ".dll", ".pdb", ".self"}; +const std::vector DefaultChunkingExcludeExtensions = {".exe", ".dll", ".pdb", ".self", ".mp4"}; const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u, .MaxSize = 128u * 1024u, diff --git a/src/zenutil/include/zenutil/logging.h b/src/zenutil/include/zenutil/logging.h index ebf6372fc..d64eef207 100644 --- a/src/zenutil/include/zenutil/logging.h +++ b/src/zenutil/include/zenutil/logging.h @@ -32,6 +32,7 @@ struct LoggingOptions bool IsDebug = false; bool IsVerbose = false; bool IsTest = false; + bool AllowAsync = true; bool NoConsoleOutput = false; std::filesystem::path AbsLogFile; // Absolute path to main log file std::string LogId; diff --git a/src/zenutil/logging.cpp b/src/zenutil/logging.cpp index 0444fa2c4..762b75a59 100644 --- a/src/zenutil/logging.cpp +++ b/src/zenutil/logging.cpp @@ -49,7 +49,7 @@ BeginInitializeLogging(const LoggingOptions& LogOptions) zen::logging::InitializeLogging(); zen::logging::EnableVTMode(); - bool IsAsync = true; + bool IsAsync = LogOptions.AllowAsync; if (LogOptions.IsDebug) { @@ -181,7 +181,7 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) LogLevel = logging::level::Debug; } - if (LogOptions.IsTest) + if (LogOptions.IsTest || LogOptions.IsVerbose) { LogLevel = logging::level::Trace; } @@ -194,18 +194,21 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) spdlog::set_formatter( std::make_unique(LogOptions.LogId, std::chrono::system_clock::now())); // default to duration prefix - if (LogOptions.AbsLogFile.extension() == ".json") - { - g_FileSink->set_formatter(std::make_unique(LogOptions.LogId)); - } - else + if (g_FileSink) { - g_FileSink->set_formatter(std::make_unique(LogOptions.LogId)); // this will have a date prefix - } + if (LogOptions.AbsLogFile.extension() == ".json") + { + g_FileSink->set_formatter(std::make_unique(LogOptions.LogId)); + } + else + { + g_FileSink->set_formatter(std::make_unique(LogOptions.LogId)); // this will have a date prefix + } - const std::string StartLogTime = zen::DateTime::Now().ToIso8601(); + const std::string StartLogTime = zen::DateTime::Now().ToIso8601(); - spdlog::apply_all([&](auto Logger) { Logger->info("log starting at {}", StartLogTime); }); + spdlog::apply_all([&](auto Logger) { Logger->info("log starting at {}", StartLogTime); }); + } g_IsLoggingInitialized = true; } @@ -213,7 +216,7 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) void ShutdownLogging() { - if (g_IsLoggingInitialized) + if (g_IsLoggingInitialized && g_FileSink) { auto DefaultLogger = zen::logging::Default(); ZEN_LOG_INFO(DefaultLogger, "log ending at {}", zen::DateTime::Now().ToIso8601()); -- cgit v1.2.3 From fb09d861fd76e459ac86bec388bd406aaca8e681 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 12 Mar 2025 10:51:57 +0100 Subject: improved block gen logic (#302) - Improvement: Reduced memory usage during upload and part upload validation - Improvement: Reduced I/O usage during upload and download - Improvement: Faster block regeneration when uploading in response to PutBuild/FinalizeBuild - Improvement: More trace scopes for build upload operations - Bugfix: Fixed crash during download when trying to write outside a file range --- src/zenutil/chunkedcontent.cpp | 15 +++++++++++++-- src/zenutil/chunkedfile.cpp | 6 ++++++ src/zenutil/chunkingcontroller.cpp | 4 ++++ src/zenutil/filebuildstorage.cpp | 17 +++++++++++++++++ src/zenutil/include/zenutil/chunkedcontent.h | 4 ++-- 5 files changed, 42 insertions(+), 4 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp index 4ca89d996..bb1ee5183 100644 --- a/src/zenutil/chunkedcontent.cpp +++ b/src/zenutil/chunkedcontent.cpp @@ -96,6 +96,8 @@ namespace { uint32_t PathIndex, std::atomic& AbortFlag) { + ZEN_TRACE_CPU("ChunkFolderContent"); + const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; @@ -136,6 +138,8 @@ namespace { } else { + ZEN_TRACE_CPU("HashOnly"); + IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); @@ -228,6 +232,7 @@ FolderContent::operator==(const FolderContent& Rhs) const bool FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const { + ZEN_TRACE_CPU("FolderContent::AreKnownFilesEqual"); tsl::robin_map RhsPathToIndex; const size_t RhsPathCount = Rhs.Paths.size(); RhsPathToIndex.reserve(RhsPathCount); @@ -259,6 +264,7 @@ FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const void FolderContent::UpdateState(const FolderContent& Rhs, std::vector& OutPathIndexesOufOfDate) { + ZEN_TRACE_CPU("FolderContent::UpdateState"); tsl::robin_map RhsPathToIndex; const uint32_t RhsPathCount = gsl::narrow(Rhs.Paths.size()); RhsPathToIndex.reserve(RhsPathCount); @@ -297,6 +303,7 @@ FolderContent::UpdateState(const FolderContent& Rhs, std::vector& OutP FolderContent GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector& OutDeletedPathIndexes) { + ZEN_TRACE_CPU("FolderContent::GetUpdatedContent"); FolderContent Result = {.Platform = Old.Platform}; tsl::robin_map NewPathToIndex; const uint32_t NewPathCount = gsl::narrow(New.Paths.size()); @@ -332,6 +339,7 @@ GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vecto void SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output) { + ZEN_TRACE_CPU("SaveFolderContentToCompactBinary"); Output.AddString("platform"sv, ToString(Content.Platform)); compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); @@ -342,6 +350,7 @@ SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output) FolderContent LoadFolderContentToCompactBinary(CbObjectView Input) { + ZEN_TRACE_CPU("LoadFolderContentToCompactBinary"); FolderContent Content; Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); @@ -494,6 +503,7 @@ GetFolderContent(GetFolderContentStatistics& Stats, void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output) { + ZEN_TRACE_CPU("SaveChunkedFolderContentToCompactBinary"); Output.AddString("platform"sv, ToString(Content.Platform)); compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); @@ -512,6 +522,7 @@ SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbW ChunkedFolderContent LoadChunkedFolderContentToCompactBinary(CbObjectView Input) { + ZEN_TRACE_CPU("LoadChunkedFolderContentToCompactBinary"); ChunkedFolderContent Content; Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); @@ -788,7 +799,7 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) struct ChunkLocationReference { - uint32_t ChunkIndex; + uint32_t ChunkIndex = (uint32_t)-1; ChunkedContentLookup::ChunkSequenceLocation Location; }; @@ -853,7 +864,7 @@ BuildChunkedContentLookup(const ChunkedFolderContent& Content) { Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex}); uint32_t Count = 0; - while (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex) + while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex)) { Result.ChunkSequenceLocations.push_back(Locations[RangeOffset + Count].Location); Count++; diff --git a/src/zenutil/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp index 4f9344039..a2c041ffd 100644 --- a/src/zenutil/chunkedfile.cpp +++ b/src/zenutil/chunkedfile.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "chunking.h" @@ -33,6 +34,7 @@ namespace { IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info) { + ZEN_TRACE_CPU("SerializeChunkedInfo"); size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) + RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16); IoBuffer HeaderData(HeaderSize); @@ -65,6 +67,7 @@ SerializeChunkedInfo(const ChunkedInfo& Info) ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer) { + ZEN_TRACE_CPU("DeserializeChunkedInfo"); MemoryView View = Buffer.GetView(); ChunkedHeader Header; { @@ -99,6 +102,7 @@ DeserializeChunkedInfo(IoBuffer& Buffer) void Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function GetChunk) { + ZEN_TRACE_CPU("Reconstruct"); BasicFile Reconstructed; Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate); BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024); @@ -119,6 +123,8 @@ ChunkData(BasicFile& RawData, std::atomic* BytesProcessed, std::atomic* AbortFlag) { + ZEN_TRACE_CPU("ChunkData"); + ChunkedInfoWithSource Result; tsl::robin_map FoundChunks; diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp index 017d12433..2a7057a46 100644 --- a/src/zenutil/chunkingcontroller.cpp +++ b/src/zenutil/chunkingcontroller.cpp @@ -4,6 +4,7 @@ #include #include +#include ZEN_THIRD_PARTY_INCLUDES_START #include @@ -61,6 +62,7 @@ public: std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { + ZEN_TRACE_CPU("BasicChunkingController::ProcessFile"); const bool ExcludeFromChunking = std::find(m_ChunkExcludeExtensions.begin(), m_ChunkExcludeExtensions.end(), InputPath.extension()) != m_ChunkExcludeExtensions.end(); @@ -136,6 +138,7 @@ public: std::atomic& BytesProcessed, std::atomic& AbortFlag) const override { + ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile"); if (RawSize < m_ChunkFileSizeLimit) { return false; @@ -145,6 +148,7 @@ public: if (FixedChunking) { + ZEN_TRACE_CPU("FixedChunking"); IoHashStream FullHash; IoBuffer Source = IoBufferBuilder::MakeFromFile(InputPath); uint64_t Offset = 0; diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp index e57109006..47a4e1cc4 100644 --- a/src/zenutil/filebuildstorage.cpp +++ b/src/zenutil/filebuildstorage.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace zen { @@ -36,6 +37,7 @@ public: virtual CbObject ListBuilds(CbObject Query) override { + ZEN_TRACE_CPU("FileBuildStorage::ListBuilds"); ZEN_UNUSED(Query); SimulateLatency(Query.GetSize(), 0); @@ -72,6 +74,7 @@ public: virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override { + ZEN_TRACE_CPU("FileBuildStorage::PutBuild"); SimulateLatency(MetaData.GetSize(), 0); Stopwatch ExecutionTimer; @@ -93,6 +96,7 @@ public: virtual CbObject GetBuild(const Oid& BuildId) override { + ZEN_TRACE_CPU("FileBuildStorage::GetBuild"); SimulateLatency(0, 0); Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); @@ -105,6 +109,7 @@ public: virtual void FinalizeBuild(const Oid& BuildId) override { + ZEN_TRACE_CPU("FileBuildStorage::FinalizeBuild"); SimulateLatency(0, 0); Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); @@ -119,6 +124,7 @@ public: std::string_view PartName, const CbObject& MetaData) override { + ZEN_TRACE_CPU("FileBuildStorage::PutBuildPart"); SimulateLatency(MetaData.GetSize(), 0); Stopwatch ExecutionTimer; auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); @@ -164,6 +170,7 @@ public: virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override { + ZEN_TRACE_CPU("FileBuildStorage::GetBuildPart"); SimulateLatency(0, 0); Stopwatch ExecutionTimer; @@ -186,6 +193,7 @@ public: virtual std::vector FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override { + ZEN_TRACE_CPU("FileBuildStorage::FinalizeBuildPart"); SimulateLatency(0, 0); Stopwatch ExecutionTimer; @@ -215,6 +223,7 @@ public: ZenContentType ContentType, const CompositeBuffer& Payload) override { + ZEN_TRACE_CPU("FileBuildStorage::PutBuildBlob"); ZEN_UNUSED(BuildId); ZEN_ASSERT(ContentType == ZenContentType::kCompressedBinary); SimulateLatency(Payload.GetSize(), 0); @@ -242,6 +251,7 @@ public: std::function&& Transmitter, std::function&& OnSentBytes) override { + ZEN_TRACE_CPU("FileBuildStorage::PutLargeBuildBlob"); ZEN_UNUSED(BuildId); ZEN_UNUSED(ContentType); SimulateLatency(0, 0); @@ -281,6 +291,7 @@ public: uint64_t Size = Min(32u * 1024u * 1024u, PayloadSize - Offset); WorkItems.push_back([this, RawHash, BlockPath, Workload, Offset, Size]() { + ZEN_TRACE_CPU("FileBuildStorage::PutLargeBuildBlob_Work"); IoBuffer PartPayload = Workload->Transmitter(Offset, Size); SimulateLatency(PartPayload.GetSize(), 0); @@ -327,6 +338,7 @@ public: virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override { + ZEN_TRACE_CPU("FileBuildStorage::GetBuildBlob"); ZEN_UNUSED(BuildId); SimulateLatency(0, 0); Stopwatch ExecutionTimer; @@ -363,6 +375,7 @@ public: uint64_t ChunkSize, std::function&& Receiver) override { + ZEN_TRACE_CPU("FileBuildStorage::GetLargeBuildBlob"); ZEN_UNUSED(BuildId); SimulateLatency(0, 0); Stopwatch ExecutionTimer; @@ -392,6 +405,7 @@ public: { uint64_t Size = Min(ChunkSize, BlobSize - Offset); WorkItems.push_back([this, BlockPath, Workload, Offset, Size]() { + ZEN_TRACE_CPU("FileBuildStorage::GetLargeBuildBlob_Work"); SimulateLatency(0, 0); IoBuffer PartPayload(Size); Workload->BlobFile.Read(PartPayload.GetMutableView().GetData(), Size, Offset); @@ -411,6 +425,7 @@ public: virtual void PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override { + ZEN_TRACE_CPU("FileBuildStorage::PutBlockMetadata"); ZEN_UNUSED(BuildId); SimulateLatency(MetaData.GetSize(), 0); @@ -429,6 +444,7 @@ public: virtual std::vector FindBlocks(const Oid& BuildId) override { + ZEN_TRACE_CPU("FileBuildStorage::FindBlocks"); ZEN_UNUSED(BuildId); SimulateLatency(0, 0); Stopwatch ExecutionTimer; @@ -461,6 +477,7 @@ public: virtual std::vector GetBlockMetadata(const Oid& BuildId, std::span BlockHashes) override { + ZEN_TRACE_CPU("FileBuildStorage::GetBlockMetadata"); ZEN_UNUSED(BuildId); SimulateLatency(0, 0); Stopwatch ExecutionTimer; diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h index 309341550..57b55cb8e 100644 --- a/src/zenutil/include/zenutil/chunkedcontent.h +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -124,8 +124,8 @@ struct ChunkedContentLookup { struct ChunkSequenceLocation { - uint32_t SequenceIndex; - uint64_t Offset; + uint32_t SequenceIndex = (uint32_t)-1; + uint64_t Offset = (uint64_t)-1; }; tsl::robin_map ChunkHashToChunkIndex; tsl::robin_map RawHashToSequenceIndex; -- cgit v1.2.3 From 7046fc9dc202307ba92d05a6386bfb52e9db0ab9 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 12 Mar 2025 13:54:16 +0100 Subject: fixes for log timestamps (#304) * add GetTimeSinceProcessStart returning time since process start. implemented using https://github.com/maxliani/GetTimeSinceProcessStart/tree/main * fix fractions when using epoch mode. Previously it would show the fraction from the absolute time stamp and not relative to epoch * used GetTimeSinceProcessStart to offset the epoch so that it represents the process spawn time --- src/zenutil/include/zenutil/logging/fullformatter.h | 7 ++++++- src/zenutil/logging.cpp | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src/zenutil') diff --git a/src/zenutil/include/zenutil/logging/fullformatter.h b/src/zenutil/include/zenutil/logging/fullformatter.h index 07ad408fa..0326870e5 100644 --- a/src/zenutil/include/zenutil/logging/fullformatter.h +++ b/src/zenutil/include/zenutil/logging/fullformatter.h @@ -45,6 +45,8 @@ public: std::chrono::seconds TimestampSeconds; + std::chrono::milliseconds millis; + if (m_UseFullDate) { TimestampSeconds = std::chrono::duration_cast(msg.time.time_since_epoch()); @@ -69,6 +71,8 @@ public: spdlog::details::fmt_helper::pad2(m_CachedLocalTm.tm_sec, m_CachedDatetime); m_CachedDatetime.push_back('.'); } + + millis = spdlog::details::fmt_helper::time_fraction(msg.time); } else { @@ -97,6 +101,8 @@ public: spdlog::details::fmt_helper::pad2(LogSecs, m_CachedDatetime); m_CachedDatetime.push_back('.'); } + + millis = std::chrono::duration_cast(ElapsedTime - TimestampSeconds); } { @@ -104,7 +110,6 @@ public: OutBuffer.append(m_CachedDatetime.begin(), m_CachedDatetime.end()); } - auto millis = spdlog::details::fmt_helper::time_fraction(msg.time); spdlog::details::fmt_helper::pad3(static_cast(millis.count()), OutBuffer); OutBuffer.push_back(']'); OutBuffer.push_back(' '); diff --git a/src/zenutil/logging.cpp b/src/zenutil/logging.cpp index 762b75a59..cb0fd6679 100644 --- a/src/zenutil/logging.cpp +++ b/src/zenutil/logging.cpp @@ -16,6 +16,7 @@ ZEN_THIRD_PARTY_INCLUDES_END #include #include #include +#include #include #include #include @@ -191,8 +192,9 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) logging::RefreshLogLevels(LogLevel); spdlog::flush_on(spdlog::level::err); spdlog::flush_every(std::chrono::seconds{2}); - spdlog::set_formatter( - std::make_unique(LogOptions.LogId, std::chrono::system_clock::now())); // default to duration prefix + spdlog::set_formatter(std::make_unique( + LogOptions.LogId, + std::chrono::system_clock::now() - std::chrono::milliseconds(GetTimeSinceProcessStart()))); // default to duration prefix if (g_FileSink) { -- cgit v1.2.3