diff options
| author | Dan Engelbrecht <[email protected]> | 2026-03-16 16:51:58 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2026-03-16 16:51:58 +0100 |
| commit | 4b1abcd8248877203f9f64666221d76071b6ac45 (patch) | |
| tree | e43854837faadd181dc2148ec98a73a2e155be72 /src | |
| parent | bump toolchain to match what's in use for UE (#846) (diff) | |
| download | zen-4b1abcd8248877203f9f64666221d76071b6ac45.tar.xz zen-4b1abcd8248877203f9f64666221d76071b6ac45.zip | |
revise oplog block arrangement (#842)
- Improvement: Fixed issue where oplog upload could create blocks larger than the max limit (64 MB)
Refactored remoteprojectstore.cpp to use ParallelWork and exceptions for error handling.
Diffstat (limited to 'src')
7 files changed, 4918 insertions, 2473 deletions
diff --git a/src/zenremotestore/builds/buildstorageoperations.cpp b/src/zenremotestore/builds/buildstorageoperations.cpp index 44d52451c..a04063c4c 100644 --- a/src/zenremotestore/builds/buildstorageoperations.cpp +++ b/src/zenremotestore/builds/buildstorageoperations.cpp @@ -5214,7 +5214,7 @@ BuildsOperationUploadFolder::GenerateBlock(const ChunkedFolderContent& Content, { BlockContent.emplace_back(std::make_pair( Content.ChunkedContent.ChunkHashes[ChunkIndex], - [this, &Content, &Lookup, &OpenFileCache, ChunkIndex](const IoHash& ChunkHash) -> std::pair<uint64_t, CompressedBuffer> { + [this, &Content, &Lookup, &OpenFileCache, ChunkIndex](const IoHash& ChunkHash) -> std::pair<uint64_t, CompositeBuffer> { CompositeBuffer Chunk = FetchChunk(Content, Lookup, ChunkHash, OpenFileCache); ZEN_ASSERT(Chunk); uint64_t RawSize = Chunk.GetSize(); @@ -5224,7 +5224,7 @@ BuildsOperationUploadFolder::GenerateBlock(const ChunkedFolderContent& Content, const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? 
OodleCompressionLevel::VeryFast : OodleCompressionLevel::None; - return {RawSize, CompressedBuffer::Compress(Chunk, OodleCompressor::Mermaid, CompressionLevel)}; + return {RawSize, CompressedBuffer::Compress(Chunk, OodleCompressor::Mermaid, CompressionLevel).GetCompressed()}; })); } diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp index cca32c17d..0fe3c09ce 100644 --- a/src/zenremotestore/chunking/chunkblock.cpp +++ b/src/zenremotestore/chunking/chunkblock.cpp @@ -352,9 +352,9 @@ GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, BufferEndPtr += WriteVarUInt(gsl::narrow<uint64_t>(ChunkCount), BufferEndPtr); for (const auto& It : FetchChunks) { - std::pair<uint64_t, CompressedBuffer> Chunk = It.second(It.first); - uint64_t ChunkSize = 0; - std::span<const SharedBuffer> Segments = Chunk.second.GetCompressed().GetSegments(); + std::pair<uint64_t, CompositeBuffer> Chunk = It.second(It.first); + uint64_t ChunkSize = 0; + std::span<const SharedBuffer> Segments = Chunk.second.GetSegments(); for (const SharedBuffer& Segment : Segments) { ZEN_ASSERT(Segment.IsOwned()); @@ -957,8 +957,8 @@ TEST_CASE("chunkblock.block") for (const auto& It : AttachmentsWithId) { Chunks.push_back( - std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { - return {Buffer.DecodeRawSize(), Buffer}; + std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompositeBuffer> { + return {Buffer.DecodeRawSize(), Buffer.GetCompressed()}; })); } ChunkBlockDescription Block; @@ -992,8 +992,8 @@ TEST_CASE("chunkblock.reuseblocks") for (const auto& It : AttachmentsWithId) { Chunks.push_back( - std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { - return {Buffer.DecodeRawSize(), Buffer}; + std::make_pair(It.second.DecodeRawHash(), [Buffer = 
It.second](const IoHash&) -> std::pair<uint64_t, CompositeBuffer> { + return {Buffer.DecodeRawSize(), Buffer.GetCompressed()}; })); } ChunkBlockDescription Block; diff --git a/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h b/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h index 931bb2097..e3a5f6539 100644 --- a/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h +++ b/src/zenremotestore/include/zenremotestore/chunking/chunkblock.h @@ -31,7 +31,7 @@ ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockO std::vector<ChunkBlockDescription> ParseBlockMetadatas(std::span<const CbObject> BlockMetadatas); CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData); ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash); -typedef std::function<std::pair<uint64_t, CompressedBuffer>(const IoHash& RawHash)> FetchChunkFunc; +typedef std::function<std::pair<uint64_t, CompositeBuffer>(const IoHash& RawHash)> FetchChunkFunc; CompressedBuffer GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock); bool IterateChunkBlock(const SharedBuffer& BlockPayload, diff --git a/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h b/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h index 084d975a2..8df892053 100644 --- a/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h +++ b/src/zenremotestore/include/zenremotestore/projectstore/remoteprojectstore.h @@ -150,27 +150,51 @@ struct RemoteStoreOptions size_t ChunkFileSizeLimit = DefaultChunkFileSizeLimit; }; -typedef std::function<IoBuffer(const IoHash& AttachmentHash)> TGetAttachmentBufferFunc; - -RemoteProjectStore::LoadContainerResult BuildContainer( - CidStore& ChunkStore, - ProjectStore::Project& Project, - ProjectStore::Oplog& Oplog, - WorkerThreadPool& 
WorkerPool, - size_t MaxBlockSize, - size_t MaxChunksPerBlock, - size_t MaxChunkEmbedSize, - size_t ChunkFileSizeLimit, - bool BuildBlocks, - bool IgnoreMissingAttachments, - bool AllowChunking, - const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, - const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, - const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, - bool EmbedLooseFiles); +typedef std::function<CompositeBuffer(const IoHash& AttachmentHash)> TGetAttachmentBufferFunc; + +CbObject BuildContainer(CidStore& ChunkStore, + ProjectStore::Project& Project, + ProjectStore::Oplog& Oplog, + WorkerThreadPool& WorkerPool, + size_t MaxBlockSize, + size_t MaxChunksPerBlock, + size_t MaxChunkEmbedSize, + size_t ChunkFileSizeLimit, + bool BuildBlocks, + bool IgnoreMissingAttachments, + bool AllowChunking, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, + const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, + const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, + bool EmbedLooseFiles); class JobContext; +class RemoteStoreError : public std::runtime_error +{ +public: + RemoteStoreError(const std::string& Message, int32_t ErrorCode, std::string_view Text) + : std::runtime_error(Message) + , m_ErrorCode(ErrorCode) + , m_Text(Text) + { + } + + RemoteStoreError(const char* Message, int32_t ErrorCode, std::string_view Text) + : std::runtime_error(Message) + , m_ErrorCode(ErrorCode) + , m_Text(Text) + { + } + + inline int32_t GetErrorCode() const { return m_ErrorCode; } + inline std::string_view GetText() const { return m_Text; } + +private: + int32_t m_ErrorCode = 0; + std::string m_Text; +}; + RemoteProjectStore::Result SaveOplogContainer( ProjectStore::Oplog& Oplog, const CbObject& ContainerObject, @@ -181,20 +205,20 @@ RemoteProjectStore::Result 
SaveOplogContainer( const std::function<void(const ChunkedInfo& Chunked)>& OnChunkedAttachment, JobContext* OptionalContext); -RemoteProjectStore::Result SaveOplog(CidStore& ChunkStore, - RemoteProjectStore& RemoteStore, - ProjectStore::Project& Project, - ProjectStore::Oplog& Oplog, - WorkerThreadPool& NetworkWorkerPool, - WorkerThreadPool& WorkerPool, - size_t MaxBlockSize, - size_t MaxChunksPerBlock, - size_t MaxChunkEmbedSize, - size_t ChunkFileSizeLimit, - bool EmbedLooseFiles, - bool ForceUpload, - bool IgnoreMissingAttachments, - JobContext* OptionalContext); +void SaveOplog(CidStore& ChunkStore, + RemoteProjectStore& RemoteStore, + ProjectStore::Project& Project, + ProjectStore::Oplog& Oplog, + WorkerThreadPool& NetworkWorkerPool, + WorkerThreadPool& WorkerPool, + size_t MaxBlockSize, + size_t MaxChunksPerBlock, + size_t MaxChunkEmbedSize, + size_t ChunkFileSizeLimit, + bool EmbedLooseFiles, + bool ForceUpload, + bool IgnoreMissingAttachments, + JobContext* OptionalContext); struct LoadOplogContext { @@ -218,7 +242,7 @@ struct LoadOplogContext JobContext* OptionalJobContext = nullptr; }; -RemoteProjectStore::Result LoadOplog(LoadOplogContext&& Context); +void LoadOplog(LoadOplogContext&& Context); std::vector<IoHash> GetBlockHashesFromOplog(CbObjectView ContainerObject); std::vector<ThinChunkBlockDescription> GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> IncludeBlockHashes); diff --git a/src/zenremotestore/projectstore/remoteprojectstore.cpp b/src/zenremotestore/projectstore/remoteprojectstore.cpp index c44b06305..8ba2397ff 100644 --- a/src/zenremotestore/projectstore/remoteprojectstore.cpp +++ b/src/zenremotestore/projectstore/remoteprojectstore.cpp @@ -8,6 +8,7 @@ #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> +#include <zencore/parallelwork.h> #include <zencore/scopeguard.h> #include <zencore/stream.h> #include <zencore/timer.h> @@ -66,33 +67,7 @@ namespace zen { } */ namespace 
remotestore_impl { - ////////////////////////////// AsyncRemoteResult - - struct AsyncRemoteResult - { - void SetError(int32_t ErrorCode, const std::string& ErrorReason, const std::string ErrorText) - { - int32_t Expected = 0; - if (m_ErrorCode.compare_exchange_weak(Expected, ErrorCode ? ErrorCode : -1)) - { - m_ErrorReason = ErrorReason; - m_ErrorText = ErrorText; - } - } - bool IsError() const { return m_ErrorCode.load() != 0; } - int GetError() const { return m_ErrorCode.load(); }; - const std::string& GetErrorReason() const { return m_ErrorReason; }; - const std::string& GetErrorText() const { return m_ErrorText; }; - RemoteProjectStore::Result ConvertResult(double ElapsedSeconds = 0.0) const - { - return RemoteProjectStore::Result{m_ErrorCode, ElapsedSeconds, m_ErrorReason, m_ErrorText}; - } - - private: - std::atomic<int32_t> m_ErrorCode = 0; - std::string m_ErrorReason; - std::string m_ErrorText; - }; + using namespace std::literals; void ReportProgress(JobContext* OptionalContext, std::string_view CurrentOp, @@ -170,6 +145,920 @@ namespace remotestore_impl { return BlockIndex; } + // BlockComposer packs attachment chunks (each identified by an IoHash and a byte size) into + // fixed-size blocks subject to two constraints: + // - The total encoded content of a block must not exceed UsableBlockSize bytes. + // - A block may contain at most MaxChunksPerBlock chunk entries. + // + // Chunks belonging to the same op key (Oid) are kept together in one block whenever possible, + // so that a single block fetch can satisfy an entire op without needing to read multiple blocks. + // + // When a block is complete the OnNewBlock callback is invoked with ownership of the chunk-hash + // vector for that block. The callback is also invoked for any partially-filled pending block + // that remains after all attachments have been processed. 
+ class BlockComposer + { + public: + struct Configuration + { + uint64_t MaxBlockSize = 0; // Total encoded block size limit in bytes (includes header overhead). + uint64_t MaxChunksPerBlock = 0; // Maximum number of chunk entries allowed in a single block. + uint64_t MaxChunkEmbedSize = 0; // Maximum size of one embeddable chunk; used to calculate worst-case header size. + std::function<bool()> + IsCancelledFunc; // Optional: if set and returns true, Compose returns early without emitting remaining blocks. + }; + + explicit BlockComposer(const Configuration& Config) : m_Config(Config), m_UsableBlockSize(CalculateUsableBlockSize(m_Config)) {} + + // Compose distributes AttachmentHashes into blocks via a two-phase algorithm. + // + // Phase 1 - Gather (inner while loop): + // Starting from the current index, collect all consecutive attachments that share the same + // op key (Oid) into CurrentOpRawHashes / CurrentOpChunkSizes. Collection stops (with + // CurrentOpFillFullBlock = false) when a different op key is encountered. Collection also + // stops early (with CurrentOpFillFullBlock = true) if adding the next same-key attachment + // would exceed m_UsableBlockSize by bytes OR would reach MaxChunksPerBlock by count - + // meaning the gathered chunks exactly saturate one block and must be emitted immediately. + // + // Phase 2 - Place (while loop over CurrentOpChunkSizes): + // Decides where the gathered chunks go. Exactly one of four mutually exclusive paths runs + // per iteration; after each path the loop re-evaluates with whatever chunks remain: + // + // Path A: CurrentOpFillFullBlock == true + // The gathered set exactly fills one block. Emit it immediately as a standalone block + // and clear CurrentOpChunkSizes. The pending block is left untouched. + // + // Path B: All gathered chunks fit in the pending block (both size and count constraints met) + // Merge the gathered chunks into PendingChunkHashes/PendingBlockSize and clear the + // current-op buffers. 
If the pending block is now exactly full, flush it immediately. + // + // Path C: Gathered chunks don't fit AND pending block is >75% full by bytes + // The pending block is already well-utilised; flush it now and loop so that the gathered + // chunks are re-evaluated against the freshly emptied pending block. + // + // Path D: Gathered chunks don't fit AND pending block is <=75% full by bytes + // The binding constraint is chunk count, not bytes. Greedily fill the pending block with + // as many gathered chunks as fit (stopping at the first chunk that would violate either + // size or count), flush the pending block, remove the added chunks from the current-op + // buffers, and loop so the remaining gathered chunks are re-evaluated. + // + // Final flush: after all attachments have been processed, any non-empty pending block is + // emitted. + void Compose(std::span<const IoHash> AttachmentHashes, + std::span<const uint64_t> AttachmentSizes, + std::span<const Oid> AttachmentKeys, + const std::function<void(std::vector<IoHash>&& ChunkRawHashes)>& OnNewBlock) + { + std::vector<IoHash> PendingChunkHashes; + uint64_t PendingBlockSize = 0; + + size_t SortedUploadAttachmentsIndex = 0; + + Stopwatch AssembleBlocksProgressTimer; + while (SortedUploadAttachmentsIndex < AttachmentHashes.size()) + { + if (m_Config.IsCancelledFunc && m_Config.IsCancelledFunc()) + { + return; + } + + const IoHash& FirstAttachmentHash = AttachmentHashes[SortedUploadAttachmentsIndex]; + const Oid FirstAttachmentOpKey = AttachmentKeys[SortedUploadAttachmentsIndex]; + uint64_t CurrentOpAttachmentsSize = AttachmentSizes[SortedUploadAttachmentsIndex]; + ZEN_ASSERT(CurrentOpAttachmentsSize <= m_Config.MaxChunkEmbedSize); + + std::vector<IoHash> CurrentOpRawHashes; + CurrentOpRawHashes.push_back(FirstAttachmentHash); + + std::vector<uint64_t> CurrentOpChunkSizes; + CurrentOpChunkSizes.push_back(CurrentOpAttachmentsSize); + + bool CurrentOpFillFullBlock = false; + + while (SortedUploadAttachmentsIndex 
+ CurrentOpRawHashes.size() < AttachmentHashes.size()) + { + size_t NextSortedUploadAttachmentsIndex = SortedUploadAttachmentsIndex + CurrentOpChunkSizes.size(); + const Oid NextAttachmentOpKey = AttachmentKeys[NextSortedUploadAttachmentsIndex]; + if (NextAttachmentOpKey != FirstAttachmentOpKey) + { + break; + } + const IoHash& NextAttachmentHash = AttachmentHashes[NextSortedUploadAttachmentsIndex]; + uint64_t NextOpAttachmentSize = AttachmentSizes[NextSortedUploadAttachmentsIndex]; + ZEN_ASSERT(NextOpAttachmentSize <= m_Config.MaxChunkEmbedSize); + + if (CurrentOpAttachmentsSize + NextOpAttachmentSize > m_UsableBlockSize) + { + CurrentOpFillFullBlock = true; + break; + } + CurrentOpRawHashes.push_back(NextAttachmentHash); + CurrentOpChunkSizes.push_back(NextOpAttachmentSize); + CurrentOpAttachmentsSize += NextOpAttachmentSize; + + if (CurrentOpRawHashes.size() == m_Config.MaxChunksPerBlock) + { + CurrentOpFillFullBlock = true; + break; + } + } + SortedUploadAttachmentsIndex += CurrentOpChunkSizes.size(); + + while (!CurrentOpChunkSizes.empty()) + { + size_t CurrentOpAttachmentCount = CurrentOpChunkSizes.size(); + + ZEN_ASSERT(CurrentOpRawHashes.size() == CurrentOpChunkSizes.size()); + ZEN_ASSERT(CurrentOpAttachmentsSize <= m_UsableBlockSize); + ZEN_ASSERT(CurrentOpAttachmentCount <= m_Config.MaxChunksPerBlock); + + // Path A: gathered chunks exactly fill one block -- emit as a standalone block immediately. + if (CurrentOpFillFullBlock) + { + OnNewBlock(std::move(CurrentOpRawHashes)); + CurrentOpChunkSizes.clear(); + CurrentOpAttachmentsSize = 0; + CurrentOpFillFullBlock = false; + } + else if ((PendingBlockSize + CurrentOpAttachmentsSize) <= m_UsableBlockSize && + (PendingChunkHashes.size() + CurrentOpAttachmentCount) <= m_Config.MaxChunksPerBlock) + { + // Path B: all gathered chunks fit in the pending block -- merge them in. 
+ PendingChunkHashes.insert(PendingChunkHashes.end(), CurrentOpRawHashes.begin(), CurrentOpRawHashes.end()); + PendingBlockSize += CurrentOpAttachmentsSize; + ZEN_ASSERT(PendingBlockSize <= m_UsableBlockSize); + ZEN_ASSERT(PendingChunkHashes.size() <= m_Config.MaxChunksPerBlock); + + CurrentOpRawHashes.clear(); + CurrentOpChunkSizes.clear(); + CurrentOpAttachmentsSize = 0; + + if (PendingBlockSize == m_UsableBlockSize || PendingChunkHashes.size() == m_Config.MaxChunksPerBlock) + { + OnNewBlock(std::move(PendingChunkHashes)); + PendingChunkHashes.clear(); + PendingBlockSize = 0; + } + } + else if (PendingBlockSize > (m_UsableBlockSize * 3) / 4) + { + // Path C: gathered chunks don't fit AND pending block is >75% full by bytes -- flush pending + // block now; loop to re-evaluate gathered chunks against the freshly emptied pending block. + OnNewBlock(std::move(PendingChunkHashes)); + PendingChunkHashes.clear(); + PendingBlockSize = 0; + } + else + { + // Path D: gathered chunks don't fit AND pending block is <=75% full by bytes -- the + // binding constraint is chunk count. Greedily fill the pending block with as many + // chunks as fit, flush it, remove them from the current-op buffers, and loop with the + // remaining gathered chunks in the next iteration. 
+ + size_t AddedChunkCount = 0; + uint64_t AddedChunkSize = 0; + + for (size_t CurrentChunkIndex = 0; CurrentChunkIndex < CurrentOpRawHashes.size(); CurrentChunkIndex++) + { + uint64_t ChunkSize = CurrentOpChunkSizes[CurrentChunkIndex]; + if (PendingBlockSize + ChunkSize > m_UsableBlockSize) + { + break; + } + if (PendingChunkHashes.size() == m_Config.MaxChunksPerBlock) + { + break; + } + PendingBlockSize += ChunkSize; + PendingChunkHashes.push_back(CurrentOpRawHashes[CurrentChunkIndex]); + AddedChunkSize += ChunkSize; + AddedChunkCount++; + + ZEN_ASSERT(PendingBlockSize <= m_UsableBlockSize); + ZEN_ASSERT(PendingChunkHashes.size() <= m_Config.MaxChunksPerBlock); + } + ZEN_ASSERT(AddedChunkSize <= CurrentOpAttachmentsSize); + + ZEN_ASSERT(PendingBlockSize <= m_UsableBlockSize); + ZEN_ASSERT(PendingChunkHashes.size() <= m_Config.MaxChunksPerBlock); + ZEN_ASSERT(AddedChunkCount < CurrentOpRawHashes.size()); + + OnNewBlock(std::move(PendingChunkHashes)); + PendingChunkHashes.clear(); + PendingBlockSize = 0; + + CurrentOpRawHashes.erase(CurrentOpRawHashes.begin(), CurrentOpRawHashes.begin() + AddedChunkCount); + CurrentOpChunkSizes.erase(CurrentOpChunkSizes.begin(), CurrentOpChunkSizes.begin() + AddedChunkCount); + CurrentOpAttachmentsSize -= AddedChunkSize; + } + } + } + if (!PendingChunkHashes.empty()) + { + ZEN_ASSERT(PendingBlockSize < m_UsableBlockSize); + ZEN_ASSERT(PendingChunkHashes.size() < m_Config.MaxChunksPerBlock); + OnNewBlock(std::move(PendingChunkHashes)); + PendingChunkHashes.clear(); + } + } + + private: + // CalculateUsableBlockSize computes the maximum bytes available for chunk content in one + // block. The block header encodes: + // - A CompressedBuffer header of fixed size. + // - One VarUInt field encoding MaxChunksPerBlock. + // - MaxChunksPerBlock VarUInt entries each encoding one chunk size (bounded by + // MaxChunkEmbedSize, which determines the worst-case VarUInt width). 
+ // MaxHeaderSize is the worst-case total header size, so + // UsableBlockSize = MaxBlockSize - MaxHeaderSize is a conservative bound that guarantees + // chunk content always fits within the encoded block. + static uint64_t CalculateUsableBlockSize(const Configuration& Config) + { + ZEN_ASSERT(Config.MaxChunksPerBlock > 0); + ZEN_ASSERT(Config.MaxChunkEmbedSize > 0); + uint64_t MaxHeaderSize = CompressedBuffer::GetHeaderSizeForNoneEncoder() + MeasureVarUInt(Config.MaxChunksPerBlock) + + MeasureVarUInt(Config.MaxChunkEmbedSize) * Config.MaxChunksPerBlock; + ZEN_ASSERT(Config.MaxBlockSize > MaxHeaderSize); + return Config.MaxBlockSize - MaxHeaderSize; + } + + const Configuration m_Config; + const uint64_t m_UsableBlockSize = 0; + }; + + IoBuffer CompressToTempFile(const IoHash& RawHash, + const IoBuffer& RawData, + const std::filesystem::path& AttachmentPath, + OodleCompressor Compressor, + OodleCompressionLevel CompressionLevel) + { + ZEN_ASSERT(!IsFile(AttachmentPath)); + BasicFile CompressedFile; + std::error_code Ec; + CompressedFile.Open(AttachmentPath, BasicFile::Mode::kTruncateDelete, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to create temp file for blob {} at '{}'", RawHash, AttachmentPath)); + } + + if (RawData.GetSize() < 512u * 1024u) + { + CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(RawData)), Compressor, CompressionLevel); + if (!CompressedBlob) + { + throw std::runtime_error(fmt::format("Failed to compress blob {}", RawHash)); + } + CompressedFile.Write(CompressedBlob.GetCompressed(), 0); + } + else + { + bool CouldCompress = CompressedBuffer::CompressToStream( + CompositeBuffer(SharedBuffer(RawData)), + [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset, SourceSize); + CompressedFile.Write(RangeBuffer, Offset); + }, + Compressor, + CompressionLevel); + if (!CouldCompress) + { + // Compressed is larger than 
source data... + CompressedBuffer CompressedBlob = + CompressedBuffer::Compress(SharedBuffer(std::move(RawData)), OodleCompressor::Mermaid, OodleCompressionLevel::None); + if (!CompressedBlob) + { + throw std::runtime_error(fmt::format("Failed to compress blob {}", RawHash)); + } + CompressedFile.SetFileSize(0); + CompressedFile.Write(CompressedBlob.GetCompressed(), 0); + } + } + IoBuffer TempAttachmentBuffer = IoBufferBuilder::MakeFromFile(AttachmentPath); + CompressedFile.Close(); + TempAttachmentBuffer.SetDeleteOnClose(true); + ZEN_ASSERT_SLOW(CompressedBuffer::FromCompressedNoValidate(IoBuffer(TempAttachmentBuffer)).CompressedBuffer::Decompress()); + return TempAttachmentBuffer; + } + + struct FoundAttachment + { + std::filesystem::path RawPath; // If not stored in cid + uint64_t Size = 0; + Oid Key = Oid::Zero; + }; + + CbObject RewriteOplog( + ProjectStore::Project& Project, + ProjectStore::Oplog& Oplog, + bool IgnoreMissingAttachments, + bool EmbedLooseFiles, + const std::filesystem::path& AttachmentTempPath, + std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher>& UploadAttachments, // TODO: Rename to OutUploadAttachments + JobContext* OptionalContext) + { + size_t OpCount = 0; + CreateDirectories(AttachmentTempPath); + + auto RewriteOp = [&](const Oid& Key, CbObjectView Op, const std::function<void(CbObjectView)>& CB) { + bool OpRewritten = false; + CbArrayView Files = Op["files"sv].AsArrayView(); + if (Files.Num() == 0) + { + CB(Op); + return; + } + + CbWriter Cbo; + Cbo.BeginArray("files"sv); + + for (CbFieldView& Field : Files) + { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return; + } + + bool CopyField = true; + + if (CbObjectView View = Field.AsObjectView()) + { + IoHash DataHash = View["data"sv].AsHash(); + + if (DataHash == IoHash::Zero) + { + std::string_view ServerPath = View["serverpath"sv].AsString(); + std::filesystem::path FilePath = (Project.RootDir / ServerPath).make_preferred(); + if (!IsFile(FilePath)) + { + 
remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Missing attachment '{}' for op '{}'", FilePath, View["id"sv].AsObjectId())); + if (IgnoreMissingAttachments) + { + continue; + } + else + { + ExtendableStringBuilder<1024> Sb; + Sb.Append("Failed to find attachment '"); + Sb.Append(FilePath.string()); + Sb.Append("' for op: \n"); + View.ToJson(Sb); + throw std::runtime_error(Sb.ToString()); + } + } + + { + Stopwatch HashTimer; + SharedBuffer DataBuffer(IoBufferBuilder::MakeFromFile(FilePath)); + DataHash = IoHash::HashBuffer(CompositeBuffer(DataBuffer)); + ZEN_INFO("Hashed loose file '{}' {}: {} in {}", + FilePath, + NiceBytes(DataBuffer.GetSize()), + DataHash, + NiceTimeSpanMs(HashTimer.GetElapsedTimeMs())); + } + + // Rewrite file array entry with new data reference + CbObjectWriter Writer; + RewriteCbObject(Writer, View, [&](CbObjectWriter&, CbFieldView Field) -> bool { + if (Field.GetName() == "data"sv) + { + // omit this field as we will write it explicitly ourselves + return true; + } + return false; + }); + Writer.AddBinaryAttachment("data"sv, DataHash); + UploadAttachments.insert_or_assign(DataHash, FoundAttachment{.RawPath = FilePath, .Key = Key}); + + CbObject RewrittenOp = Writer.Save(); + Cbo.AddObject(std::move(RewrittenOp)); + CopyField = false; + } + } + + if (CopyField) + { + Cbo.AddField(Field); + } + else + { + OpRewritten = true; + } + } + + if (!OpRewritten) + { + CB(Op); + return; + } + + Cbo.EndArray(); + CbArray FilesArray = Cbo.Save().AsArray(); + + CbObject RewrittenOp = RewriteCbObject(Op, [&](CbObjectWriter& NewWriter, CbFieldView Field) -> bool { + if (Field.GetName() == "files"sv) + { + NewWriter.AddArray("files"sv, FilesArray); + + return true; + } + + return false; + }); + CB(RewrittenOp); + }; + + remotestore_impl::ReportMessage(OptionalContext, "Building exported oplog and collecting attachments"); + + Stopwatch Timer; + + size_t TotalOpCount = Oplog.GetOplogEntryCount(); + Stopwatch RewriteOplogTimer; + 
CbObjectWriter SectionOpsWriter; + SectionOpsWriter.BeginArray("ops"sv); + { + Stopwatch BuildingOplogProgressTimer; + Oplog.IterateOplogWithKey([&](int, const Oid& Key, CbObjectView Op) { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return; + } + Op.IterateAttachments([&](CbFieldView FieldView) { + UploadAttachments.insert_or_assign(FieldView.AsAttachment(), FoundAttachment{.Key = Key}); + }); + if (EmbedLooseFiles) + { + RewriteOp(Key, Op, [&SectionOpsWriter](CbObjectView Op) { SectionOpsWriter << Op; }); + } + else + { + SectionOpsWriter << Op; + } + OpCount++; + + if (OpCount % 1000 == 0) + { + remotestore_impl::ReportProgress(OptionalContext, + "Building oplog"sv, + fmt::format("{} ops processed", OpCount), + TotalOpCount, + TotalOpCount - OpCount, + BuildingOplogProgressTimer.GetElapsedTimeMs()); + } + }); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } + if (TotalOpCount > 0) + { + remotestore_impl::ReportProgress(OptionalContext, + "Building oplog"sv, + fmt::format("{} ops processed", OpCount), + TotalOpCount, + 0, + BuildingOplogProgressTimer.GetElapsedTimeMs()); + } + } + SectionOpsWriter.EndArray(); // "ops" + + return SectionOpsWriter.Save(); + } + + struct FoundChunkedFile + { + IoHash RawHash = IoHash::Zero; + IoBuffer Source; + uint64_t Offset = 0; + uint64_t Size = 0; + }; + + void FindChunkSizes(CidStore& ChunkStore, + WorkerThreadPool& WorkerPool, + size_t MaxChunkEmbedSize, + size_t ChunkFileSizeLimit, + bool AllowChunking, + const std::filesystem::path& AttachmentTempPath, + std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher>& UploadAttachments, + std::unordered_set<IoHash, IoHash::Hasher>& MissingHashes, + std::vector<FoundChunkedFile>& AttachmentsToChunk, + JobContext* OptionalContext) + { + if (UploadAttachments.empty()) + { + return; + } + Stopwatch FindChunkSizesTimer; + + RwLock FindChunkSizesLock; + + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork 
Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + for (auto& It : UploadAttachments) + { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + break; + } + Work.ScheduleWork( + WorkerPool, + [&ChunkStore, + UploadAttachment = &It.second, + RawHash = It.first, + &FindChunkSizesLock, + &MissingHashes, + AttachmentTempPath, + MaxChunkEmbedSize, + ChunkFileSizeLimit, + AllowChunking, + &AttachmentsToChunk, + OptionalContext](std::atomic<bool>& AbortFlag) { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + if (AbortFlag) + { + return; + } + if (!UploadAttachment->RawPath.empty()) + { + const std::filesystem::path& FilePath = UploadAttachment->RawPath; + IoBuffer RawData = IoBufferBuilder::MakeFromFile(FilePath); + if (RawData) + { + UploadAttachment->Size = RawData.GetSize(); + if (AllowChunking && UploadAttachment->Size > ChunkFileSizeLimit) + { + FindChunkSizesLock.WithExclusiveLock([&]() { + AttachmentsToChunk.push_back( + FoundChunkedFile{.RawHash = RawHash, .Source = RawData, .Offset = 0, .Size = RawData.GetSize()}); + }); + } + } + else + { + FindChunkSizesLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); }); + } + } + else + { + IoBuffer Data = ChunkStore.FindChunkByCid(RawHash); + if (Data) + { + UploadAttachment->Size = Data.GetSize(); + if (AllowChunking && Data.IsWholeFile()) + { + IoHash VerifyRawHash; + uint64_t VerifyRawSize; + CompressedBuffer Compressed = + CompressedBuffer::FromCompressed(SharedBuffer(Data), VerifyRawHash, VerifyRawSize); + if (Compressed) + { + if (VerifyRawSize > ChunkFileSizeLimit) + { + OodleCompressor Compressor; + OodleCompressionLevel CompressionLevel; + uint64_t BlockSize; + if (Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize)) + { + if (CompressionLevel == OodleCompressionLevel::None) + { + CompositeBuffer Decompressed = Compressed.DecompressToComposite(); + if (Decompressed) 
+ { + std::span<const SharedBuffer> Segments = Decompressed.GetSegments(); + if (Segments.size() == 1) + { + IoBuffer DecompressedData = Segments[0].AsIoBuffer(); + IoBufferFileReference DecompressedFileRef; + if (DecompressedData.GetFileReference(DecompressedFileRef)) + { + // Are we still pointing to disk? + FindChunkSizesLock.WithExclusiveLock([&]() { + AttachmentsToChunk.push_back( + FoundChunkedFile{.RawHash = RawHash, + .Source = Data, + .Offset = DecompressedFileRef.FileChunkOffset, + .Size = DecompressedFileRef.FileChunkSize}); + }); + } + } + } + } + } + } + } + } + } + else + { + FindChunkSizesLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); }); + } + } + }); + } + + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, IsPaused); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + remotestore_impl::ReportProgress(OptionalContext, + "Finding attachments"sv, + fmt::format("{}{} remaining...", AbortFlag.load() ? 
"Aborting, " : "", PendingWork), + UploadAttachments.size(), + PendingWork, + FindChunkSizesTimer.GetElapsedTimeMs()); + }); + + if (!AbortFlag.load()) + { + remotestore_impl::ReportProgress(OptionalContext, + "Finding attachments"sv, + "", + UploadAttachments.size(), + 0, + FindChunkSizesTimer.GetElapsedTimeMs()); + } + } + + struct ChunkedFile + { + IoBuffer Source; + ChunkedInfoWithSource Chunked; + }; + + std::vector<ChunkedFile> ChunkAttachments(WorkerThreadPool& WorkerPool, + const std::vector<remotestore_impl::FoundChunkedFile>& AttachmentsToChunk, + JobContext* OptionalContext) + { + if (AttachmentsToChunk.empty()) + { + return {}; + } + Stopwatch ChunkAttachmentsTimer; + + std::vector<ChunkedFile> ChunkedFiles(AttachmentsToChunk.size()); + + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + for (size_t ChunkFileIndexToChunk = 0; ChunkFileIndexToChunk < AttachmentsToChunk.size(); ChunkFileIndexToChunk++) + { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + break; + } + Work.ScheduleWork(WorkerPool, + [&AttachmentsToChunk, ChunkFileIndexToChunk, &ChunkedFiles, OptionalContext](std::atomic<bool>& AbortFlag) { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + if (AbortFlag) + { + return; + } + const remotestore_impl::FoundChunkedFile& AttachmentToChunk = AttachmentsToChunk[ChunkFileIndexToChunk]; + const IoHash& RawHash = AttachmentToChunk.RawHash; + + const IoBuffer& Buffer = AttachmentToChunk.Source; + IoBufferFileReference FileRef; + bool IsFile = Buffer.GetFileReference(FileRef); + ZEN_ASSERT(IsFile); + + Stopwatch ChunkOneTimer; + + uint64_t Offset = AttachmentToChunk.Offset; + uint64_t Size = AttachmentToChunk.Size; + + BasicFile SourceFile; + SourceFile.Attach(FileRef.FileHandle); + auto __ = MakeGuard([&SourceFile]() { SourceFile.Detach(); }); + + ChunkedFile& Chunked = 
ChunkedFiles[ChunkFileIndexToChunk]; + Chunked.Source = Buffer; + Chunked.Chunked = ChunkData(SourceFile, Offset, Size, UShaderByteCodeParams); + ZEN_ASSERT(Chunked.Chunked.Info.RawHash == RawHash); + + ZEN_INFO("Chunked large attachment '{}' {} into {} chunks in {}", + RawHash, + NiceBytes(Chunked.Chunked.Info.RawSize), + Chunked.Chunked.Info.ChunkHashes.size(), + NiceTimeSpanMs(ChunkOneTimer.GetElapsedTimeMs())); + }); + } + + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, IsPaused); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + remotestore_impl::ReportProgress(OptionalContext, + "Chunking attachments"sv, + fmt::format("{}{} remaining...", AbortFlag.load() ? "Aborting, " : "", PendingWork), + AttachmentsToChunk.size(), + PendingWork, + ChunkAttachmentsTimer.GetElapsedTimeMs()); + }); + + if (!AbortFlag.load()) + { + remotestore_impl::ReportProgress(OptionalContext, + "Chunking attachments"sv, + "", + AttachmentsToChunk.size(), + 0, + ChunkAttachmentsTimer.GetElapsedTimeMs()); + } + return ChunkedFiles; + } + + void ResolveAttachments(CidStore& ChunkStore, + WorkerThreadPool& WorkerPool, + uint64_t MaxChunkEmbedSize, + const std::filesystem::path& AttachmentTempPath, + std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher>& UploadAttachments, + std::unordered_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher>& LargeChunkAttachments, + std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher>& LooseUploadAttachments, + JobContext* OptionalContext) + { + ZEN_ASSERT(!UploadAttachments.empty()); + Stopwatch UploadAttachmentsTimer; + + RwLock ResolveLock; + + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + + for (auto& It : UploadAttachments) + { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + break; 
+ } + Work.ScheduleWork( + WorkerPool, + [&ChunkStore, + MaxChunkEmbedSize, + &AttachmentTempPath, + &ResolveLock, + &LargeChunkAttachments, + &LooseUploadAttachments, + UploadAttachment = &It.second, + RawHash = It.first, + OptionalContext](std::atomic<bool>& AbortFlag) { + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + if (AbortFlag) + { + return; + } + ZEN_ASSERT(UploadAttachment->Size != 0); + if (!UploadAttachment->RawPath.empty()) + { + if (UploadAttachment->Size > (MaxChunkEmbedSize * 2)) + { + // Assume the compressed file is going to be larger than MaxChunkEmbedSize, even if it isn't + // it will be a loose attachment instead of going into a block + + TGetAttachmentBufferFunc FetchFunc = + [RawPath = UploadAttachment->RawPath, AttachmentTempPath, RawSize = UploadAttachment->Size]( + const IoHash& RawHash) -> CompositeBuffer { + IoBuffer RawData = IoBufferBuilder::MakeFromFile(RawPath); + if (!RawData) + { + throw std::runtime_error( + fmt::format("Failed to read source file for blob {} from '{}'", RawHash, RawPath)); + } + + std::filesystem::path AttachmentPath = AttachmentTempPath; + AttachmentPath.append(RawHash.ToHexString()); + + IoBuffer TempAttachmentBuffer = remotestore_impl::CompressToTempFile(RawHash, + RawData, + AttachmentPath, + OodleCompressor::Mermaid, + OodleCompressionLevel::VeryFast); + if (!TempAttachmentBuffer) + { + throw std::runtime_error(fmt::format("Failed to compressed source file for blob {} from '{}' to '{}'", + RawHash, + RawPath, + AttachmentPath)); + } + TempAttachmentBuffer.SetDeleteOnClose(true); + + ZEN_INFO("Saved temp attachment to '{}', {} ({})", + AttachmentPath, + NiceBytes(RawSize), + NiceBytes(TempAttachmentBuffer.GetSize())); + return CompositeBuffer(SharedBuffer(std::move(TempAttachmentBuffer))); + }; + + RwLock::ExclusiveLockScope _(ResolveLock); + LargeChunkAttachments.insert_or_assign(RawHash, std::move(FetchFunc)); + } + else + { + // Compress inline - check compressed 
size to see if it should go into a block or not + IoBuffer RawData = IoBufferBuilder::MakeFromFile(UploadAttachment->RawPath); + if (!RawData) + { + throw std::runtime_error( + fmt::format("Failed to read source file for blob {} from '{}'", RawHash, UploadAttachment->RawPath)); + } + + std::filesystem::path TempFilePath = AttachmentTempPath; + TempFilePath.append(RawHash.ToHexString()); + + IoBuffer TempAttachmentBuffer = remotestore_impl::CompressToTempFile(RawHash, + RawData, + TempFilePath, + OodleCompressor::Mermaid, + OodleCompressionLevel::VeryFast); + TempAttachmentBuffer.SetDeleteOnClose(true); + + uint64_t CompressedSize = TempAttachmentBuffer.GetSize(); + + ZEN_INFO("Saved temp attachment to '{}', {} ({})", + TempFilePath, + NiceBytes(UploadAttachment->Size), + NiceBytes(CompressedSize)); + + if (CompressedSize > MaxChunkEmbedSize) + { + TGetAttachmentBufferFunc FetchFunc = [Data = std::move(TempAttachmentBuffer)](const IoHash&) mutable { + return CompositeBuffer(SharedBuffer(std::move(Data))); + }; + + RwLock::ExclusiveLockScope _(ResolveLock); + LargeChunkAttachments.insert_or_assign(RawHash, std::move(FetchFunc)); + } + else + { + UploadAttachment->Size = CompressedSize; + + std::pair<uint64_t, IoBuffer> LooseAttachment(RawData.GetSize(), std::move(TempAttachmentBuffer)); + + RwLock::ExclusiveLockScope _(ResolveLock); + LooseUploadAttachments.insert_or_assign(RawHash, std::move(LooseAttachment)); + } + } + } + else + { + if (UploadAttachment->Size > MaxChunkEmbedSize) + { + TGetAttachmentBufferFunc FetchFunc = [&ChunkStore](const IoHash& RawHash) { + return CompositeBuffer(SharedBuffer(ChunkStore.FindChunkByCid(RawHash))); + }; + RwLock::ExclusiveLockScope _(ResolveLock); + LargeChunkAttachments.insert_or_assign(RawHash, std::move(FetchFunc)); + } + } + }); + } + + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, IsPaused); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + 
AbortFlag.store(true); + } + remotestore_impl::ReportProgress(OptionalContext, + "Resolving attachments"sv, + fmt::format("{}{} remaining...", AbortFlag.load() ? "Aborting, " : "", PendingWork), + UploadAttachments.size(), + PendingWork, + UploadAttachmentsTimer.GetElapsedTimeMs()); + }); + + if (!AbortFlag.load()) + { + remotestore_impl::ReportProgress(OptionalContext, + "Resolving attachments"sv, + "", + UploadAttachments.size(), + 0, + UploadAttachmentsTimer.GetElapsedTimeMs()); + } + } + RemoteProjectStore::Result WriteOplogSection(ProjectStore::Oplog& Oplog, const CbObjectView& SectionObject, JobContext* OptionalContext) { using namespace std::literals; @@ -278,30 +1167,25 @@ namespace remotestore_impl { }; void DownloadAndSaveBlockChunks(LoadOplogContext& Context, - Latch& AttachmentsDownloadLatch, - Latch& AttachmentsWriteLatch, - AsyncRemoteResult& RemoteResult, + ParallelWork& AttachmentWork, DownloadInfo& Info, Stopwatch& LoadAttachmentsTimer, std::atomic_uint64_t& DownloadStartMS, ThinChunkBlockDescription&& ThinBlockDescription, std::vector<uint32_t>&& NeededChunkIndexes) { - AttachmentsDownloadLatch.AddCount(1); - Context.NetworkWorkerPool.ScheduleWork( + AttachmentWork.ScheduleWork( + Context.NetworkWorkerPool, [&Context, - &AttachmentsDownloadLatch, - &AttachmentsWriteLatch, - &RemoteResult, + &AttachmentWork, ThinBlockDescription = std::move(ThinBlockDescription), NeededChunkIndexes = std::move(NeededChunkIndexes), &Info, &LoadAttachmentsTimer, - &DownloadStartMS]() { + &DownloadStartMS](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("DownloadBlockChunks"); - auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); - if (RemoteResult.IsError()) + if (AbortFlag) { return; } @@ -322,12 +1206,12 @@ namespace remotestore_impl { ReportMessage(Context.OptionalJobContext, fmt::format("Failed to load attachments with {} chunks ({}): {}", Chunks.size(), - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); + 
Result.ErrorCode, + Result.Reason)); Info.MissingAttachmentCount.fetch_add(1); - if (Context.IgnoreMissingAttachments) + if (!Context.IgnoreMissingAttachments) { - RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); + throw RemoteStoreError(Result.Reason, Result.ErrorCode, Result.Text); } return; } @@ -341,70 +1225,60 @@ namespace remotestore_impl { fmt::format("Loaded {} bulk attachments in {}", Chunks.size(), NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)))); - if (RemoteResult.IsError()) + if (AbortFlag) { return; } - AttachmentsWriteLatch.AddCount(1); - Context.WorkerPool.ScheduleWork( - [&AttachmentsWriteLatch, &RemoteResult, &Info, &Context, Chunks = std::move(Result.Chunks)]() { - auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); - if (RemoteResult.IsError()) + AttachmentWork.ScheduleWork( + Context.WorkerPool, + [&Info, &Context, Chunks = std::move(Result.Chunks)](std::atomic<bool>& AbortFlag) { + if (AbortFlag) { return; } if (!Chunks.empty()) { - try - { - std::vector<IoBuffer> WriteAttachmentBuffers; - std::vector<IoHash> WriteRawHashes; - WriteAttachmentBuffers.reserve(Chunks.size()); - WriteRawHashes.reserve(Chunks.size()); + std::vector<IoBuffer> WriteAttachmentBuffers; + std::vector<IoHash> WriteRawHashes; + WriteAttachmentBuffers.reserve(Chunks.size()); + WriteRawHashes.reserve(Chunks.size()); - for (const auto& It : Chunks) - { - WriteAttachmentBuffers.push_back(It.second.GetCompressed().Flatten().AsIoBuffer()); - WriteRawHashes.push_back(It.first); - } - std::vector<CidStore::InsertResult> InsertResults = - Context.ChunkStore.AddChunks(WriteAttachmentBuffers, - WriteRawHashes, - CidStore::InsertMode::kCopyOnly); + for (const auto& It : Chunks) + { + WriteAttachmentBuffers.push_back(It.second.GetCompressed().Flatten().AsIoBuffer()); + WriteRawHashes.push_back(It.first); + } + std::vector<CidStore::InsertResult> InsertResults = + 
Context.ChunkStore.AddChunks(WriteAttachmentBuffers, WriteRawHashes, CidStore::InsertMode::kCopyOnly); - for (size_t Index = 0; Index < InsertResults.size(); Index++) + for (size_t Index = 0; Index < InsertResults.size(); Index++) + { + if (InsertResults[Index].New) { - if (InsertResults[Index].New) - { - Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); - Info.AttachmentsStored.fetch_add(1); - } + Info.AttachmentBytesStored.fetch_add(WriteAttachmentBuffers[Index].GetSize()); + Info.AttachmentsStored.fetch_add(1); } } - catch (const std::exception& Ex) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to bulk save {} attachments", Chunks.size()), - Ex.what()); - } } }, WorkerThreadPool::EMode::EnableBacklog); } + catch (const RemoteStoreError&) + { + throw; + } catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to bulk load {} attachments", NeededChunkIndexes.size()), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed to bulk load {} attachments", NeededChunkIndexes.size()), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); }; void DownloadAndSaveBlock(LoadOplogContext& Context, - Latch& AttachmentsDownloadLatch, - Latch& AttachmentsWriteLatch, - AsyncRemoteResult& RemoteResult, + ParallelWork& AttachmentWork, DownloadInfo& Info, Stopwatch& LoadAttachmentsTimer, std::atomic_uint64_t& DownloadStartMS, @@ -413,23 +1287,20 @@ namespace remotestore_impl { std::span<std::atomic<bool>> ChunkDownloadedFlags, uint32_t RetriesLeft) { - AttachmentsDownloadLatch.AddCount(1); - Context.NetworkWorkerPool.ScheduleWork( - [&AttachmentsDownloadLatch, - &AttachmentsWriteLatch, + AttachmentWork.ScheduleWork( + Context.NetworkWorkerPool, + [&AttachmentWork, &Context, - &RemoteResult, &Info, &LoadAttachmentsTimer, &DownloadStartMS, 
RetriesLeft, BlockHash = IoHash(BlockHash), &AllNeededPartialChunkHashesLookup, - ChunkDownloadedFlags]() { + ChunkDownloadedFlags](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("DownloadBlock"); - auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); - if (RemoteResult.IsError()) + if (AbortFlag) { return; } @@ -457,11 +1328,11 @@ namespace remotestore_impl { Info.MissingAttachmentCount.fetch_add(1); if (!Context.IgnoreMissingAttachments) { - RemoteResult.SetError(BlockResult.ErrorCode, BlockResult.Reason, BlockResult.Text); + throw RemoteStoreError(BlockResult.Reason, BlockResult.ErrorCode, BlockResult.Text); } return; } - if (RemoteResult.IsError()) + if (AbortFlag) { return; } @@ -482,12 +1353,10 @@ namespace remotestore_impl { Info.AttachmentBlocksDownloaded.fetch_add(1); Info.AttachmentBlockBytesDownloaded.fetch_add(BlockSize); - AttachmentsWriteLatch.AddCount(1); - Context.WorkerPool.ScheduleWork( - [&AttachmentsDownloadLatch, - &AttachmentsWriteLatch, + AttachmentWork.ScheduleWork( + Context.WorkerPool, + [&AttachmentWork, &Context, - &RemoteResult, &Info, &LoadAttachmentsTimer, &DownloadStartMS, @@ -495,9 +1364,8 @@ namespace remotestore_impl { BlockHash = IoHash(BlockHash), &AllNeededPartialChunkHashesLookup, ChunkDownloadedFlags, - Bytes = std::move(BlobBuffer)]() { - auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); - if (RemoteResult.IsError()) + Bytes = std::move(BlobBuffer)](std::atomic<bool>& AbortFlag) { + if (AbortFlag) { return; } @@ -606,9 +1474,7 @@ namespace remotestore_impl { ReportMessage(Context.OptionalJobContext, fmt::format("{}, retrying download", ErrorString)); return DownloadAndSaveBlock(Context, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, + AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, @@ -620,32 +1486,41 @@ namespace remotestore_impl { else { ReportMessage(Context.OptionalJobContext, ErrorString); - 
RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), ErrorString, {}); - return; + throw RemoteStoreError(ErrorString, + gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), + {}); } } } + catch (const RemoteStoreError&) + { + throw; + } catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed save block attachment {}", BlockHash), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed to save block attachment {}", BlockHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); } + catch (const RemoteStoreError&) + { + throw; + } catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to download block attachment {}", BlockHash), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed to download block attachment {}", BlockHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); }; void DownloadPartialBlock(LoadOplogContext& Context, - AsyncRemoteResult& RemoteResult, + std::atomic<bool>& AbortFlag, DownloadInfo& Info, double& DownloadTimeSeconds, const ChunkBlockDescription& BlockDescription, @@ -674,7 +1549,7 @@ namespace remotestore_impl { while (SubRangeCountComplete < SubBlockRangeCount) { - if (RemoteResult.IsError()) + if (AbortFlag.load()) { break; } @@ -696,7 +1571,7 @@ namespace remotestore_impl { SubRange.first, SubRange.second); DownloadTimeSeconds += CacheTimer.GetElapsedTimeMs() / 1000.0; - if (RemoteResult.IsError()) + if (AbortFlag.load()) { break; } @@ -717,7 +1592,7 @@ namespace remotestore_impl { BuildStorageCache::BuildBlobRanges RangeBuffers = Context.OptionalCache->GetBuildBlobRanges(Context.CacheBuildId, BlockDescription.BlockHash, SubRanges); DownloadTimeSeconds += CacheTimer.GetElapsedTimeMs() / 1000.0; 
- if (RemoteResult.IsError()) + if (AbortFlag.load()) { break; } @@ -749,7 +1624,7 @@ namespace remotestore_impl { RemoteProjectStore::LoadAttachmentRangesResult BlockResult = Context.RemoteStore.LoadAttachmentRanges(BlockDescription.BlockHash, SubRanges); DownloadTimeSeconds += BlockResult.ElapsedSeconds; - if (RemoteResult.IsError()) + if (AbortFlag.load()) { break; } @@ -764,8 +1639,7 @@ namespace remotestore_impl { Info.MissingAttachmentCount.fetch_add(1); if (!Context.IgnoreMissingAttachments) { - RemoteResult.SetError(BlockResult.ErrorCode, BlockResult.Reason, BlockResult.Text); - break; + throw RemoteStoreError(BlockResult.Reason, BlockResult.ErrorCode, BlockResult.Text); } } else @@ -781,7 +1655,7 @@ namespace remotestore_impl { BlockDescription.BlockHash, ZenContentType::kCompressedBinary, CompositeBuffer(std::vector<IoBuffer>{BlockResult.Bytes})); - if (RemoteResult.IsError()) + if (AbortFlag.load()) { break; } @@ -795,13 +1669,12 @@ namespace remotestore_impl { { if (BlockResult.Ranges.size() != SubRanges.size()) { - RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - fmt::format("Range response for block {} contains {} ranges, expected {} ranges", - BlockDescription.BlockHash, - BlockResult.Ranges.size(), - SubRanges.size()), - ""); - break; + throw RemoteStoreError(fmt::format("Range response for block {} contains {} ranges, expected {} ranges", + BlockDescription.BlockHash, + BlockResult.Ranges.size(), + SubRanges.size()), + gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), + ""); } OnDownloaded(std::move(BlockResult.Bytes), SubRangeStartIndex, BlockResult.Ranges); } @@ -812,9 +1685,7 @@ namespace remotestore_impl { } void DownloadAndSavePartialBlock(LoadOplogContext& Context, - Latch& AttachmentsDownloadLatch, - Latch& AttachmentsWriteLatch, - AsyncRemoteResult& RemoteResult, + ParallelWork& AttachmentWork, DownloadInfo& Info, Stopwatch& LoadAttachmentsTimer, std::atomic_uint64_t& DownloadStartMS, @@ 
-827,12 +1698,10 @@ namespace remotestore_impl { std::span<std::atomic<bool>> ChunkDownloadedFlags, uint32_t RetriesLeft) { - AttachmentsDownloadLatch.AddCount(1); - Context.NetworkWorkerPool.ScheduleWork( - [&AttachmentsDownloadLatch, - &AttachmentsWriteLatch, + AttachmentWork.ScheduleWork( + Context.NetworkWorkerPool, + [&AttachmentWork, &Context, - &RemoteResult, &Info, &LoadAttachmentsTimer, &DownloadStartMS, @@ -843,10 +1712,8 @@ namespace remotestore_impl { BlockRangeCount, &AllNeededPartialChunkHashesLookup, ChunkDownloadedFlags, - RetriesLeft]() { + RetriesLeft](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("DownloadBlockRanges"); - - auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); try { uint64_t Unset = (std::uint64_t)-1; @@ -857,7 +1724,7 @@ namespace remotestore_impl { DownloadPartialBlock( Context, - RemoteResult, + AbortFlag, Info, DownloadElapsedSeconds, BlockDescription, @@ -874,12 +1741,10 @@ namespace remotestore_impl { Info.AttachmentBlockRangeBytesDownloaded.fetch_add(BlockPartSize); Info.AttachmentBlocksRangesDownloaded++; - AttachmentsWriteLatch.AddCount(1); - Context.WorkerPool.ScheduleWork( - [&AttachmentsWriteLatch, + AttachmentWork.ScheduleWork( + Context.WorkerPool, + [&AttachmentWork, &Context, - &AttachmentsDownloadLatch, - &RemoteResult, &Info, &LoadAttachmentsTimer, &DownloadStartMS, @@ -892,8 +1757,8 @@ namespace remotestore_impl { RetriesLeft, BlockPayload = std::move(Buffer), OffsetAndLengths = - std::vector<std::pair<uint64_t, uint64_t>>(OffsetAndLengths.begin(), OffsetAndLengths.end())]() { - auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); + std::vector<std::pair<uint64_t, uint64_t>>(OffsetAndLengths.begin(), OffsetAndLengths.end())]( + std::atomic<bool>& AbortFlag) { try { ZEN_ASSERT(BlockPayload.Size() > 0); @@ -901,7 +1766,7 @@ namespace remotestore_impl { size_t RangeCount = OffsetAndLengths.size(); for (size_t RangeOffset = 0; RangeOffset < 
RangeCount; RangeOffset++) { - if (RemoteResult.IsError()) + if (AbortFlag) { return; } @@ -923,7 +1788,7 @@ namespace remotestore_impl { ChunkBlockIndex < BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount; ChunkBlockIndex++) { - if (RemoteResult.IsError()) + if (AbortFlag) { break; } @@ -985,9 +1850,7 @@ namespace remotestore_impl { ReportMessage(Context.OptionalJobContext, fmt::format("{}, retrying download", ErrorString)); return DownloadAndSavePartialBlock(Context, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, + AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, @@ -1005,9 +1868,9 @@ namespace remotestore_impl { Info.MissingAttachmentCount.fetch_add(1); if (!Context.IgnoreMissingAttachments) { - RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::NotFound), - "Malformed chunk block", - ErrorString); + throw RemoteStoreError("Malformed chunk block", + gsl::narrow<int32_t>(HttpResponseCode::NotFound), + ErrorString); } } else @@ -1059,18 +1922,22 @@ namespace remotestore_impl { } } } + catch (const RemoteStoreError&) + { + throw; + } catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed saving {} ranges from block attachment {}", - OffsetAndLengths.size(), - BlockDescription.BlockHash), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed saving {} ranges from block attachment {}", + OffsetAndLengths.size(), + BlockDescription.BlockHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); }); - if (!RemoteResult.IsError()) + if (!AbortFlag) { ZEN_DEBUG("Loaded {} ranges from block attachment '{}' in {} ({})", BlockRangeCount, @@ -1079,39 +1946,33 @@ namespace remotestore_impl { NiceBytes(DownloadedBytes)); } } + catch (const RemoteStoreError&) + { + throw; + } catch (const std::exception& Ex) { - 
RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to download block attachment {} ranges", BlockDescription.BlockHash), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed to download block attachment {} ranges", BlockDescription.BlockHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); }; void DownloadAndSaveAttachment(LoadOplogContext& Context, - Latch& AttachmentsDownloadLatch, - Latch& AttachmentsWriteLatch, - AsyncRemoteResult& RemoteResult, + ParallelWork& AttachmentWork, DownloadInfo& Info, Stopwatch& LoadAttachmentsTimer, std::atomic_uint64_t& DownloadStartMS, const IoHash& RawHash) { - AttachmentsDownloadLatch.AddCount(1); - Context.NetworkWorkerPool.ScheduleWork( - [&Context, - &RemoteResult, - &AttachmentsDownloadLatch, - &AttachmentsWriteLatch, - RawHash, - &LoadAttachmentsTimer, - &DownloadStartMS, - &Info]() { + AttachmentWork.ScheduleWork( + Context.NetworkWorkerPool, + [&Context, &AttachmentWork, RawHash, &LoadAttachmentsTimer, &DownloadStartMS, &Info](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("DownloadAttachment"); - auto _ = MakeGuard([&AttachmentsDownloadLatch] { AttachmentsDownloadLatch.CountDown(); }); - if (RemoteResult.IsError()) + if (AbortFlag) { return; } @@ -1130,17 +1991,18 @@ namespace remotestore_impl { if (AttachmentResult.ErrorCode) { ReportMessage(Context.OptionalJobContext, - fmt::format("Failed to download large attachment {}: '{}', error code : {}", + fmt::format("Failed to download large attachment {}: '{}', error code: {}", RawHash, AttachmentResult.Reason, AttachmentResult.ErrorCode)); Info.MissingAttachmentCount.fetch_add(1); if (!Context.IgnoreMissingAttachments) { - RemoteResult.SetError(AttachmentResult.ErrorCode, AttachmentResult.Reason, AttachmentResult.Text); + throw RemoteStoreError(AttachmentResult.Reason, AttachmentResult.ErrorCode, AttachmentResult.Text); } return; } + 
ZEN_ASSERT(AttachmentResult.Bytes); BlobBuffer = std::move(AttachmentResult.Bytes); ZEN_DEBUG("Loaded large attachment '{}' in {} ({})", RawHash, @@ -1154,107 +2016,89 @@ namespace remotestore_impl { CompositeBuffer(SharedBuffer(BlobBuffer))); } } - if (RemoteResult.IsError()) + if (AbortFlag) { return; } uint64_t AttachmentSize = BlobBuffer.GetSize(); Info.AttachmentsDownloaded.fetch_add(1); Info.AttachmentBytesDownloaded.fetch_add(AttachmentSize); + ZEN_ASSERT(BlobBuffer); - AttachmentsWriteLatch.AddCount(1); - Context.WorkerPool.ScheduleWork( - [&Context, &AttachmentsWriteLatch, &RemoteResult, &Info, RawHash, AttachmentSize, Bytes = std::move(BlobBuffer)]() { + AttachmentWork.ScheduleWork( + Context.WorkerPool, + [&Context, &Info, RawHash, AttachmentSize, Bytes = std::move(BlobBuffer)](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("WriteAttachment"); - auto _ = MakeGuard([&AttachmentsWriteLatch] { AttachmentsWriteLatch.CountDown(); }); - if (RemoteResult.IsError()) + ZEN_ASSERT(Bytes); + + if (AbortFlag) { return; } - try - { - CidStore::InsertResult InsertResult = Context.ChunkStore.AddChunk(Bytes, RawHash); - if (InsertResult.New) - { - Info.AttachmentBytesStored.fetch_add(AttachmentSize); - Info.AttachmentsStored.fetch_add(1); - } - Info.ChunksCompleteCount++; - } - catch (const std::exception& Ex) + CidStore::InsertResult InsertResult = Context.ChunkStore.AddChunk(Bytes, RawHash); + if (InsertResult.New) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Saving attachment {} failed", RawHash), - Ex.what()); + Info.AttachmentBytesStored.fetch_add(AttachmentSize); + Info.AttachmentsStored.fetch_add(1); } + Info.ChunksCompleteCount++; }, WorkerThreadPool::EMode::EnableBacklog); } - catch (const std::exception& Ex) + catch (const RemoteStoreError&) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Loading attachment {} failed", RawHash), - Ex.what()); - } - }, - 
WorkerThreadPool::EMode::EnableBacklog); - }; - - void CreateBlock(WorkerThreadPool& WorkerPool, - Latch& OpSectionsLatch, - std::vector<std::pair<IoHash, FetchChunkFunc>>&& ChunksInBlock, - RwLock& SectionsLock, - std::vector<ChunkBlockDescription>& Blocks, - size_t BlockIndex, - const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, - AsyncRemoteResult& RemoteResult) - { - OpSectionsLatch.AddCount(1); - WorkerPool.ScheduleWork( - [&Blocks, - &SectionsLock, - &OpSectionsLatch, - BlockIndex, - Chunks = std::move(ChunksInBlock), - &AsyncOnBlock, - &RemoteResult]() mutable { - ZEN_TRACE_CPU("CreateBlock"); - - auto _ = MakeGuard([&OpSectionsLatch] { OpSectionsLatch.CountDown(); }); - if (RemoteResult.IsError()) - { - return; - } - size_t ChunkCount = Chunks.size(); - try - { - ZEN_ASSERT(ChunkCount > 0); - Stopwatch Timer; - ChunkBlockDescription Block; - CompressedBuffer CompressedBlock = GenerateChunkBlock(std::move(Chunks), Block); - IoHash BlockHash = CompressedBlock.DecodeRawHash(); - ZEN_UNUSED(BlockHash); - { - // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index - RwLock::SharedLockScope __(SectionsLock); - Blocks[BlockIndex] = Block; - } - uint64_t BlockSize = CompressedBlock.GetCompressedSize(); - AsyncOnBlock(std::move(CompressedBlock), std::move(Block)); - ZEN_INFO("Generated block with {} attachments in {} ({})", - ChunkCount, - NiceTimeSpanMs(Timer.GetElapsedTimeMs()), - NiceBytes(BlockSize)); + throw; } catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed creating block {} with {} chunks", BlockIndex, ChunkCount), - Ex.what()); + throw RemoteStoreError(fmt::format("Loading attachment {} failed", RawHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } }, WorkerThreadPool::EMode::EnableBacklog); + }; + + void AsyncCreateBlock(ParallelWork& Work, + WorkerThreadPool& 
WorkerPool, + std::vector<std::pair<IoHash, FetchChunkFunc>>&& ChunksInBlock, + RwLock& SectionsLock, + std::vector<ChunkBlockDescription>& Blocks, + size_t BlockIndex, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, + JobContext* OptionalContext) + { + Work.ScheduleWork(WorkerPool, + [&Blocks, &SectionsLock, BlockIndex, Chunks = std::move(ChunksInBlock), &AsyncOnBlock, OptionalContext]( + std::atomic<bool>& AbortFlag) mutable { + ZEN_TRACE_CPU("CreateBlock"); + + if (remotestore_impl::IsCancelled(OptionalContext)) + { + AbortFlag.store(true); + } + if (AbortFlag) + { + return; + } + size_t ChunkCount = Chunks.size(); + ZEN_ASSERT(ChunkCount > 0); + Stopwatch Timer; + ChunkBlockDescription Block; + CompressedBuffer CompressedBlock = GenerateChunkBlock(std::move(Chunks), Block); + uint64_t BlockSize = CompressedBlock.GetCompressedSize(); + { + // We can share the lock as we are not resizing the vector and only touch our own index + RwLock::SharedLockScope __(SectionsLock); + Blocks[BlockIndex] = Block; + } + AsyncOnBlock(std::move(CompressedBlock), std::move(Block)); + ZEN_INFO("Generated block with {} attachments in {} ({})", + ChunkCount, + NiceTimeSpanMs(Timer.GetElapsedTimeMs()), + NiceBytes(BlockSize)); + }); } struct UploadInfo @@ -1268,7 +2112,7 @@ namespace remotestore_impl { struct CreatedBlock { - IoBuffer Payload; + CompositeBuffer Payload; ChunkBlockDescription Block; }; @@ -1282,7 +2126,6 @@ namespace remotestore_impl { const std::unordered_set<IoHash, IoHash::Hasher>& Needs, bool ForceAll, UploadInfo& Info, - AsyncRemoteResult& RemoteResult, JobContext* OptionalContext) { using namespace std::literals; @@ -1343,22 +2186,15 @@ namespace remotestore_impl { if (!UnknownAttachments.empty()) { - RemoteResult.SetError( - gsl::narrow<int>(HttpResponseCode::NotFound), + throw RemoteStoreError( fmt::format("Upload requested of {} missing attachments, the base container referenced blocks that are no longer available", 
UnknownAttachments.size()), + gsl::narrow<int>(HttpResponseCode::NotFound), ""); - ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return; } if (IsCancelled(OptionalContext)) { - if (!RemoteResult.IsError()) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - } return; } @@ -1371,122 +2207,91 @@ namespace remotestore_impl { Stopwatch Timer; - ptrdiff_t AttachmentsToSave(0); - Latch SaveAttachmentsLatch(1); + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + ptrdiff_t AttachmentsToSave(0); for (const IoHash& RawHash : AttachmentsToUpload) { - if (RemoteResult.IsError()) + if (AbortFlag.load()) { break; } - SaveAttachmentsLatch.AddCount(1); AttachmentsToSave++; - WorkerPool.ScheduleWork( - [&ChunkStore, - &RemoteStore, - &SaveAttachmentsLatch, - &RemoteResult, - RawHash, - &CreatedBlocks, - &LooseFileAttachments, - &Info, - OptionalContext]() { + Work.ScheduleWork( + WorkerPool, + [&ChunkStore, &RemoteStore, RawHash, &CreatedBlocks, &LooseFileAttachments, &Info, OptionalContext]( + std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("UploadAttachment"); - auto _ = MakeGuard([&SaveAttachmentsLatch] { SaveAttachmentsLatch.CountDown(); }); - if (RemoteResult.IsError()) + if (AbortFlag.load()) { return; } - try + CompositeBuffer Payload; + ChunkBlockDescription Block; + if (auto BlockIt = CreatedBlocks.find(RawHash); BlockIt != CreatedBlocks.end()) { - IoBuffer Payload; - ChunkBlockDescription Block; - if (auto BlockIt = CreatedBlocks.find(RawHash); BlockIt != CreatedBlocks.end()) - { - Payload = BlockIt->second.Payload; - Block = BlockIt->second.Block; - } - else if (auto LooseTmpFileIt = LooseFileAttachments.find(RawHash); 
LooseTmpFileIt != LooseFileAttachments.end()) - { - Payload = LooseTmpFileIt->second(RawHash); - } - else - { - Payload = ChunkStore.FindChunkByCid(RawHash); - } - if (!Payload) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::NotFound), - fmt::format("Failed to find attachment {}", RawHash), - {}); - ZEN_WARN("Failed to save attachment '{}' ({}): {}", - RawHash, - RemoteResult.GetError(), - RemoteResult.GetErrorReason()); - return; - } - const bool IsBlock = Block.BlockHash == RawHash; - size_t PayloadSize = Payload.GetSize(); - RemoteProjectStore::SaveAttachmentResult Result = - RemoteStore.SaveAttachment(CompositeBuffer(SharedBuffer(std::move(Payload))), RawHash, std::move(Block)); - if (Result.ErrorCode) - { - RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); - ReportMessage(OptionalContext, - fmt::format("Failed to save attachment '{}', {} ({}): {}", - RawHash, - NiceBytes(PayloadSize), - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - return; - } - if (IsBlock) - { - Info.AttachmentBlocksUploaded.fetch_add(1); - Info.AttachmentBlockBytesUploaded.fetch_add(PayloadSize); - ZEN_INFO("Saved block attachment '{}' in {} ({})", - RawHash, - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), - NiceBytes(PayloadSize)); - } - else - { - Info.AttachmentsUploaded.fetch_add(1); - Info.AttachmentBytesUploaded.fetch_add(PayloadSize); - ZEN_INFO("Saved large attachment '{}' in {} ({})", - RawHash, - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), - NiceBytes(PayloadSize)); - } + Payload = BlockIt->second.Payload; + Block = BlockIt->second.Block; } - catch (const std::exception& Ex) + else if (auto LooseTmpFileIt = LooseFileAttachments.find(RawHash); LooseTmpFileIt != LooseFileAttachments.end()) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("To upload attachment {}", RawHash), - Ex.what()); + Payload = LooseTmpFileIt->second(RawHash); } - }, 
- WorkerThreadPool::EMode::EnableBacklog); + else + { + Payload = CompositeBuffer(SharedBuffer(ChunkStore.FindChunkByCid(RawHash))); + } + if (!Payload) + { + throw RemoteStoreError(fmt::format("Failed to find attachment {}", RawHash), + gsl::narrow<int>(HttpResponseCode::NotFound), + {}); + } + const bool IsBlock = Block.BlockHash == RawHash; + size_t PayloadSize = Payload.GetSize(); + RemoteProjectStore::SaveAttachmentResult Result = + RemoteStore.SaveAttachment(std::move(Payload), RawHash, std::move(Block)); + if (Result.ErrorCode) + { + throw RemoteStoreError(fmt::format("Failed to save attachment '{}', {}", RawHash, NiceBytes(PayloadSize)), + Result.ErrorCode, + Result.Text); + } + if (IsBlock) + { + Info.AttachmentBlocksUploaded.fetch_add(1); + Info.AttachmentBlockBytesUploaded.fetch_add(PayloadSize); + ZEN_INFO("Saved block attachment '{}' in {} ({})", + RawHash, + NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), + NiceBytes(PayloadSize)); + } + else + { + Info.AttachmentsUploaded.fetch_add(1); + Info.AttachmentBytesUploaded.fetch_add(PayloadSize); + ZEN_INFO("Saved large attachment '{}' in {} ({})", + RawHash, + NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), + NiceBytes(PayloadSize)); + } + }); } if (IsCancelled(OptionalContext)) { - if (!RemoteResult.IsError()) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - } - return; + AbortFlag = true; } if (!BulkBlockAttachmentsToUpload.empty()) { for (const std::vector<std::pair<IoHash, FetchChunkFunc>>& Chunks : BlockChunks) { - if (RemoteResult.IsError()) + if (AbortFlag.load()) { break; } @@ -1506,88 +2311,62 @@ namespace remotestore_impl { continue; } - SaveAttachmentsLatch.AddCount(1); AttachmentsToSave++; - WorkerPool.ScheduleWork( + Work.ScheduleWork( + WorkerPool, [&RemoteStore, &ChunkStore, - 
&SaveAttachmentsLatch, - &RemoteResult, NeededChunks = std::move(NeededChunks), &BulkBlockAttachmentsToUpload, &Info, - OptionalContext]() { + OptionalContext](std::atomic<bool>& AbortFlag) { ZEN_TRACE_CPU("UploadChunk"); - auto _ = MakeGuard([&SaveAttachmentsLatch] { SaveAttachmentsLatch.CountDown(); }); - if (RemoteResult.IsError()) + if (AbortFlag.load()) { return; } - try + size_t ChunksSize = 0; + std::vector<SharedBuffer> ChunkBuffers; + ChunkBuffers.reserve(NeededChunks.size()); + for (const IoHash& Chunk : NeededChunks) { - size_t ChunksSize = 0; - std::vector<SharedBuffer> ChunkBuffers; - ChunkBuffers.reserve(NeededChunks.size()); - for (const IoHash& Chunk : NeededChunks) - { - auto It = BulkBlockAttachmentsToUpload.find(Chunk); - ZEN_ASSERT(It != BulkBlockAttachmentsToUpload.end()); - CompressedBuffer ChunkPayload = It->second(It->first).second; - if (!ChunkPayload) - { - RemoteResult.SetError(static_cast<int32_t>(HttpResponseCode::NotFound), - fmt::format("Missing chunk {}"sv, Chunk), - fmt::format("Unable to fetch attachment {} required by the oplog"sv, Chunk)); - ChunkBuffers.clear(); - break; - } - ChunksSize += ChunkPayload.GetCompressedSize(); - ChunkBuffers.emplace_back(SharedBuffer(std::move(ChunkPayload).GetCompressed().Flatten().AsIoBuffer())); - } - RemoteProjectStore::SaveAttachmentsResult Result = RemoteStore.SaveAttachments(ChunkBuffers); - if (Result.ErrorCode) + auto It = BulkBlockAttachmentsToUpload.find(Chunk); + ZEN_ASSERT(It != BulkBlockAttachmentsToUpload.end()); + CompositeBuffer ChunkPayload = It->second(It->first).second; + if (!ChunkPayload) { - RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); - ReportMessage(OptionalContext, - fmt::format("Failed to save attachments with {} chunks ({}): {}", - NeededChunks.size(), - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - return; + throw RemoteStoreError(fmt::format("Missing chunk {}"sv, Chunk), + static_cast<int32_t>(HttpResponseCode::NotFound), + 
fmt::format("Unable to fetch attachment {} required by the oplog"sv, Chunk)); } - Info.AttachmentsUploaded.fetch_add(ChunkBuffers.size()); - Info.AttachmentBytesUploaded.fetch_add(ChunksSize); - - ZEN_INFO("Saved {} bulk attachments in {} ({})", - NeededChunks.size(), - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), - NiceBytes(ChunksSize)); + ChunksSize += ChunkPayload.GetSize(); + ChunkBuffers.emplace_back(SharedBuffer(ChunkPayload.Flatten().AsIoBuffer())); } - catch (const std::exception& Ex) + RemoteProjectStore::SaveAttachmentsResult Result = RemoteStore.SaveAttachments(ChunkBuffers); + if (Result.ErrorCode) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to buck upload {} attachments", NeededChunks.size()), - Ex.what()); + throw RemoteStoreError(fmt::format("Failed to save attachments with {} chunks", NeededChunks.size()), + Result.ErrorCode, + Result.Text); } - }, - WorkerThreadPool::EMode::EnableBacklog); + Info.AttachmentsUploaded.fetch_add(ChunkBuffers.size()); + Info.AttachmentBytesUploaded.fetch_add(ChunksSize); + + ZEN_INFO("Saved {} bulk attachments in {} ({})", + NeededChunks.size(), + NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000)), + NiceBytes(ChunksSize)); + }); } } Stopwatch SaveAttachmentsProgressTimer; - SaveAttachmentsLatch.CountDown(); - while (!SaveAttachmentsLatch.Wait(1000)) - { - ptrdiff_t Remaining = SaveAttachmentsLatch.Remaining(); - if (IsCancelled(OptionalContext)) + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t Remaining) { + ZEN_UNUSED(IsAborted, IsPaused); + if (IsCancelled(OptionalContext) && !AbortFlag.load()) { - if (!RemoteResult.IsError()) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - } + AbortFlag = true; } uint64_t 
PartialTransferWallTimeMS = Timer.GetElapsedTimeMs(); ReportProgress(OptionalContext, @@ -1598,7 +2377,7 @@ namespace remotestore_impl { AttachmentsToSave, Remaining, SaveAttachmentsProgressTimer.GetElapsedTimeMs()); - } + }); uint64_t ElapsedTimeMS = Timer.GetElapsedTimeMs(); if (AttachmentsToSave > 0) { @@ -1625,8 +2404,7 @@ std::vector<IoHash> GetBlockHashesFromOplog(CbObjectView ContainerObject) { using namespace std::literals; - std::vector<ChunkBlockDescription> Result; - CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); + CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); std::vector<IoHash> BlockHashes; BlockHashes.reserve(BlocksArray.Num()); @@ -1689,1169 +2467,634 @@ BuildContainer(CidStore& ChunkStore, const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, bool EmbedLooseFiles, - JobContext* OptionalContext, - remotestore_impl::AsyncRemoteResult& RemoteResult) + JobContext* OptionalContext) { using namespace std::literals; std::unique_ptr<OperationLogOutput> LogOutput(std::make_unique<remotestore_impl::JobContextLogOutput>(OptionalContext)); - size_t OpCount = 0; - - CbObject OplogContainerObject; - { - struct FoundAttachment - { - std::filesystem::path RawPath; // If not stored in cid - uint64_t Size = 0; - Oid Key = Oid::Zero; - }; - - std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher> UploadAttachments; - - RwLock BlocksLock; - std::vector<ChunkBlockDescription> Blocks; - CompressedBuffer OpsBuffer; - - std::filesystem::path AttachmentTempPath = Oplog.TempPath(); - AttachmentTempPath.append(".pending"); - CreateDirectories(AttachmentTempPath); - - auto RewriteOp = [&](const Oid& Key, CbObjectView Op, const std::function<void(CbObjectView)>& CB) { - bool OpRewritten = false; - CbArrayView Files = Op["files"sv].AsArrayView(); - if (Files.Num() == 0) - { - CB(Op); - return; - } - - 
CbWriter Cbo; - Cbo.BeginArray("files"sv); - - for (CbFieldView& Field : Files) - { - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - CB(Op); - return; - } + Stopwatch Timer; - bool CopyField = true; + CbObject OplogContainerObject; + CompressedBuffer CompressedOpsSection; + std::unordered_map<IoHash, remotestore_impl::FoundAttachment, IoHash::Hasher> UploadAttachments; + std::filesystem::path AttachmentTempPath = Oplog.TempPath(); + AttachmentTempPath.append(".pending"); + + size_t TotalOpCount = Oplog.GetOplogEntryCount(); + + Stopwatch RewriteOplogTimer; + CbObject SectionOps = remotestore_impl::RewriteOplog(Project, + Oplog, + IgnoreMissingAttachments, + EmbedLooseFiles, + AttachmentTempPath, + UploadAttachments, + OptionalContext); + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Rewrote {} ops to new oplog in {}", + TotalOpCount, + NiceTimeSpanMs(static_cast<uint64_t>(RewriteOplogTimer.GetElapsedTimeMs())))); - if (CbObjectView View = Field.AsObjectView()) - { - IoHash DataHash = View["data"sv].AsHash(); + { + Stopwatch CompressOpsTimer; + CompressedOpsSection = CompressedBuffer::Compress(SectionOps.GetBuffer(), OodleCompressor::Mermaid, OodleCompressionLevel::Fast); + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Compressed oplog section {} ({} -> {}) in {}", + CompressedOpsSection.DecodeRawHash(), + NiceBytes(CompressedOpsSection.DecodeRawSize()), + NiceBytes(CompressedOpsSection.GetCompressedSize()), + NiceTimeSpanMs(static_cast<uint64_t>(CompressOpsTimer.GetElapsedTimeMs())))); + } - if (DataHash == IoHash::Zero) - { - std::string_view ServerPath = View["serverpath"sv].AsString(); - std::filesystem::path FilePath = Project.RootDir / ServerPath; - if (!IsFile(FilePath)) - { - 
remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Missing attachment '{}' for op '{}'", FilePath, View["id"sv].AsObjectId())); - if (IgnoreMissingAttachments) - { - continue; - } - else - { - ExtendableStringBuilder<1024> Sb; - Sb.Append("Failed to find attachment '"); - Sb.Append(FilePath.string()); - Sb.Append("' for op: \n"); - View.ToJson(Sb); - throw std::runtime_error(Sb.ToString()); - } - } + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - { - Stopwatch HashTimer; - SharedBuffer DataBuffer(IoBufferBuilder::MakeFromFile(FilePath)); - DataHash = IoHash::HashBuffer(CompositeBuffer(DataBuffer)); - ZEN_INFO("Hashed loose file '{}' {}: {} in {}", - FilePath, - NiceBytes(DataBuffer.GetSize()), - DataHash, - NiceTimeSpanMs(HashTimer.GetElapsedTimeMs())); - } + std::unordered_set<IoHash, IoHash::Hasher> FoundHashes; + FoundHashes.reserve(UploadAttachments.size()); + for (const auto& It : UploadAttachments) + { + FoundHashes.insert(It.first); + } - // Rewrite file array entry with new data reference - CbObjectWriter Writer; - RewriteCbObject(Writer, View, [&](CbObjectWriter&, CbFieldView Field) -> bool { - if (Field.GetName() == "data"sv) - { - // omit this field as we will write it explicitly ourselves - return true; - } - return false; - }); - Writer.AddBinaryAttachment("data"sv, DataHash); - UploadAttachments.insert_or_assign(DataHash, FoundAttachment{.RawPath = FilePath, .Key = Key}); + std::unordered_set<IoHash, IoHash::Hasher> MissingHashes; + std::vector<remotestore_impl::FoundChunkedFile> AttachmentsToChunk; - CbObject RewrittenOp = Writer.Save(); - Cbo.AddObject(std::move(RewrittenOp)); - CopyField = false; - } - } + remotestore_impl::FindChunkSizes(ChunkStore, + WorkerPool, + MaxChunkEmbedSize, + ChunkFileSizeLimit, + AllowChunking, + AttachmentTempPath, + UploadAttachments, + MissingHashes, + AttachmentsToChunk, + OptionalContext); - if (CopyField) - { - Cbo.AddField(Field); - } - else - { - OpRewritten = true; - 
} - } + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - if (!OpRewritten) - { - CB(Op); - return; - } + for (const IoHash& AttachmentHash : MissingHashes) + { + auto It = UploadAttachments.find(AttachmentHash); + ZEN_ASSERT(It != UploadAttachments.end()); + std::optional<CbObject> Op = Oplog.GetOpByKey(It->second.Key); + ZEN_ASSERT(Op.has_value()); - Cbo.EndArray(); - CbArray FilesArray = Cbo.Save().AsArray(); + if (IgnoreMissingAttachments) + { + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Missing attachment '{}' for op '{}'", AttachmentHash, It->second.Key)); + } + else + { + ExtendableStringBuilder<1024> Sb; + Sb.Append("Failed to find attachment '"); + Sb.Append(AttachmentHash.ToHexString()); + Sb.Append("' for op: \n"); + Op.value().ToJson(Sb); + throw std::runtime_error(Sb.ToString()); + } + UploadAttachments.erase(AttachmentHash); + } - CbObject RewrittenOp = RewriteCbObject(Op, [&](CbObjectWriter& NewWriter, CbFieldView Field) -> bool { - if (Field.GetName() == "files"sv) - { - NewWriter.AddArray("files"sv, FilesArray); + std::vector<remotestore_impl::ChunkedFile> ChunkedFiles = ChunkAttachments(WorkerPool, AttachmentsToChunk, OptionalContext); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - return true; - } + for (const remotestore_impl::ChunkedFile& Chunked : ChunkedFiles) + { + UploadAttachments.erase(Chunked.Chunked.Info.RawHash); + for (const IoHash& ChunkHash : Chunked.Chunked.Info.ChunkHashes) + { + UploadAttachments.erase(ChunkHash); + } + } - return false; - }); - CB(RewrittenOp); - }; + size_t ChunkedChunkCount = std::accumulate( + ChunkedFiles.begin(), + ChunkedFiles.end(), + size_t(0), + [](size_t Current, const remotestore_impl::ChunkedFile& Value) { return Current + Value.Chunked.Info.ChunkHashes.size(); }); - remotestore_impl::ReportMessage(OptionalContext, "Building exported oplog and collecting attachments"); + size_t ReusedAttachmentCount = 0; + std::vector<size_t> 
ReusedBlockIndexes; + { + std::unordered_set<IoHash, IoHash::Hasher> UniqueChunkHashes; + UniqueChunkHashes.reserve(FoundHashes.size() + ChunkedChunkCount); - Stopwatch Timer; + UniqueChunkHashes.insert(FoundHashes.begin(), FoundHashes.end()); - size_t TotalOpCount = Oplog.GetOplogEntryCount(); - CompressedBuffer CompressedOpsSection; + for (remotestore_impl::ChunkedFile& Chunked : ChunkedFiles) + { + UniqueChunkHashes.insert(Chunked.Chunked.Info.ChunkHashes.begin(), Chunked.Chunked.Info.ChunkHashes.end()); + } + std::vector<IoHash> ChunkHashes(UniqueChunkHashes.begin(), UniqueChunkHashes.end()); + + std::vector<uint32_t> ChunkIndexes; + ChunkIndexes.resize(ChunkHashes.size()); + std::iota(ChunkIndexes.begin(), ChunkIndexes.end(), 0); + + std::vector<uint32_t> UnusedChunkIndexes; + ReuseBlocksStatistics ReuseBlocksStats; + + ReusedBlockIndexes = FindReuseBlocks(*LogOutput, + /*BlockReuseMinPercentLimit*/ 80, + /*IsVerbose*/ false, + ReuseBlocksStats, + KnownBlocks, + ChunkHashes, + ChunkIndexes, + UnusedChunkIndexes); + for (size_t KnownBlockIndex : ReusedBlockIndexes) { - Stopwatch RewriteOplogTimer; - CbObjectWriter SectionOpsWriter; - SectionOpsWriter.BeginArray("ops"sv); + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { - Stopwatch BuildingOplogProgressTimer; - Oplog.IterateOplogWithKey([&](int, const Oid& Key, CbObjectView Op) { - if (RemoteResult.IsError()) - { - return; - } - Op.IterateAttachments([&](CbFieldView FieldView) { - UploadAttachments.insert_or_assign(FieldView.AsAttachment(), FoundAttachment{.Key = Key}); - }); - if (EmbedLooseFiles) - { - RewriteOp(Key, Op, [&SectionOpsWriter](CbObjectView Op) { SectionOpsWriter << Op; }); - } - else - { - SectionOpsWriter << Op; - } - OpCount++; - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - 
remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return; - } - if (OpCount % 1000 == 0) - { - remotestore_impl::ReportProgress(OptionalContext, - "Building oplog"sv, - fmt::format("{} ops processed", OpCount), - TotalOpCount, - TotalOpCount - OpCount, - BuildingOplogProgressTimer.GetElapsedTimeMs()); - } - }); - if (RemoteResult.IsError()) + if (UploadAttachments.erase(KnownHash) == 1) { - return {}; - } - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } - if (TotalOpCount > 0) - { - remotestore_impl::ReportProgress(OptionalContext, - "Building oplog"sv, - fmt::format("{} ops processed", OpCount), - TotalOpCount, - 0, - BuildingOplogProgressTimer.GetElapsedTimeMs()); + ReusedAttachmentCount++; } } - SectionOpsWriter.EndArray(); // "ops" - - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Rewrote {} ops to new oplog in {}", - OpCount, - NiceTimeSpanMs(static_cast<uint64_t>(RewriteOplogTimer.GetElapsedTimeMs())))); - - { - Stopwatch CompressOpsTimer; - CompressedOpsSection = - CompressedBuffer::Compress(SectionOpsWriter.Save().GetBuffer(), OodleCompressor::Mermaid, OodleCompressionLevel::Fast); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Compressed oplog section {} ({} -> {}) in {}", - CompressedOpsSection.DecodeRawHash(), - NiceBytes(CompressedOpsSection.DecodeRawSize()), - NiceBytes(CompressedOpsSection.GetCompressedSize()), - NiceTimeSpanMs(static_cast<uint64_t>(CompressOpsTimer.GetElapsedTimeMs())))); - } } + } - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - 
remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } + std::unordered_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher> LargeChunkAttachments; + std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher> LooseUploadAttachments; - std::unordered_set<IoHash, IoHash::Hasher> FoundHashes; - FoundHashes.reserve(UploadAttachments.size()); - for (const auto& It : UploadAttachments) + if (UploadAttachments.empty()) + { + if (ReusedAttachmentCount != 0) { - FoundHashes.insert(It.first); + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Found all {} attachments from {} ops in existing blocks", ReusedAttachmentCount, TotalOpCount)); } + } + else + { + const size_t TotalAttachmentCount = UploadAttachments.size() + ReusedAttachmentCount; + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Resolving {} attachments from {} ops ({} ({:.1f}%) found in existing blocks)", + UploadAttachments.size(), + TotalOpCount, + ReusedAttachmentCount, + (100.f * ReusedAttachmentCount) / TotalAttachmentCount)); + + ResolveAttachments(ChunkStore, + WorkerPool, + MaxChunkEmbedSize, + AttachmentTempPath, + UploadAttachments, + LargeChunkAttachments, + LooseUploadAttachments, + OptionalContext); - size_t ReusedAttachmentCount = 0; - std::vector<size_t> ReusedBlockIndexes; + if (remotestore_impl::IsCancelled(OptionalContext)) { - std::vector<IoHash> ChunkHashes; - ChunkHashes.reserve(FoundHashes.size()); - ChunkHashes.insert(ChunkHashes.begin(), FoundHashes.begin(), FoundHashes.end()); - std::vector<uint32_t> ChunkIndexes; - ChunkIndexes.resize(FoundHashes.size()); - std::iota(ChunkIndexes.begin(), ChunkIndexes.end(), 0); - - std::vector<uint32_t> UnusedChunkIndexes; - ReuseBlocksStatistics ReuseBlocksStats; - - ReusedBlockIndexes = FindReuseBlocks(*LogOutput, - /*BlockReuseMinPercentLimit*/ 80, - /*IsVerbose*/ false, - ReuseBlocksStats, - 
KnownBlocks, - ChunkHashes, - ChunkIndexes, - UnusedChunkIndexes); - for (size_t KnownBlockIndex : ReusedBlockIndexes) - { - const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) - { - if (UploadAttachments.erase(KnownHash) == 1) - { - ReusedAttachmentCount++; - } - } - } + return {}; } + } - struct ChunkedFile - { - IoBuffer Source; - - ChunkedInfoWithSource Chunked; - tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkLoookup; - }; - std::vector<ChunkedFile> ChunkedFiles; - - auto ChunkFile = [](const IoHash& RawHash, IoBuffer& RawData, const IoBufferFileReference& FileRef, JobContext*) -> ChunkedFile { - ChunkedFile Chunked; - Stopwatch Timer; - - uint64_t Offset = FileRef.FileChunkOffset; - uint64_t Size = FileRef.FileChunkSize; - - BasicFile SourceFile; - SourceFile.Attach(FileRef.FileHandle); - auto __ = MakeGuard([&SourceFile]() { SourceFile.Detach(); }); + std::unordered_set<IoHash, IoHash::Hasher> LargeChunkHashes; - Chunked.Chunked = ChunkData(SourceFile, Offset, Size, UShaderByteCodeParams); - ZEN_ASSERT(Chunked.Chunked.Info.RawHash == RawHash); - Chunked.Source = RawData; + for (auto& It : LargeChunkAttachments) + { + UploadAttachments.erase(It.first); + LargeChunkHashes.insert(It.first); + OnLargeAttachment(It.first, std::move(It.second)); + } - ZEN_INFO("Chunked large attachment '{}' {} into {} chunks in {}", - RawHash, - NiceBytes(Chunked.Chunked.Info.RawSize), - Chunked.Chunked.Info.ChunkHashes.size(), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + RwLock BlocksLock; + std::vector<ChunkBlockDescription> Blocks; - return Chunked; - }; + std::vector<std::pair<IoHash, Oid>> SortedUploadAttachments; + SortedUploadAttachments.reserve(UploadAttachments.size()); + for (const auto& It : UploadAttachments) + { + SortedUploadAttachments.push_back(std::make_pair(It.first, It.second.Key)); + } - RwLock ResolveLock; - std::unordered_set<IoHash, IoHash::Hasher> ChunkedHashes; - 
std::unordered_set<IoHash, IoHash::Hasher> LargeChunkHashes; - std::unordered_map<IoHash, size_t, IoHash::Hasher> ChunkedUploadAttachments; - std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher> LooseUploadAttachments; - std::unordered_set<IoHash, IoHash::Hasher> MissingHashes; + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Resolving {} attachments from {} ops", UploadAttachments.size(), TotalOpCount)); + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Sorting {} attachments from {} ops", SortedUploadAttachments.size(), TotalOpCount)); - Latch ResolveAttachmentsLatch(1); - for (auto& It : UploadAttachments) - { - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } + // Sort attachments so we get predictable blocks for the same oplog upload + std::sort(SortedUploadAttachments.begin(), + SortedUploadAttachments.end(), + [](const std::pair<IoHash, Oid>& Lhs, const std::pair<IoHash, Oid>& Rhs) { + if (Lhs.second == Rhs.second) + { + // Same key, sort by raw hash + return Lhs.first < Rhs.first; + } + // Sort by key + return Lhs.second < Rhs.second; + }); - ResolveAttachmentsLatch.AddCount(1); + std::vector<size_t> ChunkedFilesOrder; + ChunkedFilesOrder.reserve(ChunkedFiles.size()); + for (size_t Index = 0; Index < ChunkedFiles.size(); Index++) + { + ChunkedFilesOrder.push_back(Index); + } + std::sort(ChunkedFilesOrder.begin(), ChunkedFilesOrder.end(), [&ChunkedFiles](size_t Lhs, size_t Rhs) { + return ChunkedFiles[Lhs].Chunked.Info.RawHash < ChunkedFiles[Rhs].Chunked.Info.RawHash; + }); - WorkerPool.ScheduleWork( - [&ChunkStore, - UploadAttachment = &It.second, - RawHash = It.first, - 
&ResolveAttachmentsLatch, - &ResolveLock, - &ChunkedHashes, - &LargeChunkHashes, - &ChunkedUploadAttachments, - &LooseUploadAttachments, - &MissingHashes, - &OnLargeAttachment, - &AttachmentTempPath, - &ChunkFile, - &ChunkedFiles, - MaxChunkEmbedSize, - ChunkFileSizeLimit, - AllowChunking, - &RemoteResult, - OptionalContext]() { - ZEN_TRACE_CPU("PrepareChunk"); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Assembling {} attachments and {} chunked parts from {} ops into blocks and loose attachments", + SortedUploadAttachments.size(), + ChunkedChunkCount, + TotalOpCount)); - auto _ = MakeGuard([&ResolveAttachmentsLatch] { ResolveAttachmentsLatch.CountDown(); }); - if (remotestore_impl::IsCancelled(OptionalContext)) - { - return; - } + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - try - { - if (!UploadAttachment->RawPath.empty()) - { - const std::filesystem::path& FilePath = UploadAttachment->RawPath; - IoBuffer RawData = IoBufferBuilder::MakeFromFile(FilePath); - if (RawData) - { - if (AllowChunking && RawData.GetSize() > ChunkFileSizeLimit) - { - IoBufferFileReference FileRef; - (void)RawData.GetFileReference(FileRef); - - ChunkedFile Chunked = ChunkFile(RawHash, RawData, FileRef, OptionalContext); - ResolveLock.WithExclusiveLock( - [RawHash, &ChunkedFiles, &ChunkedUploadAttachments, &ChunkedHashes, &Chunked]() { - ChunkedUploadAttachments.insert_or_assign(RawHash, ChunkedFiles.size()); - ChunkedHashes.reserve(ChunkedHashes.size() + Chunked.Chunked.Info.ChunkHashes.size()); - for (const IoHash& ChunkHash : Chunked.Chunked.Info.ChunkHashes) - { - ChunkedHashes.insert(ChunkHash); - } - ChunkedFiles.emplace_back(std::move(Chunked)); - }); - } - else if (RawData.GetSize() > (MaxChunkEmbedSize * 2)) - { - // Assume the compressed file is going to be larger than MaxChunkEmbedSize, even if it isn't - // it will be a loose attachment instead of 
going into a block - OnLargeAttachment(RawHash, [RawData = std::move(RawData), AttachmentTempPath](const IoHash& RawHash) { - size_t RawSize = RawData.GetSize(); - CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer(std::move(RawData)), - OodleCompressor::Mermaid, - OodleCompressionLevel::VeryFast); + size_t ChunkAssembleCount = SortedUploadAttachments.size() + ChunkedChunkCount; + size_t ChunksAssembled = 0; + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Assembling {} attachments from {} ops into blocks", ChunkAssembleCount, TotalOpCount)); - std::filesystem::path AttachmentPath = AttachmentTempPath; - AttachmentPath.append(RawHash.ToHexString()); - IoBuffer TempAttachmentBuffer = - WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath); - ZEN_INFO("Saved temp attachment to '{}', {} ({})", - AttachmentPath, - NiceBytes(RawSize), - NiceBytes(TempAttachmentBuffer.GetSize())); - return TempAttachmentBuffer; - }); - ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); }); - } - else - { - uint64_t RawSize = RawData.GetSize(); - CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer(RawData), - OodleCompressor::Mermaid, - OodleCompressionLevel::VeryFast); - - std::filesystem::path AttachmentPath = AttachmentTempPath; - AttachmentPath.append(RawHash.ToHexString()); - - uint64_t CompressedSize = Compressed.GetCompressedSize(); - IoBuffer TempAttachmentBuffer = WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath); - ZEN_INFO("Saved temp attachment to '{}', {} ({})", - AttachmentPath, - NiceBytes(RawSize), - NiceBytes(TempAttachmentBuffer.GetSize())); - - if (CompressedSize > MaxChunkEmbedSize) - { - OnLargeAttachment(RawHash, - [Data = std::move(TempAttachmentBuffer)](const IoHash&) { return Data; }); - ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); }); - } - else - { - UploadAttachment->Size = 
CompressedSize; - ResolveLock.WithExclusiveLock( - [RawHash, RawSize, &LooseUploadAttachments, Data = std::move(TempAttachmentBuffer)]() { - LooseUploadAttachments.insert_or_assign(RawHash, std::make_pair(RawSize, std::move(Data))); - }); - } - } - } - else - { - ResolveLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); }); - } - } - else - { - IoBuffer Data = ChunkStore.FindChunkByCid(RawHash); - if (Data) - { - auto GetForChunking = - [](size_t ChunkFileSizeLimit, const IoBuffer& Data, IoBufferFileReference& OutFileRef) -> bool { - if (Data.IsWholeFile()) - { - IoHash VerifyRawHash; - uint64_t VerifyRawSize; - CompressedBuffer Compressed = - CompressedBuffer::FromCompressed(SharedBuffer(Data), VerifyRawHash, VerifyRawSize); - if (Compressed) - { - if (VerifyRawSize > ChunkFileSizeLimit) - { - OodleCompressor Compressor; - OodleCompressionLevel CompressionLevel; - uint64_t BlockSize; - if (Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize)) - { - if (CompressionLevel == OodleCompressionLevel::None) - { - CompositeBuffer Decompressed = Compressed.DecompressToComposite(); - if (Decompressed) - { - std::span<const SharedBuffer> Segments = Decompressed.GetSegments(); - if (Segments.size() == 1) - { - IoBuffer DecompressedData = Segments[0].AsIoBuffer(); - if (DecompressedData.GetFileReference(OutFileRef)) - { - return true; - } - } - } - } - } - } - } - } - return false; - }; + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork Work(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); - IoBufferFileReference FileRef; - if (AllowChunking && GetForChunking(ChunkFileSizeLimit, Data, FileRef)) - { - ChunkedFile Chunked = ChunkFile(RawHash, Data, FileRef, OptionalContext); - ResolveLock.WithExclusiveLock( - [RawHash, &ChunkedFiles, &ChunkedUploadAttachments, &ChunkedHashes, &Chunked]() { - ChunkedUploadAttachments.insert_or_assign(RawHash, ChunkedFiles.size()); - 
ChunkedHashes.reserve(ChunkedHashes.size() + Chunked.Chunked.Info.ChunkHashes.size()); - for (const IoHash& ChunkHash : Chunked.Chunked.Info.ChunkHashes) - { - ChunkedHashes.insert(ChunkHash); - } - ChunkedFiles.emplace_back(std::move(Chunked)); - }); - } - else if (Data.GetSize() > MaxChunkEmbedSize) - { - OnLargeAttachment(RawHash, - [&ChunkStore](const IoHash& RawHash) { return ChunkStore.FindChunkByCid(RawHash); }); - ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); }); - } - else - { - UploadAttachment->Size = Data.GetSize(); - } - } - else - { - ResolveLock.WithExclusiveLock([RawHash, &MissingHashes]() { MissingHashes.insert(RawHash); }); - } - } - } - catch (const std::exception& Ex) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::NotFound), - fmt::format("Failed to resolve attachment {}", RawHash), - Ex.what()); - } - }, - WorkerThreadPool::EMode::EnableBacklog); - } - ResolveAttachmentsLatch.CountDown(); + uint32_t ComposedBlocks = 0; - { - Stopwatch ResolveAttachmentsProgressTimer; - ptrdiff_t AttachmentCountToUseForProgress = ResolveAttachmentsLatch.Remaining(); - while (!ResolveAttachmentsLatch.Wait(1000)) - { - ptrdiff_t Remaining = ResolveAttachmentsLatch.Remaining(); - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - while (!ResolveAttachmentsLatch.Wait(1000)) - { - Remaining = ResolveAttachmentsLatch.Remaining(); - remotestore_impl::ReportProgress(OptionalContext, - "Resolving attachments"sv, - fmt::format("Aborting, {} attachments remaining...", Remaining), - UploadAttachments.size(), - Remaining, - ResolveAttachmentsProgressTimer.GetElapsedTimeMs()); - } - remotestore_impl::ReportProgress(OptionalContext, - "Resolving attachments"sv, - 
"Aborted"sv, - UploadAttachments.size(), - 0, - ResolveAttachmentsProgressTimer.GetElapsedTimeMs()); - return {}; - } - AttachmentCountToUseForProgress = Max(Remaining, AttachmentCountToUseForProgress); - remotestore_impl::ReportProgress(OptionalContext, - "Resolving attachments"sv, - fmt::format("{} remaining...", Remaining), - AttachmentCountToUseForProgress, - Remaining, - ResolveAttachmentsProgressTimer.GetElapsedTimeMs()); - } - if (UploadAttachments.size() > 0) + uint64_t CreateBlocksStartMS = Timer.GetElapsedTimeMs(); + { + Stopwatch BlockCreateProgressTimer; + remotestore_impl::BlockComposer Composer(remotestore_impl::BlockComposer::Configuration{ + .MaxBlockSize = MaxBlockSize, + .MaxChunksPerBlock = MaxChunksPerBlock, + .MaxChunkEmbedSize = MaxChunkEmbedSize, + .IsCancelledFunc = [OptionalContext]() { return remotestore_impl::IsCancelled(OptionalContext); }}); + + auto OnNewBlock = [&Work, + &WorkerPool, + BuildBlocks, + &BlockCreateProgressTimer, + &BlocksLock, + &Blocks, + &AsyncOnBlock, + &OnBlockChunks, + ChunkAssembleCount, + &ChunksAssembled, + &ComposedBlocks, + OptionalContext](std::vector<IoHash>&& ChunkRawHashes, + const std::function<FetchChunkFunc(const IoHash& AttachmentHash)>& FetchAttachmentResolver) { + size_t ChunkCount = ChunkRawHashes.size(); + std::vector<std::pair<IoHash, FetchChunkFunc>> ChunksInBlock; + ChunksInBlock.reserve(ChunkCount); + + for (const IoHash& AttachmentHash : ChunkRawHashes) { - remotestore_impl::ReportProgress(OptionalContext, - "Resolving attachments"sv, - ""sv, - UploadAttachments.size(), - 0, - ResolveAttachmentsProgressTimer.GetElapsedTimeMs()); + ChunksInBlock.emplace_back(std::make_pair(AttachmentHash, FetchAttachmentResolver(AttachmentHash))); } - } - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), 
RemoteResult.GetErrorReason())); - return {}; - } - - for (const IoHash& AttachmentHash : MissingHashes) - { - auto It = UploadAttachments.find(AttachmentHash); - ZEN_ASSERT(It != UploadAttachments.end()); - std::optional<CbObject> Op = Oplog.GetOpByKey(It->second.Key); - ZEN_ASSERT(Op.has_value()); - if (IgnoreMissingAttachments) + size_t BlockIndex = remotestore_impl::AddBlock(BlocksLock, Blocks); + if (BuildBlocks) { - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Missing attachment '{}' for op '{}'", AttachmentHash, It->second.Key)); + remotestore_impl::AsyncCreateBlock(Work, + WorkerPool, + std::move(ChunksInBlock), + BlocksLock, + Blocks, + BlockIndex, + AsyncOnBlock, + OptionalContext); } else { - ExtendableStringBuilder<1024> Sb; - Sb.Append("Failed to find attachment '"); - Sb.Append(AttachmentHash.ToHexString()); - Sb.Append("' for op: \n"); - Op.value().ToJson(Sb); - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::NotFound), Sb.ToString(), {}); - return {}; - } - UploadAttachments.erase(AttachmentHash); - } - - for (const auto& It : ChunkedUploadAttachments) - { - UploadAttachments.erase(It.first); - } - for (const auto& It : LargeChunkHashes) - { - UploadAttachments.erase(It); - } + ZEN_INFO("Bulk group {} attachments", ChunkCount); - { - std::vector<IoHash> ChunkHashes; - ChunkHashes.reserve(ChunkedHashes.size()); - ChunkHashes.insert(ChunkHashes.begin(), ChunkedHashes.begin(), ChunkedHashes.end()); - std::vector<uint32_t> ChunkIndexes; - ChunkIndexes.resize(ChunkedHashes.size()); - std::iota(ChunkIndexes.begin(), ChunkIndexes.end(), 0); - - std::vector<uint32_t> UnusedChunkIndexes; - ReuseBlocksStatistics ReuseBlocksStats; - - std::vector<size_t> ReusedBlockFromChunking = FindReuseBlocks(*LogOutput, - /*BlockReuseMinPercentLimit*/ 80, - /*IsVerbose*/ false, - ReuseBlocksStats, - KnownBlocks, - ChunkHashes, - ChunkIndexes, - UnusedChunkIndexes); - for (size_t KnownBlockIndex : ReusedBlockIndexes) - { - const 
ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) - { - if (ChunkedHashes.erase(KnownHash) == 1) - { - ReusedAttachmentCount++; - } - } + // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index + RwLock::SharedLockScope _(BlocksLock); + Blocks[BlockIndex].ChunkRawHashes = std::move(ChunkRawHashes); + OnBlockChunks(std::move(ChunksInBlock)); } - ReusedBlockIndexes.insert(ReusedBlockIndexes.end(), ReusedBlockFromChunking.begin(), ReusedBlockFromChunking.end()); - } - std::sort(ReusedBlockIndexes.begin(), ReusedBlockIndexes.end()); - auto UniqueKnownBlocksEnd = std::unique(ReusedBlockIndexes.begin(), ReusedBlockIndexes.end()); - size_t ReuseBlockCount = std::distance(ReusedBlockIndexes.begin(), UniqueKnownBlocksEnd); - if (ReuseBlockCount > 0) - { - Blocks.reserve(ReuseBlockCount); - for (auto It = ReusedBlockIndexes.begin(); It != UniqueKnownBlocksEnd; It++) + ChunksAssembled += ChunkCount; + ComposedBlocks++; + + if (ChunksAssembled % 1000 == 0) { - Blocks.push_back({KnownBlocks[*It]}); + remotestore_impl::ReportProgress( + OptionalContext, + "Assembling blocks"sv, + fmt::format("{} attachments processed, {} blocks assembled", ChunksAssembled, ComposedBlocks), + ChunkAssembleCount, + ChunkAssembleCount - ChunksAssembled, + BlockCreateProgressTimer.GetElapsedTimeMs()); } - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Reused {} attachments from {} blocks", ReusedAttachmentCount, ReuseBlockCount)); - } - - std::vector<std::pair<IoHash, Oid>> SortedUploadAttachments; - SortedUploadAttachments.reserve(UploadAttachments.size()); - for (const auto& It : UploadAttachments) - { - SortedUploadAttachments.push_back(std::make_pair(It.first, It.second.Key)); - } - - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - 
remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } - - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Sorting {} attachments from {} ops", SortedUploadAttachments.size(), TotalOpCount)); - - // Sort attachments so we get predictable blocks for the same oplog upload - std::sort(SortedUploadAttachments.begin(), - SortedUploadAttachments.end(), - [](const std::pair<IoHash, Oid>& Lhs, const std::pair<IoHash, Oid>& Rhs) { - if (Lhs.second == Rhs.second) - { - // Same key, sort by raw hash - return Lhs.first < Rhs.first; - } - // Sort by key - return Lhs.second < Rhs.second; - }); - - std::vector<size_t> ChunkedFilesOrder; - ChunkedFilesOrder.reserve(ChunkedFiles.size()); - for (size_t Index = 0; Index < ChunkedFiles.size(); Index++) - { - ChunkedFilesOrder.push_back(Index); - } - std::sort(ChunkedFilesOrder.begin(), ChunkedFilesOrder.end(), [&ChunkedFiles](size_t Lhs, size_t Rhs) { - return ChunkedFiles[Lhs].Chunked.Info.RawHash < ChunkedFiles[Rhs].Chunked.Info.RawHash; - }); - - // SortedUploadAttachments now contains all whole chunks with size to be composed into blocks and uploaded - // ChunkedHashes contains all chunked up chunks to be composed into blocks - - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Assembling {} attachments and {} chunked parts from {} ops into blocks and loose attachments", - SortedUploadAttachments.size(), - ChunkedHashes.size(), - TotalOpCount)); - - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", 
""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return {}; - } - - // SortedUploadAttachments now contains all whole chunks with size to be composed into blocks and uploaded - // ChunkedHashes contains all chunked up chunks to be composed into blocks - - size_t ChunkAssembleCount = SortedUploadAttachments.size() + ChunkedHashes.size(); - size_t ChunksAssembled = 0; - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Assembling {} attachments from {} ops into blocks", ChunkAssembleCount, TotalOpCount)); - - Latch BlockCreateLatch(1); - size_t GeneratedBlockCount = 0; - size_t BlockSize = 0; - std::vector<std::pair<IoHash, FetchChunkFunc>> ChunksInBlock; - - Oid LastOpKey = Oid::Zero; - uint32_t ComposedBlocks = 0; + }; - uint64_t CreateBlocksStartMS = Timer.GetElapsedTimeMs(); - try { - uint64_t FetchAttachmentsStartMS = Timer.GetElapsedTimeMs(); - std::unordered_set<IoHash, IoHash::Hasher> AddedAttachmentHashes; - auto NewBlock = [&]() { - size_t BlockIndex = remotestore_impl::AddBlock(BlocksLock, Blocks); - size_t ChunkCount = ChunksInBlock.size(); - std::vector<IoHash> ChunkRawHashes; - ChunkRawHashes.reserve(ChunkCount); - for (const std::pair<IoHash, FetchChunkFunc>& Chunk : ChunksInBlock) - { - ChunkRawHashes.push_back(Chunk.first); - } - if (BuildBlocks) - { - remotestore_impl::CreateBlock(WorkerPool, - BlockCreateLatch, - std::move(ChunksInBlock), - BlocksLock, - Blocks, - BlockIndex, - AsyncOnBlock, - RemoteResult); - ComposedBlocks++; - // Worker will set Blocks[BlockIndex] = Block (including ChunkRawHashes) under shared lock - } - else - { - ZEN_INFO("Bulk group {} attachments", ChunkCount); - OnBlockChunks(std::move(ChunksInBlock)); - // We can share the lock as we are not resizing the vector and only touch our own index - RwLock::SharedLockScope _(BlocksLock); - Blocks[BlockIndex].ChunkRawHashes = std::move(ChunkRawHashes); - } - uint64_t 
NowMS = Timer.GetElapsedTimeMs(); - ZEN_INFO("Assembled block {} with {} chunks in {} ({})", - BlockIndex, - ChunkCount, - NiceTimeSpanMs(NowMS - FetchAttachmentsStartMS), - NiceBytes(BlockSize)); - FetchAttachmentsStartMS = NowMS; - ChunksInBlock.clear(); - BlockSize = 0; - GeneratedBlockCount++; - }; - - Stopwatch AssembleBlocksProgressTimer; - uint64_t LastAssembleBlocksProgressUpdateMs = AssembleBlocksProgressTimer.GetElapsedTimeMs(); - for (auto HashIt = SortedUploadAttachments.begin(); HashIt != SortedUploadAttachments.end(); HashIt++) + std::vector<IoHash> AttachmentHashes; + AttachmentHashes.reserve(SortedUploadAttachments.size()); + std::vector<uint64_t> AttachmentSizes; + AttachmentSizes.reserve(SortedUploadAttachments.size()); + std::vector<Oid> AttachmentKeys; + AttachmentKeys.reserve(SortedUploadAttachments.size()); + + for (const std::pair<IoHash, Oid>& Attachment : SortedUploadAttachments) { - if (remotestore_impl::IsCancelled(OptionalContext)) + AttachmentHashes.push_back(Attachment.first); + if (auto It = UploadAttachments.find(Attachment.first); It != UploadAttachments.end()) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - break; + AttachmentSizes.push_back(It->second.Size); } - if (AssembleBlocksProgressTimer.GetElapsedTimeMs() - LastAssembleBlocksProgressUpdateMs > 200) + else { - remotestore_impl::ReportProgress( - OptionalContext, - "Assembling blocks"sv, - fmt::format("{} attachments processed, {} blocks assembled", ChunksAssembled, ComposedBlocks), - ChunkAssembleCount, - ChunkAssembleCount - ChunksAssembled, - AssembleBlocksProgressTimer.GetElapsedTimeMs()); - LastAssembleBlocksProgressUpdateMs = AssembleBlocksProgressTimer.GetElapsedTimeMs(); - } - const IoHash& RawHash(HashIt->first); - const Oid CurrentOpKey = HashIt->second; - const IoHash& 
AttachmentHash(HashIt->first); - auto InfoIt = UploadAttachments.find(RawHash); - ZEN_ASSERT(InfoIt != UploadAttachments.end()); - uint64_t PayloadSize = InfoIt->second.Size; - - if (AddedAttachmentHashes.insert(AttachmentHash).second) - { - if (BuildBlocks && ChunksInBlock.size() > 0) - { - if (((BlockSize + PayloadSize) > MaxBlockSize || (ChunksInBlock.size() + 1) > MaxChunksPerBlock) && - (CurrentOpKey != LastOpKey)) - { - NewBlock(); - } - } - - if (auto It = LooseUploadAttachments.find(RawHash); It != LooseUploadAttachments.end()) - { - ChunksInBlock.emplace_back(std::make_pair( - RawHash, - [RawSize = It->second.first, - IoBuffer = SharedBuffer(It->second.second)](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { - return std::make_pair(RawSize, CompressedBuffer::FromCompressedNoValidate(IoBuffer.AsIoBuffer())); - })); - LooseUploadAttachments.erase(It); - } - else - { - ChunksInBlock.emplace_back( - std::make_pair(RawHash, [&ChunkStore](const IoHash& RawHash) -> std::pair<uint64_t, CompressedBuffer> { - IoBuffer Chunk = ChunkStore.FindChunkByCid(RawHash); - if (!Chunk) - { - throw std::runtime_error(fmt::format("Failed to find chunk {} in cid store", RawHash)); - } - IoHash ValidateRawHash; - uint64_t RawSize = 0; - CompressedBuffer Compressed = - CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), ValidateRawHash, RawSize); - if (!Compressed) - { - throw std::runtime_error( - fmt::format("Chunk {} in cid store is malformed (not a compressed buffer)", RawHash)); - } - if (RawHash != ValidateRawHash) - { - throw std::runtime_error( - fmt::format("Chunk {} in cid store is malformed (mismatching raw hash)", RawHash)); - } - return {RawSize, Compressed}; - })); - } - BlockSize += PayloadSize; - - LastOpKey = CurrentOpKey; - ChunksAssembled++; + throw std::runtime_error( + fmt::format("Attachment to upload state inconsistent, could not find attachment {}", Attachment.first)); } + AttachmentKeys.push_back(Attachment.second); } - if 
(!RemoteResult.IsError()) - { - // Keep the chunked files as separate blocks to make the blocks generated - // more consistent - if (BlockSize > 0) + + auto FetchWholeAttachmentResolver = [&LooseUploadAttachments, &ChunkStore](const IoHash& AttachmentHash) -> FetchChunkFunc { + if (auto It = LooseUploadAttachments.find(AttachmentHash); It != LooseUploadAttachments.end()) { - NewBlock(); + uint64_t RawSize = It->second.first; + IoBuffer Payload = std::move(It->second.second); + return + [RawSize, Payload = std::move(Payload)](const IoHash& ChunkHash) mutable -> std::pair<uint64_t, CompositeBuffer> { + ZEN_UNUSED(ChunkHash); + return {RawSize, CompositeBuffer(SharedBuffer(std::move(Payload)))}; + }; } - - for (size_t ChunkedFileIndex : ChunkedFilesOrder) + else { - const ChunkedFile& ChunkedFile = ChunkedFiles[ChunkedFileIndex]; - const ChunkedInfoWithSource& Chunked = ChunkedFile.Chunked; - size_t ChunkCount = Chunked.Info.ChunkHashes.size(); - for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) - { - if (remotestore_impl::IsCancelled(OptionalContext)) + return [&ChunkStore](const IoHash& RawHash) -> std::pair<uint64_t, CompositeBuffer> { + IoBuffer Chunk = ChunkStore.FindChunkByCid(RawHash); + if (!Chunk) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - break; + throw std::runtime_error(fmt::format("Failed to find chunk {} in cid store", RawHash)); } - if (AssembleBlocksProgressTimer.GetElapsedTimeMs() - LastAssembleBlocksProgressUpdateMs > 200) + + // These are small chunks - make memory resident + Chunk = IoBufferBuilder::ReadFromFileMaybe(Chunk); + + IoHash ValidateRawHash; + uint64_t RawSize = 0; + CompressedBuffer Compressed = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), ValidateRawHash, RawSize); + if (!Compressed) { - 
remotestore_impl::ReportProgress( - OptionalContext, - "Assembling blocks"sv, - fmt::format("{} attachments processed, {} blocks assembled", ChunksAssembled, ComposedBlocks), - ChunkAssembleCount, - ChunkAssembleCount - ChunksAssembled, - AssembleBlocksProgressTimer.GetElapsedTimeMs()); - LastAssembleBlocksProgressUpdateMs = AssembleBlocksProgressTimer.GetElapsedTimeMs(); + throw std::runtime_error(fmt::format("Chunk {} in cid store is malformed (not a compressed buffer)", RawHash)); } - const IoHash& ChunkHash = ChunkedFile.Chunked.Info.ChunkHashes[ChunkIndex]; - if (auto FindIt = ChunkedHashes.find(ChunkHash); FindIt != ChunkedHashes.end()) + if (RawHash != ValidateRawHash) { - if (AddedAttachmentHashes.insert(ChunkHash).second) - { - const ChunkSource& Source = Chunked.ChunkSources[ChunkIndex]; - uint32_t ChunkSize = gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + Source.Size); - if (BuildBlocks && ChunksInBlock.size() > 0) - { - if ((BlockSize + ChunkSize) > MaxBlockSize || (ChunksInBlock.size() + 1) > MaxChunksPerBlock) - { - NewBlock(); - } - } - ChunksInBlock.emplace_back( - std::make_pair(ChunkHash, - [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size]( - const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { - return {Size, - CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)), - OodleCompressor::Mermaid, - OodleCompressionLevel::None)}; - })); - BlockSize += CompressedBuffer::GetHeaderSizeForNoneEncoder() + Source.Size; - ChunksAssembled++; - } - ChunkedHashes.erase(FindIt); + throw std::runtime_error(fmt::format("Chunk {} in cid store is malformed (mismatching raw hash)", RawHash)); } - } + return {RawSize, Compressed.GetCompressed()}; + }; } - } + }; - if (BlockSize > 0 && !RemoteResult.IsError()) + Composer.Compose(AttachmentHashes, + AttachmentSizes, + AttachmentKeys, + [&OnNewBlock, &FetchWholeAttachmentResolver](std::vector<IoHash>&& ChunkRawHashes) { + 
OnNewBlock(std::move(ChunkRawHashes), FetchWholeAttachmentResolver); + }); + } + + { + std::vector<IoHash> AttachmentHashes; + AttachmentHashes.reserve(ChunkedChunkCount); + std::vector<uint64_t> AttachmentSizes; + AttachmentSizes.reserve(ChunkedChunkCount); + std::vector<Oid> AttachmentKeys; + AttachmentKeys.reserve(ChunkedChunkCount); + + tsl::robin_map<IoHash, std::pair<size_t, size_t>, IoHash::Hasher> ChunkHashToChunkFileIndexAndChunkIndex; + + for (size_t ChunkedFileIndex : ChunkedFilesOrder) { - if (!remotestore_impl::IsCancelled(OptionalContext)) + const remotestore_impl::ChunkedFile& ChunkedFile = ChunkedFiles[ChunkedFileIndex]; + const ChunkedInfoWithSource& Chunked = ChunkedFile.Chunked; + size_t ChunkCount = Chunked.Info.ChunkHashes.size(); + Oid ChunkedFileOid = Oid::NewOid(); + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) { - NewBlock(); + const IoHash& ChunkHash = Chunked.Info.ChunkHashes[ChunkIndex]; + uint64_t ChunkSize = Chunked.ChunkSources[ChunkIndex].Size; + { + if (ChunkHashToChunkFileIndexAndChunkIndex + .insert(std::make_pair(ChunkHash, std::make_pair(ChunkedFileIndex, ChunkIndex))) + .second) + { + if (ChunkSize > MaxChunkEmbedSize) + { + OnLargeAttachment(ChunkHash, + [SourceBuffer = ChunkedFile.Source, + ChunkSource = Chunked.ChunkSources[ChunkIndex], + ChunkHash](const IoHash& RawHash) -> CompositeBuffer { + ZEN_ASSERT(RawHash == ChunkHash); + CompressedBuffer Compressed = CompressedBuffer::Compress( + SharedBuffer(IoBuffer(SourceBuffer, ChunkSource.Offset, ChunkSource.Size)), + OodleCompressor::Mermaid, + OodleCompressionLevel::None); + return Compressed.GetCompressed(); + }); + + LargeChunkHashes.insert(ChunkHash); + } + else + { + AttachmentHashes.push_back(ChunkHash); + AttachmentSizes.push_back(ChunkSize); + AttachmentKeys.push_back(ChunkedFileOid); + } + } + } } } - if (ChunkAssembleCount > 0) - { - remotestore_impl::ReportProgress( - OptionalContext, - "Assembling blocks"sv, - fmt::format("{} attachments 
processed, {} blocks assembled", ChunksAssembled, ComposedBlocks), - ChunkAssembleCount, - 0, - AssembleBlocksProgressTimer.GetElapsedTimeMs()); - } - - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Built oplog and collected {} attachments from {} ops into {} blocks and {} loose attachments in {}", - ChunkAssembleCount, - TotalOpCount, - GeneratedBlockCount, - LargeChunkHashes.size(), - NiceTimeSpanMs(static_cast<uint64_t>(Timer.GetElapsedTimeMs())))); - - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - BlockCreateLatch.CountDown(); - while (!BlockCreateLatch.Wait(1000)) + auto ChunkedFileAttachmentResolver = [&ChunkHashToChunkFileIndexAndChunkIndex, + &ChunkedFiles](const IoHash& AttachmentHash) -> FetchChunkFunc { + if (auto It = ChunkHashToChunkFileIndexAndChunkIndex.find(AttachmentHash); + It != ChunkHashToChunkFileIndexAndChunkIndex.end()) { - ptrdiff_t Remaining = BlockCreateLatch.Remaining(); - remotestore_impl::ReportProgress(OptionalContext, - "Assembling blocks"sv, - fmt::format("Aborting, {} blocks remaining...", Remaining), - GeneratedBlockCount, - Remaining, - AssembleBlocksProgressTimer.GetElapsedTimeMs()); + const std::pair<size_t, size_t>& ChunkFileIndexAndChunkIndex = It->second; + size_t ChunkedFileIndex = ChunkFileIndexAndChunkIndex.first; + size_t ChunkIndex = ChunkFileIndexAndChunkIndex.second; + const remotestore_impl::ChunkedFile& ChunkedFile = ChunkedFiles[ChunkedFileIndex]; + + const ChunkSource& Source = ChunkedFile.Chunked.ChunkSources[ChunkIndex]; + ZEN_ASSERT(Source.Offset + Source.Size <= ChunkedFile.Source.GetSize()); + + return [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size]( + const IoHash&) -> std::pair<uint64_t, CompositeBuffer> { + return 
{Size, + CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)), + OodleCompressor::Mermaid, + OodleCompressionLevel::None) + .GetCompressed()}; + }; } - if (GeneratedBlockCount > 0) + else { - remotestore_impl::ReportProgress(OptionalContext, - "Assembling blocks"sv, - fmt::format("Aborting, {} blocks remaining...", 0), - GeneratedBlockCount, - 0, - AssembleBlocksProgressTimer.GetElapsedTimeMs()); + ZEN_ASSERT(false); } - return {}; - } + }; + + Composer.Compose(AttachmentHashes, + AttachmentSizes, + AttachmentKeys, + [&OnNewBlock, &ChunkedFileAttachmentResolver](std::vector<IoHash>&& ChunkRawHashes) { + OnNewBlock(std::move(ChunkRawHashes), ChunkedFileAttachmentResolver); + }); } - catch (const std::exception& Ex) + + if (remotestore_impl::IsCancelled(OptionalContext)) { - BlockCreateLatch.CountDown(); - while (!BlockCreateLatch.Wait(1000)) - { - } - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), "Block creation failed", Ex.what()); - throw; + Work.Abort(); } - Stopwatch BlockCreateProgressTimer; - BlockCreateLatch.CountDown(); - while (!BlockCreateLatch.Wait(1000)) - { - ptrdiff_t Remaining = BlockCreateLatch.Remaining(); + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, IsPaused); if (remotestore_impl::IsCancelled(OptionalContext)) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - while (!BlockCreateLatch.Wait(1000)) - { - Remaining = BlockCreateLatch.Remaining(); - remotestore_impl::ReportProgress(OptionalContext, - "Creating blocks"sv, - fmt::format("Aborting, {} blocks remaining...", Remaining), - GeneratedBlockCount, - Remaining, - BlockCreateProgressTimer.GetElapsedTimeMs()); - } - remotestore_impl::ReportProgress(OptionalContext, - "Creating blocks"sv, - 
"Aborted"sv, - GeneratedBlockCount, - 0, - BlockCreateProgressTimer.GetElapsedTimeMs()); - return {}; + AbortFlag.store(true); } remotestore_impl::ReportProgress(OptionalContext, "Creating blocks"sv, - fmt::format("{} remaining...", Remaining), - GeneratedBlockCount, - Remaining, + fmt::format("{}{} remaining...", AbortFlag.load() ? "Aborting, " : "", PendingWork), + ComposedBlocks, + PendingWork, BlockCreateProgressTimer.GetElapsedTimeMs()); - } + }); - if (GeneratedBlockCount > 0) + if (!AbortFlag.load() && ComposedBlocks > 0) { - uint64_t NowMS = Timer.GetElapsedTimeMs(); remotestore_impl::ReportProgress(OptionalContext, "Creating blocks"sv, ""sv, - GeneratedBlockCount, + ComposedBlocks, 0, BlockCreateProgressTimer.GetElapsedTimeMs()); + + uint64_t NowMS = Timer.GetElapsedTimeMs(); remotestore_impl::ReportMessage( OptionalContext, - fmt::format("Created {} blocks in {}", GeneratedBlockCount, NiceTimeSpanMs(NowMS - CreateBlocksStartMS))); + fmt::format("Created {} blocks in {}", ComposedBlocks, NiceTimeSpanMs(NowMS - CreateBlocksStartMS))); } + } + + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return {}; + } - if (!RemoteResult.IsError()) + CbObjectWriter OplogContainerWriter; + RwLock::SharedLockScope _(BlocksLock); + OplogContainerWriter.AddBinary("ops"sv, CompressedOpsSection.GetCompressed().Flatten().AsIoBuffer()); + OplogContainerWriter.BeginArray("blocks"sv); + { + for (const ChunkBlockDescription& B : Blocks) { - CbObjectWriter OplogContinerWriter; - RwLock::SharedLockScope _(BlocksLock); - OplogContinerWriter.AddBinary("ops"sv, CompressedOpsSection.GetCompressed().Flatten().AsIoBuffer()); - OplogContinerWriter.BeginArray("blocks"sv); + ZEN_ASSERT(!B.ChunkRawHashes.empty()); + if (BuildBlocks) { - for (const ChunkBlockDescription& B : Blocks) + ZEN_ASSERT(B.BlockHash != IoHash::Zero); + + OplogContainerWriter.BeginObject(); { - ZEN_ASSERT(!B.ChunkRawHashes.empty()); - if (BuildBlocks) + OplogContainerWriter.AddBinaryAttachment("rawhash"sv, 
B.BlockHash); + OplogContainerWriter.BeginArray("chunks"sv); { - ZEN_ASSERT(B.BlockHash != IoHash::Zero); - - OplogContinerWriter.BeginObject(); + for (const IoHash& RawHash : B.ChunkRawHashes) { - OplogContinerWriter.AddBinaryAttachment("rawhash"sv, B.BlockHash); - OplogContinerWriter.BeginArray("chunks"sv); - { - for (const IoHash& RawHash : B.ChunkRawHashes) - { - OplogContinerWriter.AddHash(RawHash); - } - } - OplogContinerWriter.EndArray(); // "chunks" + OplogContainerWriter.AddHash(RawHash); } - OplogContinerWriter.EndObject(); - continue; } + OplogContainerWriter.EndArray(); // "chunks" + } + OplogContainerWriter.EndObject(); + continue; + } - ZEN_ASSERT(B.BlockHash == IoHash::Zero); - OplogContinerWriter.BeginObject(); + ZEN_ASSERT(B.BlockHash == IoHash::Zero); + OplogContainerWriter.BeginObject(); + { + OplogContainerWriter.BeginArray("chunks"sv); + { + for (const IoHash& RawHash : B.ChunkRawHashes) { - OplogContinerWriter.BeginArray("chunks"sv); - { - for (const IoHash& RawHash : B.ChunkRawHashes) - { - OplogContinerWriter.AddBinaryAttachment(RawHash); - } - } - OplogContinerWriter.EndArray(); + OplogContainerWriter.AddBinaryAttachment(RawHash); } - OplogContinerWriter.EndObject(); } + OplogContainerWriter.EndArray(); } - OplogContinerWriter.EndArray(); // "blocks"sv - OplogContinerWriter.BeginArray("chunkedfiles"sv); + OplogContainerWriter.EndObject(); + } + } + OplogContainerWriter.EndArray(); // "blocks"sv + OplogContainerWriter.BeginArray("chunkedfiles"sv); + { + for (const remotestore_impl::ChunkedFile& F : ChunkedFiles) + { + OplogContainerWriter.BeginObject(); { - for (const ChunkedFile& F : ChunkedFiles) + OplogContainerWriter.AddHash("rawhash"sv, F.Chunked.Info.RawHash); + OplogContainerWriter.AddInteger("rawsize"sv, F.Chunked.Info.RawSize); + OplogContainerWriter.BeginArray("chunks"sv); { - OplogContinerWriter.BeginObject(); + for (const IoHash& RawHash : F.Chunked.Info.ChunkHashes) { - OplogContinerWriter.AddHash("rawhash"sv, 
F.Chunked.Info.RawHash); - OplogContinerWriter.AddInteger("rawsize"sv, F.Chunked.Info.RawSize); - OplogContinerWriter.BeginArray("chunks"sv); - { - for (const IoHash& RawHash : F.Chunked.Info.ChunkHashes) - { - OplogContinerWriter.AddHash(RawHash); - } - } - OplogContinerWriter.EndArray(); // "chunks" - OplogContinerWriter.BeginArray("sequence"sv); - { - for (uint32_t ChunkIndex : F.Chunked.Info.ChunkSequence) - { - OplogContinerWriter.AddInteger(ChunkIndex); - } - } - OplogContinerWriter.EndArray(); // "sequence" + OplogContainerWriter.AddHash(RawHash); } - OplogContinerWriter.EndObject(); } - } - OplogContinerWriter.EndArray(); // "chunkedfiles"sv - - OplogContinerWriter.BeginArray("chunks"sv); - { - for (const IoHash& AttachmentHash : LargeChunkHashes) + OplogContainerWriter.EndArray(); // "chunks" + OplogContainerWriter.BeginArray("sequence"sv); { - OplogContinerWriter.AddBinaryAttachment(AttachmentHash); + for (uint32_t ChunkIndex : F.Chunked.Info.ChunkSequence) + { + OplogContainerWriter.AddInteger(ChunkIndex); + } } + OplogContainerWriter.EndArray(); // "sequence" } - OplogContinerWriter.EndArray(); // "chunks" + OplogContainerWriter.EndObject(); + } + } + OplogContainerWriter.EndArray(); // "chunkedfiles"sv - OplogContainerObject = OplogContinerWriter.Save(); + OplogContainerWriter.BeginArray("chunks"sv); + { + for (const IoHash& AttachmentHash : LargeChunkHashes) + { + OplogContainerWriter.AddBinaryAttachment(AttachmentHash); } } + OplogContainerWriter.EndArray(); // "chunks" + + OplogContainerObject = OplogContainerWriter.Save(); + return OplogContainerObject; } -RemoteProjectStore::LoadContainerResult +CbObject BuildContainer(CidStore& ChunkStore, ProjectStore::Project& Project, ProjectStore::Oplog& Oplog, @@ -2868,31 +3111,26 @@ BuildContainer(CidStore& ChunkStore, const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, bool EmbedLooseFiles) { - // WorkerThreadPool& WorkerPool = 
GetLargeWorkerPool(EWorkloadType::Background); - - remotestore_impl::AsyncRemoteResult RemoteResult; - CbObject ContainerObject = BuildContainer(ChunkStore, - Project, - Oplog, - MaxBlockSize, - MaxChunksPerBlock, - MaxChunkEmbedSize, - ChunkFileSizeLimit, - BuildBlocks, - IgnoreMissingAttachments, - AllowChunking, - {}, - WorkerPool, - AsyncOnBlock, - OnLargeAttachment, - OnBlockChunks, - EmbedLooseFiles, - nullptr, - RemoteResult); - return RemoteProjectStore::LoadContainerResult{RemoteResult.ConvertResult(), ContainerObject}; + return BuildContainer(ChunkStore, + Project, + Oplog, + MaxBlockSize, + MaxChunksPerBlock, + MaxChunkEmbedSize, + ChunkFileSizeLimit, + BuildBlocks, + IgnoreMissingAttachments, + AllowChunking, + {}, + WorkerPool, + AsyncOnBlock, + OnLargeAttachment, + OnBlockChunks, + EmbedLooseFiles, + /*OptionalContext*/ nullptr); } -RemoteProjectStore::Result +void SaveOplog(CidStore& ChunkStore, RemoteProjectStore& RemoteStore, ProjectStore::Project& Project, @@ -2924,53 +3162,46 @@ SaveOplog(CidStore& ChunkStore, CreateDirectories(AttachmentTempPath); } - remotestore_impl::AsyncRemoteResult RemoteResult; RwLock AttachmentsLock; std::unordered_set<IoHash, IoHash::Hasher> LargeAttachments; std::unordered_map<IoHash, remotestore_impl::CreatedBlock, IoHash::Hasher> CreatedBlocks; tsl::robin_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher> LooseLargeFiles; - auto MakeTempBlock = [AttachmentTempPath, &RemoteResult, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock, - ChunkBlockDescription&& Block) { + auto MakeTempBlock = [AttachmentTempPath, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock, + ChunkBlockDescription&& Block) { std::filesystem::path BlockPath = AttachmentTempPath; BlockPath.append(Block.BlockHash.ToHexString()); - try - { - IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath); - RwLock::ExclusiveLockScope __(AttachmentsLock); - 
CreatedBlocks.insert({Block.BlockHash, {.Payload = std::move(BlockBuffer), .Block = std::move(Block)}}); - ZEN_DEBUG("Saved temp block to '{}', {}", AttachmentTempPath, NiceBytes(BlockBuffer.GetSize())); - } - catch (const std::exception& Ex) - { - RemoteResult.SetError(gsl::narrow<int32_t>(HttpResponseCode::InternalServerError), - Ex.what(), - "Unable to create temp block file"); - return; - } + IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath); + const uint64_t BlockSize = BlockBuffer.GetSize(); + RwLock::ExclusiveLockScope __(AttachmentsLock); + CreatedBlocks.insert( + {Block.BlockHash, {.Payload = CompositeBuffer(SharedBuffer(std::move(BlockBuffer))), .Block = std::move(Block)}}); + ZEN_DEBUG("Saved temp block to '{}', {}", AttachmentTempPath, NiceBytes(BlockSize)); }; - auto UploadBlock = [&RemoteStore, &RemoteResult, &Info, OptionalContext](CompressedBuffer&& CompressedBlock, - ChunkBlockDescription&& Block) { - IoHash BlockHash = Block.BlockHash; + auto UploadBlock = [&RemoteStore, &RemoteStoreInfo, &Info, OptionalContext](CompressedBuffer&& CompressedBlock, + ChunkBlockDescription&& Block) { + IoHash BlockHash = Block.BlockHash; + uint64_t CompressedSize = CompressedBlock.GetCompressedSize(); RemoteProjectStore::SaveAttachmentResult Result = RemoteStore.SaveAttachment(CompressedBlock.GetCompressed(), BlockHash, std::move(Block)); if (Result.ErrorCode) { - RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Failed to save attachment ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return; + throw RemoteStoreError(fmt::format("Failed to save block attachment {} for oplog '{}': {}", + BlockHash, + RemoteStoreInfo.ContainerName, + Result.Reason), + Result.ErrorCode, + Result.Text); } Info.AttachmentBlocksUploaded.fetch_add(1); - Info.AttachmentBlockBytesUploaded.fetch_add(CompressedBlock.GetCompressedSize()); - 
ZEN_DEBUG("Saved block {}, {}", BlockHash, NiceBytes(CompressedBlock.GetCompressedSize())); + Info.AttachmentBlockBytesUploaded.fetch_add(CompressedSize); + ZEN_DEBUG("Saved block {}, {}", BlockHash, NiceBytes(CompressedSize)); }; std::vector<std::vector<std::pair<IoHash, FetchChunkFunc>>> BlockChunks; auto OnBlockChunks = [&BlockChunks](std::vector<std::pair<IoHash, FetchChunkFunc>>&& Chunks) { - BlockChunks.push_back({Chunks.begin(), Chunks.end()}); + BlockChunks.push_back({std::make_move_iterator(Chunks.begin()), std::make_move_iterator(Chunks.end())}); ZEN_DEBUG("Found {} block chunks", Chunks.size()); }; @@ -3001,15 +3232,10 @@ SaveOplog(CidStore& ChunkStore, RemoteProjectStore::CreateContainerResult ContainerResult = RemoteStore.CreateContainer(); if (ContainerResult.ErrorCode) { - RemoteProjectStore::Result Result = {.ErrorCode = ContainerResult.ErrorCode, - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Text = fmt::format("Failed to create container for oplog '{}' ({}): {}", - RemoteStoreInfo.ContainerName, - ContainerResult.ErrorCode, - ContainerResult.Reason)}; - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return Result; + throw RemoteStoreError( + fmt::format("Failed to create container for oplog '{}': {}", RemoteStoreInfo.ContainerName, ContainerResult.Reason), + ContainerResult.ErrorCode, + ContainerResult.Text); } if (RemoteStoreInfo.CreateBlocks) @@ -3025,7 +3251,7 @@ SaveOplog(CidStore& ChunkStore, { ZEN_ASSERT(BlockDescription.ChunkCompressedLengths.empty()); - size_t ChunkCount = BlockDescription.ChunkRawLengths.size(); + size_t ChunkCount = BlockDescription.ChunkRawHashes.size(); if (ChunkCount > 0) { // Fake sizes, will give usage number of number of chunks used rather than bytes used - better than nothing @@ -3075,97 +3301,80 @@ SaveOplog(CidStore& ChunkStore, OnLargeAttachment, OnBlockChunks, EmbedLooseFiles, - OptionalContext, - /* out 
*/ RemoteResult); - if (!RemoteResult.IsError()) + OptionalContext); + if (remotestore_impl::IsCancelled(OptionalContext)) { - Info.OplogSizeBytes = OplogContainerObject.GetSize(); + return; + } - if (remotestore_impl::IsCancelled(OptionalContext)) - { - RemoteProjectStore::Result Result = {.ErrorCode = 0, - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Text = "Operation cancelled"}; - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return Result; - } + Info.OplogSizeBytes = OplogContainerObject.GetSize(); - uint64_t ChunkCount = OplogContainerObject["chunks"sv].AsArrayView().Num(); - uint64_t BlockCount = OplogContainerObject["blocks"sv].AsArrayView().Num(); + if (remotestore_impl::IsCancelled(OptionalContext)) + { + return; + } + + uint64_t ChunkCount = OplogContainerObject["chunks"sv].AsArrayView().Num(); + uint64_t BlockCount = OplogContainerObject["blocks"sv].AsArrayView().Num(); + remotestore_impl::ReportMessage(OptionalContext, + fmt::format("Saving oplog container '{}' with {} attachments and {} blocks...", + RemoteStoreInfo.ContainerName, + ChunkCount, + BlockCount)); + Stopwatch SaveContainerTimer; + IoBuffer ContainerPayload = OplogContainerObject.GetBuffer().AsIoBuffer(); + ContainerPayload.SetContentType(ZenContentType::kCbObject); + RemoteProjectStore::SaveResult ContainerSaveResult = RemoteStore.SaveContainer(std::move(ContainerPayload)); + TransferWallTimeMS += SaveContainerTimer.GetElapsedTimeMs(); + if (ContainerSaveResult.ErrorCode) + { + throw RemoteStoreError( + fmt::format("Failed to save oplog container for oplog '{}': {}", RemoteStoreInfo.ContainerName, ContainerSaveResult.Reason), + ContainerSaveResult.ErrorCode, + ContainerSaveResult.Text); + } + else + { remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Saving oplog container '{}' with {} attachments and {} blocks...", + fmt::format("Saved container '{}' in {}", 
RemoteStoreInfo.ContainerName, - ChunkCount, - BlockCount)); - Stopwatch SaveContainerTimer; - IoBuffer ContainerPayload = OplogContainerObject.GetBuffer().AsIoBuffer(); - ContainerPayload.SetContentType(ZenContentType::kCbObject); - RemoteProjectStore::SaveResult ContainerSaveResult = RemoteStore.SaveContainer(std::move(ContainerPayload)); - TransferWallTimeMS += SaveContainerTimer.GetElapsedTimeMs(); - if (ContainerSaveResult.ErrorCode) - { - RemoteResult.SetError(ContainerSaveResult.ErrorCode, ContainerSaveResult.Reason, "Failed to save oplog container"); - RemoteProjectStore::Result Result = { - .ErrorCode = RemoteResult.GetError(), - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Text = fmt::format("Failed to save oplog container ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())}; - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Failed to save oplog container ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); - return Result; - } - else - { - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Saved container '{}' in {}", - RemoteStoreInfo.ContainerName, - NiceTimeSpanMs(static_cast<uint64_t>(ContainerSaveResult.ElapsedSeconds * 1000.0)))); - } - - { - Stopwatch UploadAttachmentsTimer; - UploadAttachments(NetworkWorkerPool, - ChunkStore, - RemoteStore, - LargeAttachments, - BlockChunks, - CreatedBlocks, - LooseLargeFiles, - ContainerSaveResult.Needs, - ForceUpload, - Info, - RemoteResult, - OptionalContext); - TransferWallTimeMS += UploadAttachmentsTimer.GetElapsedTimeMs(); - } + NiceTimeSpanMs(static_cast<uint64_t>(ContainerSaveResult.ElapsedSeconds * 1000.0)))); + } - uint32_t Try = 0; - while (!RemoteResult.IsError()) + { + Stopwatch UploadAttachmentsTimer; + UploadAttachments(NetworkWorkerPool, + ChunkStore, + RemoteStore, + LargeAttachments, + BlockChunks, + CreatedBlocks, + LooseLargeFiles, + ContainerSaveResult.Needs, + ForceUpload, + Info, + OptionalContext); + 
TransferWallTimeMS += UploadAttachmentsTimer.GetElapsedTimeMs(); + + const uint32_t MaxTries = 8; + uint32_t Try = 0; + while (Try < MaxTries) { if (remotestore_impl::IsCancelled(OptionalContext)) { - RemoteProjectStore::Result Result = {.ErrorCode = 0, - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Text = "Operation cancelled"}; - remotestore_impl::ReportMessage(OptionalContext, fmt::format("Aborting ({}): {}", Result.ErrorCode, Result.Text)); - return Result; + return; } remotestore_impl::ReportMessage(OptionalContext, "Finalizing oplog container..."); RemoteProjectStore::FinalizeResult ContainerFinalizeResult = RemoteStore.FinalizeContainer(ContainerSaveResult.RawHash); if (ContainerFinalizeResult.ErrorCode) { - RemoteResult.SetError(ContainerFinalizeResult.ErrorCode, ContainerFinalizeResult.Reason, ContainerFinalizeResult.Text); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Failed to finalize oplog container {} ({}): {}", - ContainerSaveResult.RawHash, - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - RemoteProjectStore::Result Result = RemoteResult.ConvertResult(); - return Result; + throw RemoteStoreError( + fmt::format("Failed to finalize oplog container {}: {}", ContainerSaveResult.RawHash, ContainerFinalizeResult.Reason), + ContainerFinalizeResult.ErrorCode, + ContainerFinalizeResult.Text); } + remotestore_impl::ReportMessage( OptionalContext, fmt::format("Finalized container '{}' in {}", @@ -3179,61 +3388,44 @@ SaveOplog(CidStore& ChunkStore, if (remotestore_impl::IsCancelled(OptionalContext)) { - RemoteProjectStore::Result Result = {.ErrorCode = 0, - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Text = "Operation cancelled"}; - return Result; + return; } - const uint32_t MaxTries = 8; - if (Try < MaxTries) - { - Try++; - - remotestore_impl::ReportMessage( - OptionalContext, - fmt::format("Finalize of container '{}' reported {} missing attachments. Uploading missing attachements. 
Try {}", - RemoteStoreInfo.ContainerName, - ContainerFinalizeResult.Needs.size(), - Try)); + Try++; - Stopwatch UploadAttachmentsTimer; - UploadAttachments(NetworkWorkerPool, - ChunkStore, - RemoteStore, - LargeAttachments, - BlockChunks, - CreatedBlocks, - LooseLargeFiles, - ContainerFinalizeResult.Needs, - false, - Info, - RemoteResult, - OptionalContext); - TransferWallTimeMS += UploadAttachmentsTimer.GetElapsedTimeMs(); - } - else + if (Try == MaxTries) { - RemoteResult.SetError( - gsl::narrow<int>(HttpResponseCode::InternalServerError), - "Failed to save oplog container", + throw std::runtime_error( fmt::format("Giving up finalize oplog container {} after {} retries, still getting reports of missing attachments", ContainerSaveResult.RawHash, - ContainerFinalizeResult.Needs.size())); - remotestore_impl::ReportMessage(OptionalContext, - fmt::format("Failed to finalize oplog container container {} ({}): {}", - ContainerSaveResult.RawHash, - RemoteResult.GetError(), - RemoteResult.GetErrorReason())); - break; + Try)); } - } - LooseLargeFiles.clear(); - CreatedBlocks.clear(); + remotestore_impl::ReportMessage( + OptionalContext, + fmt::format("Finalize of container '{}' reported {} missing attachments. Uploading missing attachments. 
Try {}", + RemoteStoreInfo.ContainerName, + ContainerFinalizeResult.Needs.size(), + Try)); + + Stopwatch RetryUploadAttachmentsTimer; + UploadAttachments(NetworkWorkerPool, + ChunkStore, + RemoteStore, + LargeAttachments, + BlockChunks, + CreatedBlocks, + LooseLargeFiles, + ContainerFinalizeResult.Needs, + false, + Info, + OptionalContext); + TransferWallTimeMS += RetryUploadAttachmentsTimer.GetElapsedTimeMs(); + } } - RemoteProjectStore::Result Result = RemoteResult.ConvertResult(); - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; + + LooseLargeFiles.clear(); + CreatedBlocks.clear(); remotestore_impl::LogRemoteStoreStatsDetails(RemoteStore.GetStats()); @@ -3241,17 +3433,15 @@ SaveOplog(CidStore& ChunkStore, OptionalContext, fmt::format("Saved oplog '{}' {} in {} ({}), Blocks: {} ({}), Attachments: {} ({}) {}", RemoteStoreInfo.ContainerName, - RemoteResult.GetError() == 0 ? "SUCCESS" : "FAILURE", - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)), + "SUCCESS", + NiceTimeSpanMs(Timer.GetElapsedTimeMs()), NiceBytes(Info.OplogSizeBytes), Info.AttachmentBlocksUploaded.load(), NiceBytes(Info.AttachmentBlockBytesUploaded.load()), Info.AttachmentsUploaded.load(), NiceBytes(Info.AttachmentBytesUploaded.load()), remotestore_impl::GetStats(RemoteStore.GetStats(), /*OptionalCacheStats*/ nullptr, TransferWallTimeMS))); - - return Result; -}; +} RemoteProjectStore::Result ParseOplogContainer( @@ -3274,7 +3464,7 @@ ParseOplogContainer( CbValidateError ValidateResult = CbValidateError::None; if (CbObject SectionObject = ValidateAndReadCompactBinaryObject(std::move(SectionPayload), ValidateResult); - ValidateResult == CbValidateError::None && ContainerObject) + ValidateResult == CbValidateError::None && SectionObject) { OutOplogSection = SectionObject; } @@ -3282,11 +3472,11 @@ ParseOplogContainer( { remotestore_impl::ReportMessage( OptionalContext, - fmt::format("Failed to save oplog container: '{}' ('{}')", "Section has unexpected data type", 
ToString(ValidateResult))); + fmt::format("Failed to read oplog container: '{}' ('{}')", "Section has unexpected data type", ToString(ValidateResult))); return RemoteProjectStore::Result{gsl::narrow<int>(HttpResponseCode::BadRequest), Timer.GetElapsedTimeMs() / 1000.0, "Section has unexpected data type", - "Failed to save oplog container"}; + "Failed to read oplog container"}; } std::unordered_set<IoHash, IoHash::Hasher> NeededAttachments; { @@ -3459,7 +3649,7 @@ ParseOplogContainer( .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, .Reason = "Operation cancelled"}; } - }; + } remotestore_impl::ReportMessage(OptionalContext, fmt::format("Requesting {} of {} large attachments", NeedAttachmentCount, LargeChunksArray.Num())); @@ -3498,7 +3688,7 @@ SaveOplogContainer( return Result; } -RemoteProjectStore::Result +void LoadOplog(LoadOplogContext&& Context) { using namespace std::literals; @@ -3525,10 +3715,10 @@ LoadOplog(LoadOplogContext&& Context) remotestore_impl::ReportMessage( Context.OptionalJobContext, fmt::format("Failed to load oplog container: '{}', error code: {}", LoadContainerResult.Reason, LoadContainerResult.ErrorCode)); - return RemoteProjectStore::Result{.ErrorCode = LoadContainerResult.ErrorCode, - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Reason = LoadContainerResult.Reason, - .Text = LoadContainerResult.Text}; + throw RemoteStoreError( + fmt::format("Failed to load oplog container: '{}', error code: {}", LoadContainerResult.Reason, LoadContainerResult.ErrorCode), + LoadContainerResult.ErrorCode, + LoadContainerResult.Text); } remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Loaded container in {} ({})", @@ -3536,10 +3726,10 @@ LoadOplog(LoadOplogContext&& Context) NiceBytes(LoadContainerResult.ContainerObject.GetSize()))); Info.OplogSizeBytes = LoadContainerResult.ContainerObject.GetSize(); - remotestore_impl::AsyncRemoteResult RemoteResult; - Latch AttachmentsDownloadLatch(1); - Latch 
AttachmentsWriteLatch(1); - std::atomic_size_t AttachmentCount = 0; + std::atomic<bool> AbortFlag(false); + std::atomic<bool> PauseFlag(false); + ParallelWork AttachmentWork(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + std::atomic_size_t AttachmentCount = 0; Stopwatch LoadAttachmentsTimer; std::atomic_uint64_t DownloadStartMS = (std::uint64_t)-1; @@ -3565,17 +3755,16 @@ LoadOplog(LoadOplogContext&& Context) std::vector<NeededBlockDownload> NeededBlockDownloads; auto OnNeedBlock = [&Context, - &AttachmentsDownloadLatch, - &AttachmentsWriteLatch, + &AttachmentWork, + &AbortFlag, &AttachmentCount, - &RemoteResult, &BlockCountToDownload, &Info, &LoadAttachmentsTimer, &DownloadStartMS, &NeededBlockDownloads](ThinChunkBlockDescription&& ThinBlockDescription, std::vector<uint32_t>&& NeededChunkIndexes) { - if (RemoteResult.IsError()) + if (AbortFlag.load()) { return; } @@ -3585,9 +3774,7 @@ LoadOplog(LoadOplogContext&& Context) if (ThinBlockDescription.BlockHash == IoHash::Zero) { DownloadAndSaveBlockChunks(Context, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, + AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, @@ -3603,12 +3790,12 @@ LoadOplog(LoadOplogContext&& Context) std::vector<IoHash> AttachmentsToDownload; - auto OnNeedAttachment = [&AttachmentsToDownload, &RemoteResult, &Attachments, &AttachmentCount](const IoHash& RawHash) { + auto OnNeedAttachment = [&AttachmentsToDownload, &AbortFlag, &Attachments, &AttachmentCount](const IoHash& RawHash) { if (!Attachments.insert(RawHash).second) { return; } - if (RemoteResult.IsError()) + if (AbortFlag.load()) { return; } @@ -3636,7 +3823,9 @@ LoadOplog(LoadOplogContext&& Context) Context.OptionalJobContext); if (Result.ErrorCode != 0) { - RemoteResult.SetError(Result.ErrorCode, Result.Reason, Result.Text); + AbortFlag = true; + AttachmentWork.Wait(); + throw RemoteStoreError(Result.Reason, Result.ErrorCode, Result.Text); } 
remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Parsed oplog in {}, found {} attachments, {} blocks and {} chunked files to download", @@ -3687,9 +3876,7 @@ LoadOplog(LoadOplogContext&& Context) { // Fall back to full download as we can't get enough information about the block DownloadAndSaveBlock(Context, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, + AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, @@ -3715,8 +3902,8 @@ LoadOplog(LoadOplogContext&& Context) } else { - // Not a requested block? - ZEN_ASSERT(false); + // Not a requested block? Ignore it + FindIt++; } } } @@ -3817,9 +4004,7 @@ LoadOplog(LoadOplogContext&& Context) for (uint32_t FullBlockIndex : PartialBlocksResult.FullBlockIndexes) { DownloadAndSaveBlock(Context, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, + AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, @@ -3841,9 +4026,7 @@ LoadOplog(LoadOplogContext&& Context) } DownloadAndSavePartialBlock(Context, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, + AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, @@ -3861,95 +4044,48 @@ LoadOplog(LoadOplogContext&& Context) for (const IoHash& AttachmentToDownload : AttachmentsToDownload) { - DownloadAndSaveAttachment(Context, - AttachmentsDownloadLatch, - AttachmentsWriteLatch, - RemoteResult, - Info, - LoadAttachmentsTimer, - DownloadStartMS, - AttachmentToDownload); + DownloadAndSaveAttachment(Context, AttachmentWork, Info, LoadAttachmentsTimer, DownloadStartMS, AttachmentToDownload); } uint64_t TotalChunksToDownload = AllNeededChunkHashes.size() + AttachmentsToDownload.size(); - AttachmentsDownloadLatch.CountDown(); - { - while (!AttachmentsDownloadLatch.Wait(1000)) + AttachmentWork.Wait(1000, [&](bool /*IsAborted*/, bool /*IsPaused*/, std::ptrdiff_t /*Pending*/) { + if (remotestore_impl::IsCancelled(Context.OptionalJobContext) && !AbortFlag) { - if 
(remotestore_impl::IsCancelled(Context.OptionalJobContext)) - { - if (!RemoteResult.IsError()) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - } - } - uint64_t PartialTransferWallTimeMS = TransferWallTimeMS; - if (DownloadStartMS != (uint64_t)-1) - { - PartialTransferWallTimeMS += LoadAttachmentsTimer.GetElapsedTimeMs() - DownloadStartMS.load(); - } + AbortFlag = true; + } + uint64_t PartialTransferWallTimeMS = TransferWallTimeMS; + if (DownloadStartMS != (uint64_t)-1) + { + PartialTransferWallTimeMS += LoadAttachmentsTimer.GetElapsedTimeMs() - DownloadStartMS.load(); + } - uint64_t CompletedChunkCount = Info.ChunksCompleteCount.load(); + uint64_t CompletedChunkCount = Info.ChunksCompleteCount.load(); - uint64_t AttachmentsDownloaded = - Info.AttachmentBlocksDownloaded.load() + Info.AttachmentBlocksRangesDownloaded.load() + Info.AttachmentsDownloaded.load(); - uint64_t AttachmentBytesDownloaded = Info.AttachmentBlockBytesDownloaded.load() + - Info.AttachmentBlockRangeBytesDownloaded.load() + Info.AttachmentBytesDownloaded.load(); + uint64_t AttachmentsDownloaded = + Info.AttachmentBlocksDownloaded.load() + Info.AttachmentBlocksRangesDownloaded.load() + Info.AttachmentsDownloaded.load(); + uint64_t AttachmentBytesDownloaded = Info.AttachmentBlockBytesDownloaded.load() + Info.AttachmentBlockRangeBytesDownloaded.load() + + Info.AttachmentBytesDownloaded.load(); + + remotestore_impl::ReportProgress( + Context.OptionalJobContext, + "Loading attachments"sv, + fmt::format("{}/{} ({}) chunks. {} ({}) blobs downloaded. 
{}", + CompletedChunkCount, + TotalChunksToDownload, + NiceBytes(Info.AttachmentBytesStored.load()), + AttachmentsDownloaded, + NiceBytes(AttachmentBytesDownloaded), + remotestore_impl::GetStats(Context.RemoteStore.GetStats(), Context.OptionalCacheStats, PartialTransferWallTimeMS)), + TotalChunksToDownload, + TotalChunksToDownload - CompletedChunkCount, + AttachmentsDownloadProgressTimer.GetElapsedTimeMs()); + }); - remotestore_impl::ReportProgress( - Context.OptionalJobContext, - "Loading attachments"sv, - fmt::format( - "{}/{} ({}) chunks. {} ({}) blobs downloaded. {}", - CompletedChunkCount, - TotalChunksToDownload, - NiceBytes(Info.AttachmentBytesStored.load()), - AttachmentsDownloaded, - NiceBytes(AttachmentBytesDownloaded), - remotestore_impl::GetStats(Context.RemoteStore.GetStats(), Context.OptionalCacheStats, PartialTransferWallTimeMS)), - TotalChunksToDownload, - TotalChunksToDownload - CompletedChunkCount, - AttachmentsDownloadProgressTimer.GetElapsedTimeMs()); - } - } if (DownloadStartMS != (uint64_t)-1) { TransferWallTimeMS += LoadAttachmentsTimer.GetElapsedTimeMs() - DownloadStartMS.load(); } - AttachmentsWriteLatch.CountDown(); - { - while (!AttachmentsWriteLatch.Wait(1000)) - { - if (remotestore_impl::IsCancelled(Context.OptionalJobContext)) - { - if (!RemoteResult.IsError()) - { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - } - } - - uint64_t CompletedChunkCount = Info.ChunksCompleteCount.load(); - - uint64_t AttachmentsDownloaded = - Info.AttachmentBlocksDownloaded.load() + Info.AttachmentBlocksRangesDownloaded.load() + Info.AttachmentsDownloaded.load(); - uint64_t AttachmentBytesDownloaded = Info.AttachmentBlockBytesDownloaded.load() + - Info.AttachmentBlockRangeBytesDownloaded.load() + Info.AttachmentBytesDownloaded.load(); - - remotestore_impl::ReportProgress(Context.OptionalJobContext, - "Loading attachments"sv, - fmt::format("{}/{} ({}) chunks. 
{} ({}) blobs downloaded.", - CompletedChunkCount, - TotalChunksToDownload, - NiceBytes(Info.AttachmentBytesStored.load()), - AttachmentsDownloaded, - NiceBytes(AttachmentBytesDownloaded)), - TotalChunksToDownload, - TotalChunksToDownload - CompletedChunkCount, - AttachmentsDownloadProgressTimer.GetElapsedTimeMs()); - } - } - if (AttachmentCount.load() > 0) { remotestore_impl::ReportProgress(Context.OptionalJobContext, @@ -3959,89 +4095,137 @@ LoadOplog(LoadOplogContext&& Context) 0, AttachmentsDownloadProgressTimer.GetElapsedTimeMs()); } - - if (Result.ErrorCode == 0) + if (!FilesToDechunk.empty()) { - if (!FilesToDechunk.empty()) - { - remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Dechunking {} attachments", FilesToDechunk.size())); - - Latch DechunkLatch(1); - std::filesystem::path TempFilePath = Context.Oplog.TempPath(); - for (const ChunkedInfo& Chunked : FilesToDechunk) - { - std::filesystem::path TempFileName = TempFilePath / Chunked.RawHash.ToHexString(); - DechunkLatch.AddCount(1); - Context.WorkerPool.ScheduleWork( - [&Context, &DechunkLatch, TempFileName, &Chunked, &RemoteResult, &Info]() { - ZEN_TRACE_CPU("DechunkAttachment"); + remotestore_impl::ReportMessage(Context.OptionalJobContext, fmt::format("Dechunking {} attachments", FilesToDechunk.size())); - auto _ = MakeGuard([&DechunkLatch, &TempFileName] { - std::error_code Ec; - if (IsFile(TempFileName, Ec)) + ParallelWork DechunkWork(AbortFlag, PauseFlag, WorkerThreadPool::EMode::EnableBacklog); + std::filesystem::path TempFilePath = Context.Oplog.TempPath(); + for (size_t ChunkedIndex = 0; ChunkedIndex < FilesToDechunk.size(); ChunkedIndex++) + { + const ChunkedInfo& Chunked = FilesToDechunk[ChunkedIndex]; + std::filesystem::path TempFileName = TempFilePath / Chunked.RawHash.ToHexString(); + DechunkWork.ScheduleWork( + Context.WorkerPool, + [&Context, TempFileName, &FilesToDechunk, ChunkedIndex, &Info](std::atomic<bool>& AbortFlag) { + ZEN_TRACE_CPU("DechunkAttachment"); + + 
auto _ = MakeGuard([&TempFileName] { + std::error_code Ec; + if (IsFile(TempFileName, Ec)) + { + RemoveFile(TempFileName, Ec); + if (Ec) { - RemoveFile(TempFileName, Ec); - if (Ec) - { - ZEN_INFO("Failed to remove temporary file '{}'. Reason: {}", TempFileName, Ec.message()); - } + ZEN_INFO("Failed to remove temporary file '{}'. Reason: {}", TempFileName, Ec.message()); } - DechunkLatch.CountDown(); - }); - try + } + }); + const ChunkedInfo& Chunked = FilesToDechunk[ChunkedIndex]; + + try + { + if (AbortFlag.load()) + { + return; + } + Stopwatch Timer; + + IoBuffer TmpBuffer; { - if (RemoteResult.IsError()) + BasicFile TmpFile; + std::error_code Ec; + TmpFile.Open(TempFileName, BasicFile::Mode::kTruncate, Ec); + if (Ec) { - return; + throw RemoteStoreError( + "Write error", + gsl::narrow<int>(HttpResponseCode::InternalServerError), + fmt::format("Failed to open temp file {} for chunked attachment {}", TempFileName, Chunked.RawHash)); } - Stopwatch Timer; - IoBuffer TmpBuffer; + else { - BasicFile TmpFile; - TmpFile.Open(TempFileName, BasicFile::Mode::kTruncate); + BasicFileWriter TmpWriter(TmpFile, 64u * 1024u); + + uint64_t ChunkOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder(); + BLAKE3Stream HashingStream; + for (std::uint32_t SequenceIndex : Chunked.ChunkSequence) { - BasicFileWriter TmpWriter(TmpFile, 64u * 1024u); + if (AbortFlag.load()) + { + return; + } - uint64_t ChunkOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder(); - BLAKE3Stream HashingStream; - for (std::uint32_t SequenceIndex : Chunked.ChunkSequence) + const IoHash& ChunkHash = Chunked.ChunkHashes[SequenceIndex]; + IoBuffer Chunk = Context.ChunkStore.FindChunkByCid(ChunkHash); + if (!Chunk) { - if (RemoteResult.IsError()) - { - return; - } + remotestore_impl::ReportMessage( + Context.OptionalJobContext, + fmt::format("Missing chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); - const IoHash& ChunkHash = Chunked.ChunkHashes[SequenceIndex]; - IoBuffer Chunk = 
Context.ChunkStore.FindChunkByCid(ChunkHash); - if (!Chunk) + // We only add 1 as the resulting missing count will be 1 for the dechunked file + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) { - remotestore_impl::ReportMessage( - Context.OptionalJobContext, + throw RemoteStoreError( + "Missing chunk", + gsl::narrow<int>(HttpResponseCode::NotFound), fmt::format("Missing chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); - - // We only add 1 as the resulting missing count will be 1 for the dechunked file - Info.MissingAttachmentCount.fetch_add(1); - if (!Context.IgnoreMissingAttachments) - { - RemoteResult.SetError( - gsl::narrow<int>(HttpResponseCode::NotFound), - "Missing chunk", - fmt::format("Missing chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); - } - return; } + return; + } + + IoHash RawHash; + uint64_t RawSize; - IoHash RawHash; - uint64_t RawSize; + CompressedBuffer Compressed = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), RawHash, RawSize); + if (RawHash != ChunkHash || !Compressed) + { + std::string Message = + Compressed ? 
fmt::format("Mismatching raw hash {} for chunk {} for chunked attachment {}", + RawHash, + ChunkHash, + Chunked.RawHash) + : fmt::format("Malformed data for chunk {} for chunked attachment {}", + ChunkHash, + Chunked.RawHash); + remotestore_impl::ReportMessage(Context.OptionalJobContext, Message); + + // We only add 1 as the resulting missing count will be 1 for the dechunked file + Info.MissingAttachmentCount.fetch_add(1); + if (!Context.IgnoreMissingAttachments) + { + throw RemoteStoreError("Missing chunk", gsl::narrow<int>(HttpResponseCode::NotFound), Message); + } + return; + } - CompressedBuffer Compressed = - CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), RawHash, RawSize); - if (RawHash != ChunkHash) + { + ZEN_TRACE_CPU("DecompressChunk"); + + if (!Compressed.DecompressToStream( + 0, + RawSize, + [&](uint64_t SourceOffset, + uint64_t SourceSize, + uint64_t Offset, + const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset, SourceSize, Offset); + + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + MemoryView SegmentData = Segment.GetView(); + HashingStream.Append(SegmentData); + TmpWriter.Write(SegmentData.GetData(), SegmentData.GetSize(), ChunkOffset + Offset); + } + return true; + })) { remotestore_impl::ReportMessage( Context.OptionalJobContext, - fmt::format("Mismatching raw hash {} for chunk {} for chunked attachment {}", - RawHash, + fmt::format("Failed to decompress chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); @@ -4049,151 +4233,96 @@ LoadOplog(LoadOplogContext&& Context) Info.MissingAttachmentCount.fetch_add(1); if (!Context.IgnoreMissingAttachments) { - RemoteResult.SetError( - gsl::narrow<int>(HttpResponseCode::NotFound), + throw RemoteStoreError( "Missing chunk", - fmt::format("Mismatching raw hash {} for chunk {} for chunked attachment {}", - RawHash, - ChunkHash, - Chunked.RawHash)); - } - return; - } - - { - ZEN_TRACE_CPU("DecompressChunk"); - - if 
(!Compressed.DecompressToStream(0, - RawSize, - [&](uint64_t SourceOffset, - uint64_t SourceSize, - uint64_t Offset, - const CompositeBuffer& RangeBuffer) { - ZEN_UNUSED(SourceOffset, SourceSize, Offset); - - for (const SharedBuffer& Segment : - RangeBuffer.GetSegments()) - { - MemoryView SegmentData = Segment.GetView(); - HashingStream.Append(SegmentData); - TmpWriter.Write(SegmentData.GetData(), - SegmentData.GetSize(), - ChunkOffset + Offset); - } - return true; - })) - { - remotestore_impl::ReportMessage( - Context.OptionalJobContext, + gsl::narrow<int>(HttpResponseCode::NotFound), fmt::format("Failed to decompress chunk {} for chunked attachment {}", ChunkHash, Chunked.RawHash)); - - // We only add 1 as the resulting missing count will be 1 for the dechunked file - Info.MissingAttachmentCount.fetch_add(1); - if (!Context.IgnoreMissingAttachments) - { - RemoteResult.SetError( - gsl::narrow<int>(HttpResponseCode::NotFound), - "Missing chunk", - fmt::format("Failed to decompress chunk {} for chunked attachment {}", - ChunkHash, - Chunked.RawHash)); - } - return; } + return; } - ChunkOffset += RawSize; } - BLAKE3 RawHash = HashingStream.GetHash(); - ZEN_ASSERT(Chunked.RawHash == IoHash::FromBLAKE3(RawHash)); - UniqueBuffer Header = CompressedBuffer::CreateHeaderForNoneEncoder(Chunked.RawSize, RawHash); - TmpWriter.Write(Header.GetData(), Header.GetSize(), 0); + ChunkOffset += RawSize; } - TmpFile.Close(); - TmpBuffer = IoBufferBuilder::MakeFromTemporaryFile(TempFileName); + BLAKE3 RawHash = HashingStream.GetHash(); + ZEN_ASSERT(Chunked.RawHash == IoHash::FromBLAKE3(RawHash)); + UniqueBuffer Header = CompressedBuffer::CreateHeaderForNoneEncoder(Chunked.RawSize, RawHash); + TmpWriter.Write(Header.GetData(), Header.GetSize(), 0); } - uint64_t TmpBufferSize = TmpBuffer.GetSize(); - CidStore::InsertResult InsertResult = - Context.ChunkStore.AddChunk(TmpBuffer, Chunked.RawHash, CidStore::InsertMode::kMayBeMovedInPlace); - if (InsertResult.New) - { - 
Info.AttachmentBytesStored.fetch_add(TmpBufferSize); - Info.AttachmentsStored.fetch_add(1); - } - - ZEN_INFO("Dechunked attachment {} ({}) in {}", - Chunked.RawHash, - NiceBytes(Chunked.RawSize), - NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + TmpFile.Close(); + TmpBuffer = IoBufferBuilder::MakeFromTemporaryFile(TempFileName); } - catch (const std::exception& Ex) + uint64_t TmpBufferSize = TmpBuffer.GetSize(); + CidStore::InsertResult InsertResult = + Context.ChunkStore.AddChunk(TmpBuffer, Chunked.RawHash, CidStore::InsertMode::kMayBeMovedInPlace); + if (InsertResult.New) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::InternalServerError), - fmt::format("Failed to dechunck file {}", Chunked.RawHash), - Ex.what()); + Info.AttachmentBytesStored.fetch_add(TmpBufferSize); + Info.AttachmentsStored.fetch_add(1); } - }, - WorkerThreadPool::EMode::EnableBacklog); - } - Stopwatch DechunkProgressTimer; - DechunkLatch.CountDown(); - while (!DechunkLatch.Wait(1000)) - { - ptrdiff_t Remaining = DechunkLatch.Remaining(); - if (remotestore_impl::IsCancelled(Context.OptionalJobContext)) - { - if (!RemoteResult.IsError()) + ZEN_INFO("Dechunked attachment {} ({}) in {}", + Chunked.RawHash, + NiceBytes(Chunked.RawSize), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + } + catch (const std::exception& Ex) { - RemoteResult.SetError(gsl::narrow<int>(HttpResponseCode::OK), "Operation cancelled", ""); - remotestore_impl::ReportMessage( - Context.OptionalJobContext, - fmt::format("Aborting ({}): {}", RemoteResult.GetError(), RemoteResult.GetErrorReason())); + throw RemoteStoreError(fmt::format("Failed to dechunk file {}", Chunked.RawHash), + gsl::narrow<int>(HttpResponseCode::InternalServerError), + Ex.what()); } - } - remotestore_impl::ReportProgress(Context.OptionalJobContext, - "Dechunking attachments"sv, - fmt::format("{} remaining...", Remaining), - FilesToDechunk.size(), - Remaining, - DechunkProgressTimer.GetElapsedTimeMs()); + }, + 
WorkerThreadPool::EMode::EnableBacklog); + } + + Stopwatch DechunkProgressTimer; + DechunkWork.Wait(1000, [&](bool /*IsAborted*/, bool /*IsPaused*/, std::ptrdiff_t Remaining) { + if (remotestore_impl::IsCancelled(Context.OptionalJobContext) && !AbortFlag) + { + AbortFlag = true; } remotestore_impl::ReportProgress(Context.OptionalJobContext, "Dechunking attachments"sv, - ""sv, + fmt::format("{} remaining...", Remaining), FilesToDechunk.size(), - 0, + Remaining, DechunkProgressTimer.GetElapsedTimeMs()); - } - Result = RemoteResult.ConvertResult(); + }); + remotestore_impl::ReportProgress(Context.OptionalJobContext, + "Dechunking attachments"sv, + ""sv, + FilesToDechunk.size(), + 0, + DechunkProgressTimer.GetElapsedTimeMs()); } - - if (Result.ErrorCode == 0) + if (Context.CleanOplog) { - if (Context.CleanOplog) + if (Context.OptionalCache) { - if (Context.OptionalCache) - { - Context.OptionalCache->Flush(100, [](intptr_t) { return /*DontWaitForPendingOperation*/ false; }); - } - if (!Context.Oplog.Reset()) - { - Result = RemoteProjectStore::Result{.ErrorCode = gsl::narrow<int>(HttpResponseCode::InternalServerError), - .ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0, - .Reason = fmt::format("Failed to clean existing oplog '{}'", Context.Oplog.OplogId())}; - remotestore_impl::ReportMessage(Context.OptionalJobContext, - fmt::format("Aborting ({}): {}", Result.ErrorCode, Result.Reason)); - } + Context.OptionalCache->Flush(100, [](intptr_t) { return /*DontWaitForPendingOperation*/ false; }); } - if (Result.ErrorCode == 0) + if (!Context.Oplog.Reset()) { + std::string Reason = fmt::format("Failed to clean existing oplog '{}'", Context.Oplog.OplogId()); + remotestore_impl::ReportMessage( + Context.OptionalJobContext, + fmt::format("Aborting ({}): {}", gsl::narrow<int>(HttpResponseCode::InternalServerError), Reason)); + throw RemoteStoreError(Reason, gsl::narrow<int>(HttpResponseCode::InternalServerError), ""); + } + } + { + RemoteProjectStore::Result WriteResult = 
remotestore_impl::WriteOplogSection(Context.Oplog, OplogSection, Context.OptionalJobContext); + if (WriteResult.ErrorCode) + { + remotestore_impl::ReportMessage(Context.OptionalJobContext, + fmt::format("Aborting ({}): {}", WriteResult.ErrorCode, WriteResult.Reason)); + throw RemoteStoreError(WriteResult.Reason, WriteResult.ErrorCode, WriteResult.Text); } } - Result.ElapsedSeconds = Timer.GetElapsedTimeMs() / 1000.0; - remotestore_impl::LogRemoteStoreStatsDetails(Context.RemoteStore.GetStats()); { @@ -4234,8 +4363,8 @@ LoadOplog(LoadOplogContext&& Context) fmt::format("Loaded oplog '{}' {} in {} ({}), Blocks: {} ({}), BlockRanges: {} ({}), Attachments: {} " "({}), Total: {} ({}), Stored: {} ({}), Missing: {} {}", RemoteStoreInfo.ContainerName, - Result.ErrorCode == 0 ? "SUCCESS" : "FAILURE", - NiceTimeSpanMs(static_cast<uint64_t>(Result.ElapsedSeconds * 1000.0)), + "SUCCESS", + NiceTimeSpanMs(static_cast<uint64_t>(Timer.GetElapsedTimeMs())), NiceBytes(Info.OplogSizeBytes), Info.AttachmentBlocksDownloaded.load(), NiceBytes(Info.AttachmentBlockBytesDownloaded.load()), @@ -4249,8 +4378,6 @@ LoadOplog(LoadOplogContext&& Context) NiceBytes(Info.AttachmentBytesStored.load()), Info.MissingAttachmentCount.load(), remotestore_impl::GetStats(Context.RemoteStore.GetStats(), Context.OptionalCacheStats, TransferWallTimeMS))); - - return Result; } ChunkedInfo @@ -4329,6 +4456,64 @@ namespace projectstore_testutils { return Package; }; + static CbPackage CreateFilesOplogPackage(const Oid& Id, + const std::filesystem::path ProjectRootDir, + const std::span<const std::pair<Oid, std::filesystem::path>>& Attachments) + { + CbPackage Package; + CbObjectWriter Object; + Object << "key"sv << OidAsString(Id); + if (!Attachments.empty()) + { + Object.BeginArray("files"); + for (const auto& Attachment : Attachments) + { + std::filesystem::path ServerPath = std::filesystem::relative(Attachment.second, ProjectRootDir).generic_string(); + std::filesystem::path ClientPath = ServerPath; // 
dummy + Object.BeginObject(); + Object << "id"sv << Attachment.first; + Object << "serverpath"sv << ServerPath.string(); + Object << "clientpath"sv << ClientPath.string(); + Object.EndObject(); + } + Object.EndArray(); + } + Package.SetObject(Object.Save()); + return Package; + }; + + // Variant of CreateFilesOplogPackage where each entry includes a "data" field of + // CbFieldType::Hash set to IoHash::Zero. CbFieldView::AsHash() returns Zero for a + // plain Hash field whose stored value is zero, so RewriteOp still enters the rewrite + // path (DataHash == Zero) and calls RewriteCbObject, which then finds the pre-existing + // "data" field, triggering the return-true branch at line 1858. + static CbPackage CreateFilesOplogPackageWithZeroDataHash(const Oid& Id, + const std::filesystem::path ProjectRootDir, + const std::span<const std::pair<Oid, std::filesystem::path>>& Attachments) + { + CbPackage Package; + CbObjectWriter Object; + Object << "key"sv << OidAsString(Id); + if (!Attachments.empty()) + { + Object.BeginArray("files"); + for (const auto& Attachment : Attachments) + { + std::filesystem::path ServerPath = std::filesystem::relative(Attachment.second, ProjectRootDir).generic_string(); + std::filesystem::path ClientPath = ServerPath; // dummy + Object.BeginObject(); + Object << "id"sv << Attachment.first; + Object << "serverpath"sv << ServerPath.string(); + Object << "clientpath"sv << ClientPath.string(); + Object.AddHash("data"sv, IoHash::Zero); + Object.EndObject(); + } + Object.EndArray(); + } + Package.SetObject(Object.Save()); + return Package; + }; + static std::vector<std::pair<Oid, CompressedBuffer>> CreateAttachments( const std::span<const size_t>& Sizes, OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, @@ -4345,31 +4530,102 @@ namespace projectstore_testutils { return Result; } - class TestJobContext : public JobContext + static std::vector<std::pair<Oid, std::filesystem::path>> CreateFileAttachments(const 
std::filesystem::path& RootDir, + const std::span<const size_t>& Sizes) { - public: - explicit TestJobContext(int& OpIndex) : m_OpIndex(OpIndex) {} - virtual bool IsCancelled() const { return false; } - virtual void ReportMessage(std::string_view Message) { ZEN_INFO("Job {}: {}", m_OpIndex, Message); } - virtual void ReportProgress(std::string_view CurrentOp, - std::string_view Details, - ptrdiff_t TotalCount, - ptrdiff_t RemainingCount, - uint64_t ElapsedTimeMs) + std::vector<std::pair<Oid, std::filesystem::path>> Result; + Result.reserve(Sizes.size()); + for (size_t Size : Sizes) { - ZEN_UNUSED(ElapsedTimeMs); - ZEN_INFO("Job {}: Op '{}'{} {}/{}", - m_OpIndex, - CurrentOp, - Details.empty() ? "" : fmt::format(" {}", Details), - TotalCount - RemainingCount, - TotalCount); + IoBuffer FileBlob = CreateRandomBlob(Size); + IoHash FileHash = IoHash::HashBuffer(FileBlob); + std::filesystem::path UncompressedFilePath = RootDir / "content" / "uncompressed_file" / FileHash.ToHexString(); + CreateDirectories(UncompressedFilePath.parent_path()); + WriteFile(UncompressedFilePath, FileBlob); + Result.push_back({Oid::NewOid(), UncompressedFilePath}); } + return Result; + } + + struct CapturingJobContext : public JobContext + { + bool IsCancelled() const override { return m_Cancel; } + void ReportMessage(std::string_view Message) override + { + RwLock::ExclusiveLockScope _(m_Lock); + Messages.emplace_back(Message); + } + void ReportProgress(std::string_view Op, std::string_view Details, ptrdiff_t, ptrdiff_t, uint64_t) override + { + RwLock::ExclusiveLockScope _(m_Lock); + ProgressMessages.emplace_back(fmt::format("{}: {}", Op, Details)); + } + + bool HasMessage(std::string_view Substr) const + { + RwLock::SharedLockScope _(m_Lock); + return std::any_of(Messages.begin(), Messages.end(), [Substr](const std::string& M) { + return M.find(Substr) != std::string::npos; + }); + } + + bool m_Cancel = false; + std::vector<std::string> Messages; + std::vector<std::string> 
ProgressMessages; private: - int& m_OpIndex; + mutable RwLock m_Lock; }; + // Worker pool pair with separate NetworkPool and WorkerPool. + struct TestWorkerPools + { + private: + uint32_t m_NetworkCount; + uint32_t m_WorkerCount; + + public: + WorkerThreadPool NetworkPool; + WorkerThreadPool WorkerPool; + + TestWorkerPools() + : m_NetworkCount(Max(GetHardwareConcurrency() / 4u, 2u)) + , m_WorkerCount(m_NetworkCount < GetHardwareConcurrency() ? Max(GetHardwareConcurrency() - m_NetworkCount, 4u) : 4u) + , NetworkPool(m_NetworkCount) + , WorkerPool(m_WorkerCount) + { + } + }; + + inline uint32_t GetWorkerCount() { return Max(GetHardwareConcurrency() / 4u, 2u); } + + inline IoHash MakeTestHash(uint8_t Index) + { + uint8_t Data[20] = {}; + Data[0] = Index; + return IoHash::MakeFrom(Data); + } + + inline Oid MakeTestOid(uint32_t Index) + { + uint32_t Data[3] = {Index, 0, 0}; + return Oid::FromMemory(Data); + } + + // MaxChunks must be <= 127 (so MeasureVarUInt(MaxChunks) == 1) and MaxChunkEmbedSize is + // fixed at 100 to keep header sizes deterministic in BlockComposer tests. 
+ inline remotestore_impl::BlockComposer::Configuration MakeTestConfig(uint64_t UsableSize, uint64_t MaxChunks) + { + constexpr uint64_t MaxChunkEmbedSize = 100; + uint64_t MaxHeaderSize = + CompressedBuffer::GetHeaderSizeForNoneEncoder() + MeasureVarUInt(MaxChunks) + MeasureVarUInt(MaxChunkEmbedSize) * MaxChunks; + return remotestore_impl::BlockComposer::Configuration{ + .MaxBlockSize = UsableSize + MaxHeaderSize, + .MaxChunksPerBlock = MaxChunks, + .MaxChunkEmbedSize = MaxChunkEmbedSize, + }; + } + } // namespace projectstore_testutils TEST_SUITE_BEGIN("remotestore.projectstore"); @@ -4434,6 +4690,11 @@ TEST_CASE_TEMPLATE("project.store.export", Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{256u * 1024u, 92u * 1024u}, OodleCompressionLevel::None))); + Oplog->AppendNewOplogEntry( + CreateFilesOplogPackage(Oid::NewOid(), + RootDir, + CreateFileAttachments(RootDir, std::initializer_list<size_t>{423 * 1024, 2 * 1024, 3213, 762 * 1024}))); + FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 64u * 1024, .MaxChunksPerBlock = 1000, .MaxChunkEmbedSize = 32 * 1024u, @@ -4446,107 +4707,57 @@ TEST_CASE_TEMPLATE("project.store.export", std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); RemoteProjectStore::RemoteStoreInfo StoreInfo = RemoteStore->GetInfo(); - uint32_t NetworkWorkerCount = Max(GetHardwareConcurrency() / 4u, 2u); - uint32_t WorkerCount = (NetworkWorkerCount < GetHardwareConcurrency()) ? 
Max(GetHardwareConcurrency() - NetworkWorkerCount, 4u) : 4u; - - WorkerThreadPool WorkerPool(WorkerCount); - WorkerThreadPool NetworkPool(NetworkWorkerCount); - - RemoteProjectStore::Result ExportResult = SaveOplog(CidStore, - *RemoteStore, - *Project.Get(), - *Oplog, - NetworkPool, - WorkerPool, - Options.MaxBlockSize, - Options.MaxChunksPerBlock, - Options.MaxChunkEmbedSize, - Options.ChunkFileSizeLimit, - true, - false, - false, - nullptr); - - REQUIRE(ExportResult.ErrorCode == 0); + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + SaveOplog(CidStore, + *RemoteStore, + *Project.Get(), + *Oplog, + NetworkPool, + WorkerPool, + Options.MaxBlockSize, + Options.MaxChunksPerBlock, + Options.MaxChunkEmbedSize, + Options.ChunkFileSizeLimit, + true, + false, + false, + nullptr); Ref<ProjectStore::Oplog> OplogImport = Project->NewOplog("oplog2", {}); - CHECK(OplogImport); - - int OpJobIndex = 0; - TestJobContext OpJobContext(OpJobIndex); - - RemoteProjectStore::Result ImportResult = LoadOplog(LoadOplogContext{.ChunkStore = CidStore, - .RemoteStore = *RemoteStore, - .OptionalCache = nullptr, - .CacheBuildId = Oid::Zero, - .Oplog = *OplogImport, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = WorkerPool, - .ForceDownload = false, - .IgnoreMissingAttachments = false, - .CleanOplog = false, - .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, - .OptionalJobContext = &OpJobContext}); - CHECK(ImportResult.ErrorCode == 0); - OpJobIndex++; - - RemoteProjectStore::Result ImportForceResult = LoadOplog(LoadOplogContext{.ChunkStore = CidStore, - .RemoteStore = *RemoteStore, - .OptionalCache = nullptr, - .CacheBuildId = Oid::Zero, - .Oplog = *OplogImport, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = WorkerPool, - .ForceDownload = true, - .IgnoreMissingAttachments = false, - .CleanOplog = false, - .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, - .OptionalJobContext = 
&OpJobContext}); - CHECK(ImportForceResult.ErrorCode == 0); - OpJobIndex++; - - RemoteProjectStore::Result ImportCleanResult = LoadOplog(LoadOplogContext{.ChunkStore = CidStore, - .RemoteStore = *RemoteStore, - .OptionalCache = nullptr, - .CacheBuildId = Oid::Zero, - .Oplog = *OplogImport, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = WorkerPool, - .ForceDownload = false, - .IgnoreMissingAttachments = false, - .CleanOplog = true, - .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, - .OptionalJobContext = &OpJobContext}); - CHECK(ImportCleanResult.ErrorCode == 0); - OpJobIndex++; - - RemoteProjectStore::Result ImportForceCleanResult = - LoadOplog(LoadOplogContext{.ChunkStore = CidStore, - .RemoteStore = *RemoteStore, - .OptionalCache = nullptr, - .CacheBuildId = Oid::Zero, - .Oplog = *OplogImport, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = WorkerPool, - .ForceDownload = true, - .IgnoreMissingAttachments = false, - .CleanOplog = true, - .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, - .OptionalJobContext = &OpJobContext}); - CHECK(ImportForceCleanResult.ErrorCode == 0); - OpJobIndex++; + REQUIRE(OplogImport); + + CapturingJobContext Ctx; + auto DoLoad = [&](bool Force, bool Clean) { + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = Oid::Zero, + .Oplog = *OplogImport, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = Force, + .IgnoreMissingAttachments = false, + .CleanOplog = Clean, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &Ctx}); + }; + + DoLoad(false, false); + DoLoad(true, false); + DoLoad(false, true); + DoLoad(true, true); } -// Common oplog setup used by the two tests below. -// Returns a FileRemoteStore backed by ExportDir that has been populated with a SaveOplog call. 
-// Keeps the test data identical to project.store.export so the two test suites exercise the same blocks/attachments. -static RemoteProjectStore::Result -SetupExportStore(CidStore& CidStore, - ProjectStore::Project& Project, - WorkerThreadPool& NetworkPool, - WorkerThreadPool& WorkerPool, - const std::filesystem::path& ExportDir, - std::shared_ptr<RemoteProjectStore>& OutRemoteStore) +// Populates ExportDir with a SaveOplog call using the same data as project.store.export. +static std::shared_ptr<RemoteProjectStore> +SetupExportStore(CidStore& CidStore, + ProjectStore::Project& Project, + WorkerThreadPool& NetworkPool, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& ExportDir) { using namespace projectstore_testutils; using namespace std::literals; @@ -4554,7 +4765,7 @@ SetupExportStore(CidStore& CidStore, Ref<ProjectStore::Oplog> Oplog = Project.NewOplog("oplog_export", {}); if (!Oplog) { - return RemoteProjectStore::Result{.ErrorCode = -1}; + throw std::runtime_error("Failed to create oplog"); } Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), {})); @@ -4565,6 +4776,10 @@ SetupExportStore(CidStore& CidStore, Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{256u * 1024u, 92u * 1024u}, OodleCompressionLevel::None))); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage( + Oid::NewOid(), + Project.RootDir, + CreateFileAttachments(Project.RootDir, std::initializer_list<size_t>{423 * 1024, 2 * 1024, 3213, 762 * 1024}))); FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 64u * 1024, .MaxChunksPerBlock = 1000, @@ -4576,43 +4791,33 @@ SetupExportStore(CidStore& CidStore, /*.ForceDisableBlocks =*/false, /*.ForceEnableTempBlocks =*/false}; - OutRemoteStore = CreateFileRemoteStore(Log(), Options); - return SaveOplog(CidStore, - *OutRemoteStore, - Project, - *Oplog, - NetworkPool, - WorkerPool, - Options.MaxBlockSize, - Options.MaxChunksPerBlock, - 
Options.MaxChunkEmbedSize, - Options.ChunkFileSizeLimit, - /*EmbedLooseFiles*/ true, - /*ForceUpload*/ false, - /*IgnoreMissingAttachments*/ false, - /*OptionalContext*/ nullptr); + std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); + SaveOplog(CidStore, + *RemoteStore, + Project, + *Oplog, + NetworkPool, + WorkerPool, + Options.MaxBlockSize, + Options.MaxChunksPerBlock, + Options.MaxChunkEmbedSize, + Options.ChunkFileSizeLimit, + /*EmbedLooseFiles*/ true, + /*ForceUpload*/ false, + /*IgnoreMissingAttachments*/ false, + /*OptionalContext*/ nullptr); + return RemoteStore; } -// Creates an export store with a single oplog entry that packs six 512 KB chunks into one -// ~3 MB block (MaxBlockSize = 8 MB). The resulting block slack (~1.5 MB) far exceeds the -// 512 KB threshold that ChunkBlockAnalyser requires before it will consider partial-block -// downloads instead of full-block downloads. -// -// This function is self-contained: it creates its own GcManager, CidStore, ProjectStore and -// Project internally so that each call is independent of any outer test context. After -// SaveOplog returns, all persistent data lives on disk inside ExportDir and the caller can -// freely query OutRemoteStore without holding any references to the internal context. -static RemoteProjectStore::Result -SetupPartialBlockExportStore(WorkerThreadPool& NetworkPool, - WorkerThreadPool& WorkerPool, - const std::filesystem::path& ExportDir, - std::shared_ptr<RemoteProjectStore>& OutRemoteStore) +// Creates an export store with six 512 KB chunks packed into one ~3 MB block (MaxBlockSize=8 MB). +// The ~1.5 MB slack exceeds the ChunkBlockAnalyser threshold, enabling partial-block downloads. +// Uses its own GcManager/CidStore/ProjectStore so each call is independent. 
+static std::shared_ptr<RemoteProjectStore> +SetupPartialBlockExportStore(WorkerThreadPool& NetworkPool, WorkerThreadPool& WorkerPool, const std::filesystem::path& ExportDir) { using namespace projectstore_testutils; using namespace std::literals; - // Self-contained CAS and project store. Subdirectories of ExportDir keep everything - // together without relying on the outer TEST_CASE's ExportCidStore / ExportProject. GcManager LocalGc; CidStore LocalCidStore(LocalGc); CidStoreConfiguration LocalCidConfig = {.RootDirectory = ExportDir / "cas", .TinyValueThreshold = 1024, .HugeValueThreshold = 4096}; @@ -4630,48 +4835,43 @@ SetupPartialBlockExportStore(WorkerThreadPool& NetworkPool, Ref<ProjectStore::Oplog> Oplog = LocalProject->NewOplog("oplog_partial_block", {}); if (!Oplog) { - return RemoteProjectStore::Result{.ErrorCode = -1}; + throw std::runtime_error("Failed to create oplog"); } - // Six 512 KB chunks with OodleCompressionLevel::None so the compressed size stays large - // and the block genuinely exceeds the 512 KB slack threshold. Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u, 512u * 1024u}, OodleCompressionLevel::None))); - // MaxChunkEmbedSize must be larger than the compressed size of each 512 KB chunk - // (OodleCompressionLevel::None → compressed ≈ raw ≈ 512 KB). With the legacy - // 32 KB limit all six chunks would become loose large attachments and no block would - // be created, so we use the production default of 1.5 MB instead. 
- FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 8u * 1024u * 1024u, - .MaxChunksPerBlock = 1000, - .MaxChunkEmbedSize = RemoteStoreOptions::DefaultMaxChunkEmbedSize, - .ChunkFileSizeLimit = 64u * 1024u * 1024u}, - /*.FolderPath =*/ExportDir, - /*.Name =*/std::string("oplog_partial_block"), - /*.OptionalBaseName =*/std::string(), - /*.ForceDisableBlocks =*/false, - /*.ForceEnableTempBlocks =*/false}; - OutRemoteStore = CreateFileRemoteStore(Log(), Options); - return SaveOplog(LocalCidStore, - *OutRemoteStore, - *LocalProject, - *Oplog, - NetworkPool, - WorkerPool, - Options.MaxBlockSize, - Options.MaxChunksPerBlock, - Options.MaxChunkEmbedSize, - Options.ChunkFileSizeLimit, - /*EmbedLooseFiles*/ true, - /*ForceUpload*/ false, - /*IgnoreMissingAttachments*/ false, - /*OptionalContext*/ nullptr); + // MaxChunkEmbedSize must exceed 512 KB (compressed size with None encoding) or all chunks + // become loose attachments and no block is created. + FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 8u * 1024u * 1024u, + .MaxChunksPerBlock = 1000, + .MaxChunkEmbedSize = RemoteStoreOptions::DefaultMaxChunkEmbedSize, + .ChunkFileSizeLimit = 64u * 1024u * 1024u}, + /*.FolderPath =*/ExportDir, + /*.Name =*/std::string("oplog_partial_block"), + /*.OptionalBaseName =*/std::string(), + /*.ForceDisableBlocks =*/false, + /*.ForceEnableTempBlocks =*/false}; + std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); + SaveOplog(LocalCidStore, + *RemoteStore, + *LocalProject, + *Oplog, + NetworkPool, + WorkerPool, + Options.MaxBlockSize, + Options.MaxChunksPerBlock, + Options.MaxChunkEmbedSize, + Options.ChunkFileSizeLimit, + /*EmbedLooseFiles*/ true, + /*ForceUpload*/ false, + /*IgnoreMissingAttachments*/ false, + /*OptionalContext*/ nullptr); + return RemoteStore; } -// Returns the first block hash that has at least MinChunkCount chunks, or a zero IoHash -// if no qualifying block exists in Store. 
static IoHash FindBlockWithMultipleChunks(RemoteProjectStore& Store, size_t MinChunkCount) { @@ -4700,10 +4900,8 @@ FindBlockWithMultipleChunks(RemoteProjectStore& Store, size_t MinChunkCount) return {}; } -// Loads BlockHash from Source and inserts every even-indexed chunk (0, 2, 4, …) into -// TargetCidStore. Odd-indexed chunks are left absent so that when an import is run -// against the same block, HasAttachment returns false for three non-adjacent positions -// — the minimum needed to exercise the multi-range partial-block download paths. +// Seeds TargetCidStore with even-indexed chunks (0, 2, 4 ...) from BlockHash, leaving +// odd chunks absent to create non-adjacent missing ranges for partial-block download tests. static void SeedCidStoreWithAlternateChunks(CidStore& TargetCidStore, RemoteProjectStore& Source, const IoHash& BlockHash) { @@ -4754,8 +4952,7 @@ TEST_CASE("project.store.import.context_settings") std::filesystem::path ProjectRootDir = TempDir.Path() / "game"; std::filesystem::path ProjectFilePath = TempDir.Path() / "game" / "game.uproject"; - // Export-side CAS and project store: used only by SetupExportStore to build the remote store - // payload. Kept separate from the import side so the two CAS instances are disjoint. + // Export-side CAS and project store; kept disjoint from the import side. GcManager ExportGc; CidStore ExportCidStore(ExportGc); CidStoreConfiguration ExportCidConfig = {.RootDirectory = TempDir.Path() / "export_cas", @@ -4772,19 +4969,14 @@ TEST_CASE("project.store.import.context_settings") ProjectRootDir.string(), ProjectFilePath.string())); - uint32_t NetworkWorkerCount = Max(GetHardwareConcurrency() / 4u, 2u); - uint32_t WorkerCount = (NetworkWorkerCount < GetHardwareConcurrency()) ? 
Max(GetHardwareConcurrency() - NetworkWorkerCount, 4u) : 4u; - WorkerThreadPool WorkerPool(WorkerCount); - WorkerThreadPool NetworkPool(NetworkWorkerCount); + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; - std::shared_ptr<RemoteProjectStore> RemoteStore; - RemoteProjectStore::Result ExportResult = - SetupExportStore(ExportCidStore, *ExportProject, NetworkPool, WorkerPool, ExportDir.Path(), RemoteStore); - REQUIRE(ExportResult.ErrorCode == 0); + std::shared_ptr<RemoteProjectStore> RemoteStore = + SetupExportStore(ExportCidStore, *ExportProject, NetworkPool, WorkerPool, ExportDir.Path()); - // Import-side CAS and project store: starts empty, mirroring a fresh machine that has never - // downloaded the data. HasAttachment() therefore returns false for every chunk, so the import - // genuinely contacts the remote store without needing ForceDownload on the populate pass. + // Import-side CAS starts empty so the first import downloads from the remote store without ForceDownload. GcManager ImportGc; CidStore ImportCidStore(ImportGc); CidStoreConfiguration ImportCidConfig = {.RootDirectory = TempDir.Path() / "import_cas", @@ -4819,11 +5011,9 @@ TEST_CASE("project.store.import.context_settings") int OpJobIndex = 0; - TestJobContext OpJobContext(OpJobIndex); + CapturingJobContext OpJobContext; - // Helper: run a LoadOplog against the import-side CAS/project with the given context knobs. - // Each call creates a fresh oplog so repeated calls within one SUBCASE don't short-circuit on - // already-present data. + // Each call creates a fresh oplog to prevent short-circuiting on already-present data. 
auto DoImport = [&](BuildStorageCache* OptCache, EPartialBlockRequestMode Mode, double StoreLatency, @@ -4831,168 +5021,132 @@ TEST_CASE("project.store.import.context_settings") double CacheLatency, uint64_t CacheRanges, bool PopulateCache, - bool ForceDownload) -> RemoteProjectStore::Result { + bool ForceDownload) -> void { Ref<ProjectStore::Oplog> ImportOplog = ImportProject->NewOplog(fmt::format("import_{}", OpJobIndex++), {}); - return LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, - .RemoteStore = *RemoteStore, - .OptionalCache = OptCache, - .CacheBuildId = CacheBuildId, - .Oplog = *ImportOplog, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = WorkerPool, - .ForceDownload = ForceDownload, - .IgnoreMissingAttachments = false, - .CleanOplog = false, - .PartialBlockRequestMode = Mode, - .PopulateCache = PopulateCache, - .StoreLatencySec = StoreLatency, - .StoreMaxRangeCountPerRequest = StoreRanges, - .CacheLatencySec = CacheLatency, - .CacheMaxRangeCountPerRequest = CacheRanges, - .OptionalJobContext = &OpJobContext}); + LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = OptCache, + .CacheBuildId = CacheBuildId, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = ForceDownload, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = Mode, + .PopulateCache = PopulateCache, + .StoreLatencySec = StoreLatency, + .StoreMaxRangeCountPerRequest = StoreRanges, + .CacheLatencySec = CacheLatency, + .CacheMaxRangeCountPerRequest = CacheRanges, + .OptionalJobContext = &OpJobContext}); }; // Shorthand: Mode=All, low latency, 128 ranges for both store and cache. 
- auto ImportAll = [&](BuildStorageCache* OptCache, bool Populate, bool Force) { - return DoImport(OptCache, EPartialBlockRequestMode::All, 0.001, 128u, 0.001, 128u, Populate, Force); + auto ImportAll = [&](BuildStorageCache* OptCache, bool Populate, bool Force) -> void { + DoImport(OptCache, EPartialBlockRequestMode::All, 0.001, 128u, 0.001, 128u, Populate, Force); }; - SUBCASE("mode_off_no_cache") - { - // Baseline: no partial block requests, no cache. - RemoteProjectStore::Result R = - DoImport(nullptr, EPartialBlockRequestMode::Off, -1.0, (uint64_t)-1, -1.0, (uint64_t)-1, false, false); - CHECK(R.ErrorCode == 0); - } + SUBCASE("mode_off_no_cache") { DoImport(nullptr, EPartialBlockRequestMode::Off, -1.0, (uint64_t)-1, -1.0, (uint64_t)-1, false, false); } SUBCASE("mode_all_multirange_cloud_no_cache") { - // StoreMaxRangeCountPerRequest > 1 → MultiRange cloud path. - RemoteProjectStore::Result R = DoImport(nullptr, EPartialBlockRequestMode::All, 0.001, 128u, -1.0, 0u, false, false); - CHECK(R.ErrorCode == 0); + // StoreMaxRangeCountPerRequest > 1 -> MultiRange cloud path. + DoImport(nullptr, EPartialBlockRequestMode::All, 0.001, 128u, -1.0, 0u, false, false); } SUBCASE("mode_all_singlerange_cloud_no_cache") { - // StoreMaxRangeCountPerRequest == 1 → SingleRange cloud path. - RemoteProjectStore::Result R = DoImport(nullptr, EPartialBlockRequestMode::All, 0.001, 1u, -1.0, 0u, false, false); - CHECK(R.ErrorCode == 0); + // StoreMaxRangeCountPerRequest == 1 -> SingleRange cloud path. + DoImport(nullptr, EPartialBlockRequestMode::All, 0.001, 1u, -1.0, 0u, false, false); } SUBCASE("mode_mixed_high_latency_no_cache") { // High store latency encourages range merging; Mixed uses SingleRange for cloud, Off for cache. 
- RemoteProjectStore::Result R = DoImport(nullptr, EPartialBlockRequestMode::Mixed, 0.1, 128u, -1.0, 0u, false, false); - CHECK(R.ErrorCode == 0); + DoImport(nullptr, EPartialBlockRequestMode::Mixed, 0.1, 128u, -1.0, 0u, false, false); } SUBCASE("cache_populate_and_hit") { - // First import: ImportCidStore is empty so all blocks are downloaded from the remote store - // and written to the cache. - RemoteProjectStore::Result PopulateResult = ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); - CHECK(PopulateResult.ErrorCode == 0); + // First import: CidStore empty -> blocks downloaded and written to cache. + ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); CHECK(CacheStats.PutBlobCount > 0); - // Re-import with ForceDownload=true: all chunks are now in ImportCidStore but Force overrides - // HasAttachment() so the download logic re-runs and serves blocks from the cache instead of - // the remote store. + // Re-import with Force=true: HasAttachment overridden, blocks served from cache. ResetCacheStats(); - RemoteProjectStore::Result HitResult = ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/true); - CHECK(HitResult.ErrorCode == 0); + ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/true); CHECK(CacheStats.PutBlobCount == 0); - // TotalRequestCount covers both full-blob cache hits and partial-range cache hits. CHECK(CacheStats.TotalRequestCount > 0); } SUBCASE("cache_no_populate_flag") { - // Cache is provided but PopulateCache=false: blocks are downloaded to ImportCidStore but - // nothing should be written to the cache. - RemoteProjectStore::Result R = ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/false); - CHECK(R.ErrorCode == 0); + ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/false); CHECK(CacheStats.PutBlobCount == 0); } SUBCASE("mode_zencacheonly_cache_multirange") { - // Pre-populate the cache via a plain import, then re-import with ZenCacheOnly + - // CacheMaxRangeCountPerRequest=128. 
With 100% of chunks needed, all blocks go to - // FullBlockIndexes and GetBuildBlob (full blob) is called from the cache. - // CacheMaxRangeCountPerRequest > 1 would route partial downloads through GetBuildBlobRanges - // if the analyser ever emits BlockRanges entries. - RemoteProjectStore::Result Populate = ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); - CHECK(Populate.ErrorCode == 0); + // Pre-populate; re-import via ZenCacheOnly. All chunks needed -> FullBlockIndexes path (GetBuildBlob). + ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); ResetCacheStats(); - RemoteProjectStore::Result R = DoImport(Cache.get(), EPartialBlockRequestMode::ZenCacheOnly, 0.1, 128u, 0.001, 128u, false, true); - CHECK(R.ErrorCode == 0); + DoImport(Cache.get(), EPartialBlockRequestMode::ZenCacheOnly, 0.1, 128u, 0.001, 128u, false, true); CHECK(CacheStats.TotalRequestCount > 0); } SUBCASE("mode_zencacheonly_cache_singlerange") { - // Pre-populate the cache, then re-import with ZenCacheOnly + CacheMaxRangeCountPerRequest=1. - // With 100% of chunks needed the analyser sends all blocks to FullBlockIndexes (full-block - // download path), which calls GetBuildBlob with no range offset — a full-blob cache hit. - // The single-range vs multi-range distinction only matters for the partial-block (BlockRanges) - // path, which is not reached when all chunks are needed. - RemoteProjectStore::Result Populate = ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); - CHECK(Populate.ErrorCode == 0); + // Pre-populate; re-import via ZenCacheOnly with CacheMaxRangeCountPerRequest=1. All chunks needed -> GetBuildBlob (full-blob). 
+ ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); ResetCacheStats(); - RemoteProjectStore::Result R = DoImport(Cache.get(), EPartialBlockRequestMode::ZenCacheOnly, 0.1, 128u, 0.001, 1u, false, true); - CHECK(R.ErrorCode == 0); + DoImport(Cache.get(), EPartialBlockRequestMode::ZenCacheOnly, 0.1, 128u, 0.001, 1u, false, true); CHECK(CacheStats.TotalRequestCount > 0); } SUBCASE("mode_all_cache_and_cloud_multirange") { // Pre-populate cache; All mode uses multi-range for both the cache and cloud paths. - RemoteProjectStore::Result Populate = ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); - CHECK(Populate.ErrorCode == 0); + ImportAll(Cache.get(), /*PopulateCache=*/true, /*Force=*/false); ResetCacheStats(); - RemoteProjectStore::Result R = ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/true); - CHECK(R.ErrorCode == 0); + ImportAll(Cache.get(), /*PopulateCache=*/false, /*Force=*/true); CHECK(CacheStats.TotalRequestCount > 0); } SUBCASE("partial_block_cloud_multirange") { - // Export store with 6 × 512 KB chunks packed into one ~3 MB block. ScopedTemporaryDirectory PartialExportDir; - std::shared_ptr<RemoteProjectStore> PartialRemoteStore; - RemoteProjectStore::Result ExportR = - SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path(), PartialRemoteStore); - REQUIRE(ExportR.ErrorCode == 0); + std::shared_ptr<RemoteProjectStore> PartialRemoteStore = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path()); // Seeding even-indexed chunks (0, 2, 4) leaves odd ones (1, 3, 5) absent in - // ImportCidStore. Three non-adjacent needed positions → three BlockRangeDescriptors. + // ImportCidStore. Three non-adjacent needed positions -> three BlockRangeDescriptors. 
IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); CHECK(BlockHash != IoHash::Zero); SeedCidStoreWithAlternateChunks(ImportCidStore, *PartialRemoteStore, BlockHash); - // StoreMaxRangeCountPerRequest=128 → all three ranges sent in one LoadAttachmentRanges call. - Ref<ProjectStore::Oplog> PartialOplog = ImportProject->NewOplog(fmt::format("partial_cloud_multi_{}", OpJobIndex++), {}); - RemoteProjectStore::Result R = LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, - .RemoteStore = *PartialRemoteStore, - .OptionalCache = nullptr, - .CacheBuildId = CacheBuildId, - .Oplog = *PartialOplog, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = WorkerPool, - .ForceDownload = false, - .IgnoreMissingAttachments = false, - .CleanOplog = false, - .PartialBlockRequestMode = EPartialBlockRequestMode::All, - .PopulateCache = false, - .StoreLatencySec = 0.001, - .StoreMaxRangeCountPerRequest = 128u, - .CacheLatencySec = -1.0, - .CacheMaxRangeCountPerRequest = 0u, - .OptionalJobContext = &OpJobContext}); - CHECK(R.ErrorCode == 0); + // StoreMaxRangeCountPerRequest=128 -> all three ranges sent in one LoadAttachmentRanges call. 
+ Ref<ProjectStore::Oplog> PartialOplog = ImportProject->NewOplog(fmt::format("partial_cloud_multi_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = CacheBuildId, + .Oplog = *PartialOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = -1.0, + .CacheMaxRangeCountPerRequest = 0u, + .OptionalJobContext = &OpJobContext}); } SUBCASE("partial_block_cloud_singlerange") @@ -5000,77 +5154,68 @@ TEST_CASE("project.store.import.context_settings") // Same block layout as partial_block_cloud_multirange but StoreMaxRangeCountPerRequest=1. // DownloadPartialBlock issues one LoadAttachmentRanges call per range. ScopedTemporaryDirectory PartialExportDir; - std::shared_ptr<RemoteProjectStore> PartialRemoteStore; - RemoteProjectStore::Result ExportR = - SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path(), PartialRemoteStore); - REQUIRE(ExportR.ErrorCode == 0); + std::shared_ptr<RemoteProjectStore> PartialRemoteStore = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path()); IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); CHECK(BlockHash != IoHash::Zero); SeedCidStoreWithAlternateChunks(ImportCidStore, *PartialRemoteStore, BlockHash); - Ref<ProjectStore::Oplog> PartialOplog = ImportProject->NewOplog(fmt::format("partial_cloud_single_{}", OpJobIndex++), {}); - RemoteProjectStore::Result R = LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, - .RemoteStore = *PartialRemoteStore, - .OptionalCache = nullptr, - .CacheBuildId = CacheBuildId, - .Oplog = *PartialOplog, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = 
WorkerPool, - .ForceDownload = false, - .IgnoreMissingAttachments = false, - .CleanOplog = false, - .PartialBlockRequestMode = EPartialBlockRequestMode::All, - .PopulateCache = false, - .StoreLatencySec = 0.001, - .StoreMaxRangeCountPerRequest = 1u, - .CacheLatencySec = -1.0, - .CacheMaxRangeCountPerRequest = 0u, - .OptionalJobContext = &OpJobContext}); - CHECK(R.ErrorCode == 0); + Ref<ProjectStore::Oplog> PartialOplog = ImportProject->NewOplog(fmt::format("partial_cloud_single_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = nullptr, + .CacheBuildId = CacheBuildId, + .Oplog = *PartialOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 1u, + .CacheLatencySec = -1.0, + .CacheMaxRangeCountPerRequest = 0u, + .OptionalJobContext = &OpJobContext}); } SUBCASE("partial_block_cache_multirange") { ScopedTemporaryDirectory PartialExportDir; - std::shared_ptr<RemoteProjectStore> PartialRemoteStore; - RemoteProjectStore::Result ExportR = - SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path(), PartialRemoteStore); - REQUIRE(ExportR.ErrorCode == 0); + std::shared_ptr<RemoteProjectStore> PartialRemoteStore = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path()); IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); CHECK(BlockHash != IoHash::Zero); - // Phase 1: ImportCidStore starts empty → full block download from remote → PutBuildBlob - // populates the cache. + // Phase 1: full block download from remote populates the cache. 
{ - Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog(fmt::format("partial_cache_multi_p1_{}", OpJobIndex++), {}); - RemoteProjectStore::Result Phase1R = LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, - .RemoteStore = *PartialRemoteStore, - .OptionalCache = Cache.get(), - .CacheBuildId = CacheBuildId, - .Oplog = *Phase1Oplog, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = WorkerPool, - .ForceDownload = false, - .IgnoreMissingAttachments = false, - .CleanOplog = false, - .PartialBlockRequestMode = EPartialBlockRequestMode::All, - .PopulateCache = true, - .StoreLatencySec = 0.001, - .StoreMaxRangeCountPerRequest = 128u, - .CacheLatencySec = 0.001, - .CacheMaxRangeCountPerRequest = 128u, - .OptionalJobContext = &OpJobContext}); - CHECK(Phase1R.ErrorCode == 0); + Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog(fmt::format("partial_cache_multi_p1_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase1Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = true, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 128u, + .OptionalJobContext = &OpJobContext}); + CHECK(CacheStats.PutBlobCount > 0); } ResetCacheStats(); - // Phase 2: fresh CidStore with only even-indexed chunks seeded. - // HasAttachment returns false for odd chunks (1, 3, 5) → three BlockRangeDescriptors. - // Block is in cache from Phase 1 → cache partial path. - // CacheMaxRangeCountPerRequest=128 → SubRangeCount=3 > 1 → GetBuildBlobRanges. + // Phase 2: fresh CidStore with even chunks seeded; CacheMaxRangeCountPerRequest=128 -> GetBuildBlobRanges. 
GcManager Phase2Gc; CidStore Phase2CidStore(Phase2Gc); CidStoreConfiguration Phase2CidConfig = {.RootDirectory = TempDir.Path() / "partial_cas", @@ -5079,67 +5224,63 @@ TEST_CASE("project.store.import.context_settings") Phase2CidStore.Initialize(Phase2CidConfig); SeedCidStoreWithAlternateChunks(Phase2CidStore, *PartialRemoteStore, BlockHash); - Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog(fmt::format("partial_cache_multi_p2_{}", OpJobIndex++), {}); - RemoteProjectStore::Result Phase2R = LoadOplog(LoadOplogContext{.ChunkStore = Phase2CidStore, - .RemoteStore = *PartialRemoteStore, - .OptionalCache = Cache.get(), - .CacheBuildId = CacheBuildId, - .Oplog = *Phase2Oplog, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = WorkerPool, - .ForceDownload = false, - .IgnoreMissingAttachments = false, - .CleanOplog = false, - .PartialBlockRequestMode = EPartialBlockRequestMode::ZenCacheOnly, - .PopulateCache = false, - .StoreLatencySec = 0.001, - .StoreMaxRangeCountPerRequest = 128u, - .CacheLatencySec = 0.001, - .CacheMaxRangeCountPerRequest = 128u, - .OptionalJobContext = &OpJobContext}); - CHECK(Phase2R.ErrorCode == 0); + Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog(fmt::format("partial_cache_multi_p2_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.ChunkStore = Phase2CidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase2Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::ZenCacheOnly, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 128u, + .OptionalJobContext = &OpJobContext}); + CHECK(CacheStats.TotalRequestCount > 0); } SUBCASE("partial_block_cache_singlerange") { ScopedTemporaryDirectory 
PartialExportDir; - std::shared_ptr<RemoteProjectStore> PartialRemoteStore; - RemoteProjectStore::Result ExportR = - SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path(), PartialRemoteStore); - REQUIRE(ExportR.ErrorCode == 0); + std::shared_ptr<RemoteProjectStore> PartialRemoteStore = + SetupPartialBlockExportStore(NetworkPool, WorkerPool, PartialExportDir.Path()); IoHash BlockHash = FindBlockWithMultipleChunks(*PartialRemoteStore, 4u); CHECK(BlockHash != IoHash::Zero); // Phase 1: full block download from remote into cache. { - Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog(fmt::format("partial_cache_single_p1_{}", OpJobIndex++), {}); - RemoteProjectStore::Result Phase1R = LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, - .RemoteStore = *PartialRemoteStore, - .OptionalCache = Cache.get(), - .CacheBuildId = CacheBuildId, - .Oplog = *Phase1Oplog, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = WorkerPool, - .ForceDownload = false, - .IgnoreMissingAttachments = false, - .CleanOplog = false, - .PartialBlockRequestMode = EPartialBlockRequestMode::All, - .PopulateCache = true, - .StoreLatencySec = 0.001, - .StoreMaxRangeCountPerRequest = 128u, - .CacheLatencySec = 0.001, - .CacheMaxRangeCountPerRequest = 128u, - .OptionalJobContext = &OpJobContext}); - CHECK(Phase1R.ErrorCode == 0); + Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog(fmt::format("partial_cache_single_p1_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase1Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::All, + .PopulateCache = true, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + 
.CacheMaxRangeCountPerRequest = 128u, + .OptionalJobContext = &OpJobContext}); + CHECK(CacheStats.PutBlobCount > 0); } ResetCacheStats(); - // Phase 2: fresh CidStore with only even-indexed chunks seeded. - // CacheMaxRangeCountPerRequest=1 → SubRangeCount=Min(3,1)=1 → GetBuildBlob with range - // offset (single-range legacy cache path), called once per needed chunk range. + // Phase 2: CacheMaxRangeCountPerRequest=1 -> GetBuildBlob with range offset, called per needed range. GcManager Phase2Gc; CidStore Phase2CidStore(Phase2Gc); CidStoreConfiguration Phase2CidConfig = {.RootDirectory = TempDir.Path() / "partial_cas_single", @@ -5148,29 +5289,2248 @@ TEST_CASE("project.store.import.context_settings") Phase2CidStore.Initialize(Phase2CidConfig); SeedCidStoreWithAlternateChunks(Phase2CidStore, *PartialRemoteStore, BlockHash); - Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog(fmt::format("partial_cache_single_p2_{}", OpJobIndex++), {}); - RemoteProjectStore::Result Phase2R = LoadOplog(LoadOplogContext{.ChunkStore = Phase2CidStore, - .RemoteStore = *PartialRemoteStore, - .OptionalCache = Cache.get(), - .CacheBuildId = CacheBuildId, - .Oplog = *Phase2Oplog, - .NetworkWorkerPool = NetworkPool, - .WorkerPool = WorkerPool, - .ForceDownload = false, - .IgnoreMissingAttachments = false, - .CleanOplog = false, - .PartialBlockRequestMode = EPartialBlockRequestMode::ZenCacheOnly, - .PopulateCache = false, - .StoreLatencySec = 0.001, - .StoreMaxRangeCountPerRequest = 128u, - .CacheLatencySec = 0.001, - .CacheMaxRangeCountPerRequest = 1u, - .OptionalJobContext = &OpJobContext}); - CHECK(Phase2R.ErrorCode == 0); + Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog(fmt::format("partial_cache_single_p2_{}", OpJobIndex++), {}); + LoadOplog(LoadOplogContext{.ChunkStore = Phase2CidStore, + .RemoteStore = *PartialRemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase2Oplog, + .NetworkWorkerPool = NetworkPool, + 
.WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::ZenCacheOnly, + .PopulateCache = false, + .StoreLatencySec = 0.001, + .StoreMaxRangeCountPerRequest = 128u, + .CacheLatencySec = 0.001, + .CacheMaxRangeCountPerRequest = 1u, + .OptionalJobContext = &OpJobContext}); + CHECK(CacheStats.TotalRequestCount > 0); } } +static Ref<ProjectStore::Project> +MakeTestProject(CidStore& CidStore, + GcManager& Gc, + const std::filesystem::path& TempDir, + std::unique_ptr<class ProjectStore>& OutProjectStore) +{ + using namespace std::literals; + + CidStoreConfiguration CidConfig = {.RootDirectory = TempDir / "cas", .TinyValueThreshold = 1024, .HugeValueThreshold = 4096}; + CidStore.Initialize(CidConfig); + + std::filesystem::path BasePath = TempDir / "projectstore"; + OutProjectStore = std::make_unique<class ProjectStore>(CidStore, BasePath, Gc, ProjectStore::Configuration{}); + + std::filesystem::path RootDir = TempDir / "root"; + std::filesystem::path EngineRootDir = TempDir / "engine"; + std::filesystem::path ProjectRootDir = TempDir / "game"; + std::filesystem::path ProjectFilePath = TempDir / "game" / "game.uproject"; + + return Ref<ProjectStore::Project>(OutProjectStore->NewProject(BasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); +} + +static void +RunSaveOplog(CidStore& CidStore, + ProjectStore::Project& Project, + ProjectStore::Oplog& Oplog, + WorkerThreadPool& NetworkPool, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& ExportDir, + const std::string& Name, + size_t MaxBlockSize, + size_t MaxChunksPerBlock, + size_t MaxChunkEmbedSize, + bool EmbedLooseFiles, + bool ForceUpload, + bool IgnoreMissingAttachments, + JobContext* OptionalContext, + bool ForceDisableBlocks, + std::shared_ptr<RemoteProjectStore>* OutRemoteStore = nullptr) +{ + 
FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = MaxBlockSize, + .MaxChunksPerBlock = MaxChunksPerBlock, + .MaxChunkEmbedSize = MaxChunkEmbedSize, + .ChunkFileSizeLimit = 64u * 1024u * 1024u}, + /*.FolderPath =*/ExportDir, + /*.Name =*/Name, + /*.OptionalBaseName =*/std::string(), + /*.ForceDisableBlocks =*/ForceDisableBlocks, + /*.ForceEnableTempBlocks =*/false}; + + std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); + if (OutRemoteStore) + { + *OutRemoteStore = RemoteStore; + } + SaveOplog(CidStore, + *RemoteStore, + Project, + Oplog, + NetworkPool, + WorkerPool, + Options.MaxBlockSize, + Options.MaxChunksPerBlock, + Options.MaxChunkEmbedSize, + Options.ChunkFileSizeLimit, + EmbedLooseFiles, + ForceUpload, + IgnoreMissingAttachments, + OptionalContext); +} + +TEST_CASE("project.store.export.no_attachments_needed") +{ + // With no binary attachments, UploadAttachments reports "No attachments needed". + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_no_att", {}); + REQUIRE(Oplog); + + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), {})); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), {})); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + CapturingJobContext Ctx; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_no_att", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/true, + /*IgnoreMissingAttachments=*/false, + &Ctx, + /*ForceDisableBlocks=*/false); + + 
CHECK(Ctx.HasMessage("No attachments needed")); +} + +TEST_CASE("project.store.embed_loose_files_true") +{ + // EmbedLooseFiles=true: file-op entries are rewritten with a BinaryAttachment field. Round-trip must succeed. + using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_embed_true", {}); + REQUIRE(Oplog); + + Oplog->AppendNewOplogEntry( + CreateFilesOplogPackage(Oid::NewOid(), RootDir, CreateFileAttachments(RootDir, std::initializer_list<size_t>{1024, 2048}))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_embed_true", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_embed_true_import", {}); + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.embed_loose_files_false" * doctest::skip()) // superseded by buildcontainer.embed_loose_files_false_no_rewrite +{ + // EmbedLooseFiles=false: file-op entries pass through unrewritten. 
Round-trip must succeed. + using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_embed_false", {}); + REQUIRE(Oplog); + + Oplog->AppendNewOplogEntry( + CreateFilesOplogPackage(Oid::NewOid(), RootDir, CreateFileAttachments(RootDir, std::initializer_list<size_t>{1024, 2048}))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_embed_false", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/false, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_embed_false_import", {}); + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.missing_attachment_ignored" * + doctest::skip()) // superseded by buildcontainer.ignore_missing_file_attachment_warn +{ + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = 
MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + + auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_att", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts)); + + for (const auto& [Id, Path] : FileAtts) + { + std::filesystem::remove(Path); + } + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + CapturingJobContext Ctx; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_missing_att", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/true, + &Ctx, + /*ForceDisableBlocks=*/false); + + CHECK(Ctx.HasMessage("Missing attachment")); +} + +TEST_CASE("project.store.export.missing_chunk_in_cidstore" * + doctest::skip()) // superseded by buildcontainer.ignore_missing_binary_attachment_warn/throw +{ + using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + IoBuffer FakeData = CreateRandomBlob(256); + IoHash FakeHash = IoHash::HashBuffer(FakeData); + + CbObjectWriter Object; + Object << "key"sv << OidAsString(Oid::NewOid()); + Object.BeginArray("bulkdata"sv); + { + Object.BeginObject(); + Object << "id"sv << Oid::NewOid(); + Object << "type"sv + << "Standard"sv; + Object.AddBinaryAttachment("data"sv, FakeHash); + Object.EndObject(); + } + Object.EndArray(); + CbPackage Package; + Package.SetObject(Object.Save()); + + 
Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_cid", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(Package); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + CHECK_THROWS(RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_missing_cid", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false)); +} + +TEST_CASE("project.store.export.large_file_attachment_direct") +{ + // File > 2 x MaxChunkEmbedSize: classified as a direct large attachment (no compression attempt). Round-trip must succeed. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + + // 96 KB > 2 x 32 KB -> direct large attachment. 
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{96u * 1024u}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_large_direct", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts)); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunkEmbedSize = 32u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_large_direct", + 64u * 1024u, + 1000, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_large_direct_import", {}); + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.large_file_attachment_via_temp") +{ + // File with MaxChunkEmbedSize < size <= 2xMaxChunkEmbedSize: compressed to a temp buffer; + // if still large (incompressible), goes to OnLargeAttachment. Round-trip must succeed. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + + // 48 KB: 32 KB < 48 KB <= 64 KB -> temp-compression path; incompressible data stays > 32 KB. 
+ auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{48u * 1024u}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_large_via_temp", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts)); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunkEmbedSize = 32u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_large_via_temp", + 64u * 1024u, + 1000, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_large_via_temp_import", {}); + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.large_chunk_from_cidstore") +{ + // Bulkdata attachment in CidStore with compressed size > MaxChunkEmbedSize -> OnLargeAttachment. Round-trip must succeed. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + // 64 KB with None encoding -> compressed ~ 64 KB > MaxChunkEmbedSize = 32 KB. 
+ auto Attachments = CreateAttachments(std::initializer_list<size_t>{64u * 1024u}, OodleCompressionLevel::None); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_large_cid", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), Attachments)); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunkEmbedSize = 32u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_large_cid", + 64u * 1024u, + 1000, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_large_cid_import", {}); + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.block_reuse") +{ + // Second export to the same store: FindReuseBlocks matches existing blocks; no new blocks are written. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + // 20 KB with None encoding: compressed ~ 20 KB < MaxChunkEmbedSize = 32 KB -> goes into a block. 
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_reuse", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oid::NewOid(), + CreateAttachments(std::initializer_list<size_t>{20u * 1024u, 20u * 1024u}, OodleCompressionLevel::None))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunkEmbedSize = 32u * 1024u; + constexpr size_t MaxBlockSize = 64u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_reuse", + MaxBlockSize, + 1000, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + RemoteProjectStore::GetKnownBlocksResult KnownAfterFirst = RemoteStore->GetKnownBlocks(); + REQUIRE(!KnownAfterFirst.Blocks.empty()); + + std::vector<IoHash> BlockHashesAfterFirst; + for (const ChunkBlockDescription& B : KnownAfterFirst.Blocks) + { + BlockHashesAfterFirst.push_back(B.BlockHash); + } + + SaveOplog(CidStore, + *RemoteStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + MaxBlockSize, + 1000, + MaxChunkEmbedSize, + 64u * 1024u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr); + + RemoteProjectStore::GetKnownBlocksResult KnownAfterSecond = RemoteStore->GetKnownBlocks(); + std::vector<IoHash> BlockHashesAfterSecond; + for (const ChunkBlockDescription& B : KnownAfterSecond.Blocks) + { + BlockHashesAfterSecond.push_back(B.BlockHash); + } + + std::sort(BlockHashesAfterFirst.begin(), BlockHashesAfterFirst.end()); + std::sort(BlockHashesAfterSecond.begin(), BlockHashesAfterSecond.end()); + CHECK(BlockHashesAfterFirst == BlockHashesAfterSecond); +} + 
+TEST_CASE("project.store.export.max_chunks_per_block") +{ + // MaxChunksPerBlock=2 with 3 attachments from one op -> at least 2 blocks produced. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + // 2 KB with None encoding: compressed ~ 2 KB < MaxChunkEmbedSize = 4 KB -> enters block assembly. + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_max_chunks", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oid::NewOid(), + CreateAttachments(std::initializer_list<size_t>{2u * 1024u, 2u * 1024u, 2u * 1024u}, OodleCompressionLevel::None))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunksPerBlock = 2; + constexpr size_t MaxBlockSize = 1u * 1024u * 1024u; + constexpr size_t MaxChunkEmbedSize = 4u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_max_chunks", + MaxBlockSize, + MaxChunksPerBlock, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + RemoteProjectStore::GetKnownBlocksResult KnownBlocks = RemoteStore->GetKnownBlocks(); + CHECK(KnownBlocks.Blocks.size() >= 2); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_max_chunks_import", {}); + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = 
EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.max_data_per_block") +{ + // Verifies ComposeBlocks respects UsableBlockSize = MaxBlockSize - MaxHeaderSize. + // With MaxBlockSize=7168, MaxChunksPerBlock=32: MaxHeaderSize=129, UsableBlockSize=7039. + // Oids[1] contributes 7041 compressed bytes (> 7039) to force a block boundary at that exact limit. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_max_data_per_block", {}); + REQUIRE(Oplog); + + std::vector<Oid> Oids; + Oids.push_back(Oid::NewOid()); + Oids.push_back(Oid::NewOid()); + Oids.push_back(Oid::NewOid()); + Oids.push_back(Oid::NewOid()); + Oids.push_back(Oid::NewOid()); + std::sort(Oids.begin(), Oids.end()); + + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oids[0], CreateAttachments(std::initializer_list<size_t>{2u * 1024u}, OodleCompressionLevel::None))); + + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oids[1], + CreateAttachments(std::initializer_list<size_t>{3u * 1024u, 2u * 1024u, 2u * 1024u, 875u, 875u, 875u}, + OodleCompressionLevel::None))); + + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oids[2], CreateAttachments(std::initializer_list<size_t>{875u, 875u}, OodleCompressionLevel::None))); + + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage( + Oids[3], + CreateAttachments(std::initializer_list<size_t>{875u, 875u, 875u, 875u, 875u, 875u}, OodleCompressionLevel::None))); + + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oids[4], CreateAttachments(std::initializer_list<size_t>{1676, 1678}, OodleCompressionLevel::None))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + 
WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + constexpr size_t MaxChunksPerBlock = 32; + constexpr size_t MaxBlockSize = 7u * 1024u; + constexpr size_t MaxChunkEmbedSize = 3u * 1024u; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_max_data_per_block", + MaxBlockSize, + MaxChunksPerBlock, + MaxChunkEmbedSize, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + RemoteProjectStore::GetKnownBlocksResult KnownBlocks = RemoteStore->GetKnownBlocks(); + CHECK(KnownBlocks.Blocks.size() >= 2); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_max_data_per_block_import", {}); + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.export.file_deleted_between_phases") +{ + // File exists during RewriteOp but is deleted before AllowChunking workers run. + // With IgnoreMissingAttachments=true the export continues. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + + auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_file_deleted", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts)); + + std::vector<std::filesystem::path> FilePaths; + for (const auto& [Id, Path] : FileAtts) + { + FilePaths.push_back(Path); + } + + // Deletes files when "Rewrote" arrives, before AllowChunking workers run. + struct DeleteOnRewriteContext : public CapturingJobContext + { + std::vector<std::filesystem::path>* Paths = nullptr; + void ReportMessage(std::string_view Message) override + { + CapturingJobContext::ReportMessage(Message); + if (Message.find("Rewrote") != std::string_view::npos && Paths) + { + for (const auto& P : *Paths) + { + std::filesystem::remove(P); + } + } + } + }; + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + DeleteOnRewriteContext Ctx; + Ctx.Paths = &FilePaths; + + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_file_deleted", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/true, + &Ctx, + /*ForceDisableBlocks=*/false); + + CHECK(Ctx.HasMessage("Missing attachment")); + for (const auto& P : FilePaths) + { + CHECK(!std::filesystem::exists(P)); + } +} + +TEST_CASE("project.store.embed_loose_files_zero_data_hash") +{ + // File-op entries with "data": IoHash::Zero (unresolved marker) 
trigger RewriteOp to + // read from disk and replace with a resolved BinaryAttachment. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_zero_data_hash", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackageWithZeroDataHash(Oid::NewOid(), RootDir, FileAtts)); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_zero_data_hash", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_zero_data_hash_import", {}); + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); +} + +TEST_CASE("project.store.embed_loose_files_already_resolved") +{ + // After an export->import round-trip, oplog entries carry resolved "data": BinaryAttachment(H). + // A re-export must preserve those fields without re-reading from disk. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir1; + ScopedTemporaryDirectory ExportDir2; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_already_resolved", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts)); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore1; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir1.Path(), + "oplog_already_resolved", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore1); + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_already_resolved_import", {}); + LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore1, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed}); + + RunSaveOplog(CidStore, + *Project, + *ImportOplog, + NetworkPool, + WorkerPool, + ExportDir2.Path(), + "oplog_already_resolved_reexport", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/true, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false); +} + +TEST_CASE("project.store.import.missing_attachment") +{ + // 
Export a small oplog with ForceDisableBlocks=true (only loose .blob files), delete one + // attachment, then test both sides of IgnoreMissingAttachments. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_att", {}); + REQUIRE(Oplog); + + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{512, 1024}))); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{2048, 3000}))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_missing_att", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/false, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/true, + &RemoteStore); + + // Find and delete one .blob attachment file from the remote store directory. 
+ std::filesystem::path DeletedBlob; + for (const auto& Entry : std::filesystem::recursive_directory_iterator(ExportDir.Path())) + { + if (Entry.path().extension() == ".blob") + { + DeletedBlob = Entry.path(); + break; + } + } + REQUIRE(!DeletedBlob.empty()); + std::error_code Ec; + std::filesystem::remove(DeletedBlob, Ec); + REQUIRE(!Ec); + + SUBCASE("throws_when_not_ignored") + { + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_missing_att_throw", {}); + REQUIRE(ImportOplog); + CapturingJobContext Ctx; + CHECK_THROWS_AS(LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &Ctx}), + RemoteStoreError); + } + + SUBCASE("succeeds_when_ignored") + { + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_missing_att_ignore", {}); + REQUIRE(ImportOplog); + CapturingJobContext Ctx; + CHECK_NOTHROW(LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + .IgnoreMissingAttachments = true, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &Ctx})); + CHECK(Ctx.HasMessage("Failed to load attachments")); + } +} + +TEST_CASE("project.store.import.error.load_container_failure") +{ + // LoadContainer() on a nonexistent path returns non-zero ErrorCode -> LoadOplog throws RemoteStoreError. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path NonExistentPath = TempDir.Path() / "does_not_exist"; + FileRemoteStoreOptions Options = {RemoteStoreOptions{.MaxBlockSize = 64u * 1024u, + .MaxChunksPerBlock = 1000, + .MaxChunkEmbedSize = 32u * 1024u, + .ChunkFileSizeLimit = 64u * 1024u * 1024u}, + /*.FolderPath =*/NonExistentPath, + /*.Name =*/"load_container_failure", + /*.OptionalBaseName =*/std::string(), + /*.ForceDisableBlocks =*/false, + /*.ForceEnableTempBlocks =*/false}; + std::shared_ptr<RemoteProjectStore> RemoteStore = CreateFileRemoteStore(Log(), Options); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("load_container_failure_import", {}); + REQUIRE(ImportOplog); + + CapturingJobContext Ctx; + CHECK_THROWS_AS(LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &Ctx}), + RemoteStoreError); +} + +TEST_CASE("project.store.blockcomposer.path_a_standalone_block") +{ + // Path A: one op with exactly MaxChunksPerBlock chunks -> emitted as a standalone block without merging into pending. 
+ using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 1000; + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4)}; + std::vector<uint64_t> Sizes = {100, 100, 100, 100}; + std::vector<Oid> Keys = {Op1, Op1, Op1, Op1}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 1); + CHECK(Blocks[0].size() == 4); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[0][3] == MakeTestHash(4)); +} + +TEST_CASE("project.store.blockcomposer.path_b_fits_pending") +{ + // Path B: a single op whose chunks fit in the empty pending block. + // No flush occurs during processing; the final flush emits the one pending block. + using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 1000; + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2)}; + std::vector<uint64_t> Sizes = {60, 80}; // each <= MaxChunkEmbedSize (100); sum=140 << UsableSize (1000) + std::vector<Oid> Keys = {Op1, Op1}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 1); + CHECK(Blocks[0].size() == 2); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[0][1] == MakeTestHash(2)); +} + +TEST_CASE("project.store.blockcomposer.path_b_exact_count_fill") +{ + // Path B: pending reaches MaxChunksPerBlock exactly -> immediate flush, no separate final flush. 
+ using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 1000; + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + Oid Op2 = MakeTestOid(2); + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4)}; + std::vector<uint64_t> Sizes = {100, 100, 100, 100}; + std::vector<Oid> Keys = {Op1, Op1, Op2, Op2}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 1); + CHECK(Blocks[0].size() == 4); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[0][3] == MakeTestHash(4)); +} + +TEST_CASE("project.store.blockcomposer.path_c_75pct_flush") +{ + // Path C: pending is >75% full when the next op doesn't fit -> pending flushed first, new op placed via Path B. + // UsableSize=100, threshold=75 bytes; Op1=80 bytes > 75%. 
+ using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 100; // 75% threshold = 75 bytes + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + Oid Op2 = MakeTestOid(2); + // Op1: 80 bytes -> Path B, pending = {80 bytes, 1 chunk} (80 > 75) + // Op2: 30 bytes -> does not fit (80+30=110 > 100) and 80 > 75 -> Path C flush, + // then Path B, pending = {30 bytes} -> final flush + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2)}; + std::vector<uint64_t> Sizes = {80, 30}; + std::vector<Oid> Keys = {Op1, Op2}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 2); + CHECK(Blocks[0].size() == 1); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[1].size() == 1); + CHECK(Blocks[1][0] == MakeTestHash(2)); +} + +TEST_CASE("project.store.blockcomposer.path_d_partial_fill") +{ + // Path D: pending <=75% full but chunk count is the binding constraint. Greedy fill adds chunks until count capacity, then flushes. 
+ using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 1000; // 75% threshold = 750 bytes + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + Oid Op2 = MakeTestOid(2); + // Op1: 3 x 100 bytes -> Path B, pending = {3 chunks, 300 bytes} (300 <= 750) + // Op2: 2 x 100 bytes -> 3+2=5 > MaxChunks=4; 300+200=500 <= 1000; 300 <= 750 -> Path D + // D adds op2[0] to pending (4 chunks, count capacity reached), flushes -> block 1 + // Remaining op2[1] -> Path B (pending empty) -> final flush -> block 2 + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5)}; + std::vector<uint64_t> Sizes = {100, 100, 100, 100, 100}; + std::vector<Oid> Keys = {Op1, Op1, Op1, Op2, Op2}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 2); + CHECK(Blocks[0].size() == 4); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[0][1] == MakeTestHash(2)); + CHECK(Blocks[0][2] == MakeTestHash(3)); + CHECK(Blocks[0][3] == MakeTestHash(4)); + CHECK(Blocks[1].size() == 1); + CHECK(Blocks[1][0] == MakeTestHash(5)); +} + +TEST_CASE("project.store.blockcomposer.cancellation") +{ + // IsCancelledFunc returns true on the second outer-loop iteration. + // Op1 (4 chunks, Path A) is fully emitted before cancellation; Op2 is never started. 
+ using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 1000; + constexpr uint64_t MaxChunks = 4; + + int CallCount = 0; + remotestore_impl::BlockComposer::Configuration Config = MakeTestConfig(UsableSize, MaxChunks); + Config.IsCancelledFunc = [&]() { return ++CallCount > 1; }; + remotestore_impl::BlockComposer Composer(Config); + + Oid Op1 = MakeTestOid(1); + Oid Op2 = MakeTestOid(2); + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5), MakeTestHash(6)}; + std::vector<uint64_t> Sizes = {100, 100, 100, 100, 100, 100}; + std::vector<Oid> Keys = {Op1, Op1, Op1, Op1, Op2, Op2}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 1); + CHECK(Blocks[0].size() == 4); +} + +TEST_CASE("project.store.blockcomposer.final_flush") +{ + // Three ops with all chunks fitting in pending (no mid-stream flush) -> single block from final flush. 
+ using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 1000; + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + Oid Op2 = MakeTestOid(2); + Oid Op3 = MakeTestOid(3); + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3)}; + std::vector<uint64_t> Sizes = {60, 80, 70}; // each <= MaxChunkEmbedSize (100); sum=210 << UsableSize (1000) + std::vector<Oid> Keys = {Op1, Op2, Op3}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 1); + CHECK(Blocks[0].size() == 3); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[0][1] == MakeTestHash(2)); + CHECK(Blocks[0][2] == MakeTestHash(3)); +} + +TEST_CASE("project.store.blockcomposer.path_b_b_c") +{ + // Path B -> Path B -> Path C: two ops accumulate past 75% threshold; third op triggers Path C flush. + // UsableSize=200, threshold=150; two ops of 90 bytes each accumulate 180 bytes, exceeding threshold. 
+ using namespace projectstore_testutils; + constexpr uint64_t UsableSize = 200; // 75% threshold = 150 bytes + constexpr uint64_t MaxChunks = 8; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + Oid Op2 = MakeTestOid(2); + Oid Op3 = MakeTestOid(3); + // Op1: 90 bytes -> Path B, pending = {90 bytes, 1 chunk} (90 <= 150) + // Op2: 90 bytes -> Path B, pending = {180 bytes, 2 chunks} (180 > 150) + // Op3: 60 bytes -> does not fit (180+60=240 > 200) and 180 > 150 -> Path C flush -> block 1 + // then Path B, pending = {60 bytes} -> final flush -> block 2 + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3)}; + std::vector<uint64_t> Sizes = {90, 90, 60}; + std::vector<Oid> Keys = {Op1, Op2, Op3}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 2); + CHECK(Blocks[0].size() == 2); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[0][1] == MakeTestHash(2)); + CHECK(Blocks[1].size() == 1); + CHECK(Blocks[1][0] == MakeTestHash(3)); +} + +TEST_CASE("project.store.blockcomposer.path_a_b_final_flush") +{ + // Path A -> Path B -> final flush: first op count-saturates -> standalone block, second op placed via Path B. 
+ using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 1000; + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + Oid Op2 = MakeTestOid(2); + // Op1: 4 x 100 bytes -> MaxChunksPerBlock reached -> CurrentOpFillFullBlock=true -> Path A + // Op2: 2 x 100 bytes -> Path B (pending empty) -> final flush + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5), MakeTestHash(6)}; + std::vector<uint64_t> Sizes = {100, 100, 100, 100, 100, 100}; + std::vector<Oid> Keys = {Op1, Op1, Op1, Op1, Op2, Op2}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 2); + CHECK(Blocks[0].size() == 4); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[0][3] == MakeTestHash(4)); + CHECK(Blocks[1].size() == 2); + CHECK(Blocks[1][0] == MakeTestHash(5)); + CHECK(Blocks[1][1] == MakeTestHash(6)); +} + +TEST_CASE("project.store.blockcomposer.empty_input") +{ + // Zero attachments -> no blocks emitted. + using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 1000; + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose({}, {}, {}, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + CHECK(Blocks.empty()); +} + +TEST_CASE("project.store.blockcomposer.single_attachment") +{ + // Single chunk -> Path B into empty pending, final flush emits it. 
+ using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 1000; + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + std::vector<IoHash> Hashes = {MakeTestHash(1)}; + std::vector<uint64_t> Sizes = {60}; + std::vector<Oid> Keys = {Op1}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 1); + CHECK(Blocks[0].size() == 1); + CHECK(Blocks[0][0] == MakeTestHash(1)); +} + +TEST_CASE("project.store.blockcomposer.path_a_size_saturation") +{ + // Path A by size overflow: 60+60 > UsableSize=100; first chunk emitted standalone, second via Path B. + using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 100; // MaxChunkEmbedSize=100; two 60-byte chunks overflow + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + // chunk0=60, chunk1=60: 60+60=120 > UsableSize=100 -> size overflow after gathering chunk0 + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2)}; + std::vector<uint64_t> Sizes = {60, 60}; + std::vector<Oid> Keys = {Op1, Op1}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 2); + CHECK(Blocks[0].size() == 1); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[1].size() == 1); + CHECK(Blocks[1][0] == MakeTestHash(2)); +} + +TEST_CASE("project.store.blockcomposer.path_b_exact_size_fill") +{ + // Path B immediate flush when pending reaches UsableBlockSize exactly (vs count-fill in path_b_exact_count_fill). 
+ using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 100; + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + Oid Op2 = MakeTestOid(2); + // Op1: 60 bytes -> Path B, pending = {60 bytes, 1 chunk} + // Op2: 40 bytes -> 60+40=100 == UsableSize -> Path B, immediate size-exact flush + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2)}; + std::vector<uint64_t> Sizes = {60, 40}; + std::vector<Oid> Keys = {Op1, Op2}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 1); + CHECK(Blocks[0].size() == 2); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[0][1] == MakeTestHash(2)); +} + +TEST_CASE("project.store.blockcomposer.path_d_size_limited_greedy") +{ + // Path D where greedy fill is limited by size (not count). MaxChunks=8 ensures size is binding. 
+ using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 200; // 75% threshold = 150 bytes + constexpr uint64_t MaxChunks = 8; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + Oid Op2 = MakeTestOid(2); + std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4)}; + std::vector<uint64_t> Sizes = {90, 60, 60, 60}; + std::vector<Oid> Keys = {Op1, Op2, Op2, Op2}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 2); + CHECK(Blocks[0].size() == 2); + CHECK(Blocks[0][0] == MakeTestHash(1)); + CHECK(Blocks[0][1] == MakeTestHash(2)); + CHECK(Blocks[1].size() == 2); + CHECK(Blocks[1][0] == MakeTestHash(3)); + CHECK(Blocks[1][1] == MakeTestHash(4)); +} + +TEST_CASE("project.store.blockcomposer.path_a_pending_untouched") +{ + // Path A leaves pending untouched: Op1 in pending, Op2 count-saturates -> standalone block. Final flush emits Op1. + using namespace projectstore_testutils; + + constexpr uint64_t UsableSize = 1000; + constexpr uint64_t MaxChunks = 4; + remotestore_impl::BlockComposer Composer(MakeTestConfig(UsableSize, MaxChunks)); + + Oid Op1 = MakeTestOid(1); + Oid Op2 = MakeTestOid(2); + // Op1: 2 x 60 bytes -> Path B, pending = {2 chunks, 120 bytes} + // Op2: 4 x 100 bytes -> count reaches MaxChunks=4 -> CurrentOpFillFullBlock=true -> Path A + // Path A emits Op2 standalone as block 1; pending (Op1's chunks) is left untouched. + // Final flush emits pending -> block 2. 
+ std::vector<IoHash> Hashes = {MakeTestHash(1), MakeTestHash(2), MakeTestHash(3), MakeTestHash(4), MakeTestHash(5), MakeTestHash(6)}; + std::vector<uint64_t> Sizes = {60, 60, 100, 100, 100, 100}; + std::vector<Oid> Keys = {Op1, Op1, Op2, Op2, Op2, Op2}; + + std::vector<std::vector<IoHash>> Blocks; + Composer.Compose(Hashes, Sizes, Keys, [&](std::vector<IoHash>&& B) { Blocks.push_back(std::move(B)); }); + + REQUIRE(Blocks.size() == 2); + CHECK(Blocks[0].size() == 4); + CHECK(Blocks[0][0] == MakeTestHash(3)); + CHECK(Blocks[0][3] == MakeTestHash(6)); + CHECK(Blocks[1].size() == 2); + CHECK(Blocks[1][0] == MakeTestHash(1)); + CHECK(Blocks[1][1] == MakeTestHash(2)); +} + +// --------------------------------------------------------------------------- +// BuildContainer-direct tests +// --------------------------------------------------------------------------- + +TEST_CASE("buildcontainer.public_overload_smoke") +{ + // Verifies the public BuildContainer overload runs successfully and calls AsyncOnBlock. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_smoke", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024}))); + + WorkerThreadPool WorkerPool(GetWorkerCount()); + + std::atomic<int> BlockCallCount{0}; + CbObject Container = BuildContainer( + CidStore, + *Project, + *Oplog, + WorkerPool, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/true, + [&](CompressedBuffer&&, ChunkBlockDescription&&) { BlockCallCount.fetch_add(1); }, + [](const IoHash&, TGetAttachmentBufferFunc&&) {}, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/false); + + CHECK(Container.GetSize() > 0); + CHECK(BlockCallCount.load() >= 1); +} + +TEST_CASE("buildcontainer.build_blocks_false_on_block_chunks") +{ + // BuildBlocks=false: small attachments go to OnBlockChunks instead of AsyncOnBlock. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_no_blocks", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024}))); + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024}))); + + WorkerThreadPool WorkerPool(GetWorkerCount()); + + std::atomic<int> BlockChunksCallCount{0}; + CbObject Container = BuildContainer( + CidStore, + *Project, + *Oplog, + WorkerPool, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/false, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/true, + [](CompressedBuffer&&, ChunkBlockDescription&&) { CHECK(false); }, + [](const IoHash&, TGetAttachmentBufferFunc&&) {}, + [&](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) { BlockChunksCallCount.fetch_add(1); }, + /*EmbedLooseFiles=*/false); + + CHECK(Container.GetSize() > 0); + CHECK(BlockChunksCallCount.load() >= 1); +} + +TEST_CASE("buildcontainer.ignore_missing_binary_attachment_warn") +{ + // A bulk-data op references a hash that is absent from CidStore. + // SUBCASE warn: IgnoreMissingAttachments=true -> ReportMessage("Missing attachment ..."). + // SUBCASE throw: IgnoreMissingAttachments=false -> std::runtime_error. 
+ using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + // Fabricate a hash not in CidStore and build a package that references it as a + // BinaryAttachment field but carries no inline attachment data. + IoBuffer FakeData = CreateRandomBlob(256); + IoHash FakeHash = IoHash::HashBuffer(FakeData); + + CbObjectWriter Object; + Object << "key"sv << OidAsString(Oid::NewOid()); + Object.BeginArray("bulkdata"sv); + { + Object.BeginObject(); + Object << "id"sv << Oid::NewOid(); + Object << "type"sv + << "Standard"sv; + Object.AddBinaryAttachment("data"sv, FakeHash); + Object.EndObject(); + } + Object.EndArray(); + CbPackage Package; + Package.SetObject(Object.Save()); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_missing_bin", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(Package); + + WorkerThreadPool WorkerPool(GetWorkerCount()); + + SUBCASE("warn") + { + CapturingJobContext Ctx; + BuildContainer( + CidStore, + *Project, + *Oplog, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/true, + /*AllowChunking=*/true, + {}, + WorkerPool, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, + [](const IoHash&, TGetAttachmentBufferFunc&&) {}, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/false, + &Ctx); + CHECK(Ctx.HasMessage("Missing attachment")); + } + + SUBCASE("throw") + { + CHECK_THROWS(BuildContainer( + CidStore, + *Project, + *Oplog, + WorkerPool, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/true, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, + [](const IoHash&, TGetAttachmentBufferFunc&&) {}, 
+ [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/false)); + } +} + +TEST_CASE("buildcontainer.ignore_missing_file_attachment_warn") +{ + // File attachments are created on disk then deleted before BuildContainer runs. + // SUBCASE warn: IgnoreMissingAttachments=true -> ReportMessage("Missing attachment ..."). + // SUBCASE throw: IgnoreMissingAttachments=false -> exception. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + auto FileAtts = CreateFileAttachments(RootDir, std::initializer_list<size_t>{512, 1024}); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_missing_file", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateFilesOplogPackage(Oid::NewOid(), RootDir, FileAtts)); + + // Delete files before BuildContainer runs so RewriteOp finds them missing. 
+ for (const auto& [Id, Path] : FileAtts) + { + std::filesystem::remove(Path); + } + + WorkerThreadPool WorkerPool(GetWorkerCount()); + + SUBCASE("warn") + { + CapturingJobContext Ctx; + BuildContainer( + CidStore, + *Project, + *Oplog, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/true, + /*AllowChunking=*/true, + {}, + WorkerPool, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, + [](const IoHash&, TGetAttachmentBufferFunc&&) {}, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/true, + &Ctx); + CHECK(Ctx.HasMessage("Missing attachment")); + } + + SUBCASE("throw") + { + CHECK_THROWS(BuildContainer( + CidStore, + *Project, + *Oplog, + WorkerPool, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/true, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, + [](const IoHash&, TGetAttachmentBufferFunc&&) {}, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/true)); + } +} + +TEST_CASE("buildcontainer.embed_loose_files_false_no_rewrite") +{ + // EmbedLooseFiles=false: RewriteOp is skipped for file-op entries; they pass through + // unchanged. Neither AsyncOnBlock nor OnLargeAttachment should fire. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + std::filesystem::path RootDir = TempDir.Path() / "root"; + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_embed_false", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry( + CreateFilesOplogPackage(Oid::NewOid(), RootDir, CreateFileAttachments(RootDir, std::initializer_list<size_t>{1024, 2048}))); + + WorkerThreadPool WorkerPool(GetWorkerCount()); + + CbObject Container = BuildContainer( + CidStore, + *Project, + *Oplog, + WorkerPool, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/true, + [](CompressedBuffer&&, ChunkBlockDescription&&) { CHECK(false); }, + [](const IoHash&, TGetAttachmentBufferFunc&&) { CHECK(false); }, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/false); + + CHECK(Container.GetSize() > 0); +} + +TEST_CASE("buildcontainer.allow_chunking_false") +{ + // AllowChunking=false: attachments exceeding ChunkFileSizeLimit skip chunking -> OnLargeAttachment. + // AllowChunking=true: same data is chunked, but chunk still exceeds MaxChunkEmbedSize -> OnLargeAttachment; + // exercises the AllowChunking branch in FindChunkSizes. + // 4 KB attachment: > MaxChunkEmbedSize (2 KB) and > ChunkFileSizeLimit (1 KB). + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + // None encoding: compressed ~ 4 KB > MaxChunkEmbedSize (2 KB) and ChunkFileSizeLimit (1 KB). 
+ Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_allow_chunk", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{4096}, OodleCompressionLevel::None))); + + WorkerThreadPool WorkerPool(GetWorkerCount()); + + constexpr size_t TestMaxBlockSize = 16u * 1024u; + constexpr size_t TestMaxChunkEmbedSize = 2u * 1024u; + constexpr size_t TestChunkFileSizeLimit = 1u * 1024u; + + SUBCASE("allow_chunking_false") + { + std::atomic<int> LargeAttachmentCallCount{0}; + BuildContainer( + CidStore, + *Project, + *Oplog, + WorkerPool, + TestMaxBlockSize, + 1000, + TestMaxChunkEmbedSize, + TestChunkFileSizeLimit, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/false, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, + [&](const IoHash&, TGetAttachmentBufferFunc&&) { LargeAttachmentCallCount.fetch_add(1); }, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/false); + CHECK(LargeAttachmentCallCount.load() >= 1); + } + + SUBCASE("allow_chunking_true") + { + // Chunking branch in FindChunkSizes is taken, but the ~4 KB chunk still exceeds MaxChunkEmbedSize -> OnLargeAttachment. + std::atomic<int> LargeAttachmentCallCount{0}; + BuildContainer( + CidStore, + *Project, + *Oplog, + WorkerPool, + TestMaxBlockSize, + 1000, + TestMaxChunkEmbedSize, + TestChunkFileSizeLimit, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/true, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, + [&](const IoHash&, TGetAttachmentBufferFunc&&) { LargeAttachmentCallCount.fetch_add(1); }, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/false); + CHECK(LargeAttachmentCallCount.load() >= 1); + } +} + +TEST_CASE("buildcontainer.async_on_block_exception_propagates") +{ + // If AsyncOnBlock throws, the exception must propagate out of BuildContainer. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_block_exc", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024}))); + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 1024}))); + + WorkerThreadPool WorkerPool(GetWorkerCount()); + + CHECK_THROWS_AS(BuildContainer( + CidStore, + *Project, + *Oplog, + WorkerPool, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/true, + [](CompressedBuffer&&, ChunkBlockDescription&&) { throw std::runtime_error("inject_block"); }, + [](const IoHash&, TGetAttachmentBufferFunc&&) {}, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/false), + std::runtime_error); +} + +TEST_CASE("buildcontainer.on_large_attachment_exception_propagates") +{ + // OnLargeAttachment exception must propagate. 64 KB with MaxChunkEmbedSize=32 KB -> OnLargeAttachment. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_large_exc", {}); + REQUIRE(Oplog); + // 64 KB with OodleCompressionLevel::None -> compressed ~ 64 KB > MaxChunkEmbedSize (32 KB). 
+ Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oid::NewOid(), + CreateAttachments(std::initializer_list<size_t>{64u * 1024u}, OodleCompressionLevel::None))); + + WorkerThreadPool WorkerPool(GetWorkerCount()); + + CHECK_THROWS_AS(BuildContainer( + CidStore, + *Project, + *Oplog, + WorkerPool, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/false, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, + [](const IoHash&, TGetAttachmentBufferFunc&&) { throw std::runtime_error("inject_large"); }, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/false), + std::runtime_error); +} + +TEST_CASE("buildcontainer.context_cancellation_aborts") +{ + // IsCancelled() returns true from the start; BuildContainer must not crash or throw. + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_cancel", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024}))); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024}))); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024}))); + + WorkerThreadPool WorkerPool(GetWorkerCount()); + + CapturingJobContext Ctx; + Ctx.m_Cancel = true; + + CHECK_NOTHROW(BuildContainer( + CidStore, + *Project, + *Oplog, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/true, + {}, + WorkerPool, + 
[](CompressedBuffer&&, ChunkBlockDescription&&) {}, + [](const IoHash&, TGetAttachmentBufferFunc&&) {}, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/false, + &Ctx)); +} + +TEST_CASE("buildcontainer.context_progress_reporting") +{ + // BuildContainer calls ReportProgress at least once ("Scanning oplog"). + using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("bc_progress", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024}))); + + WorkerThreadPool WorkerPool(GetWorkerCount()); + + CapturingJobContext Ctx; + BuildContainer( + CidStore, + *Project, + *Oplog, + 64u * 1024u, + 1000, + 32u * 1024u, + 64u * 1024u * 1024u, + /*BuildBlocks=*/true, + /*IgnoreMissingAttachments=*/false, + /*AllowChunking=*/true, + {}, + WorkerPool, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, + [](const IoHash&, TGetAttachmentBufferFunc&&) {}, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /*EmbedLooseFiles=*/false, + &Ctx); + + CHECK(!Ctx.ProgressMessages.empty()); +} + +TEST_CASE("getblocksfromoplog.filtered") +{ + // GetBlocksFromOplog(ContainerObject, IncludeBlockHashes) returns only the requested blocks. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore = SetupExportStore(CidStore, *Project, NetworkPool, WorkerPool, ExportDir.Path()); + + RemoteProjectStore::LoadContainerResult ContainerResult = RemoteStore->LoadContainer(); + REQUIRE(ContainerResult.ErrorCode == 0); + + std::vector<IoHash> AllBlockHashes = GetBlockHashesFromOplog(ContainerResult.ContainerObject); + REQUIRE(!AllBlockHashes.empty()); + + // Filter to the first block only. + std::vector<IoHash> Subset = {AllBlockHashes[0]}; + std::vector<ThinChunkBlockDescription> Filtered = GetBlocksFromOplog(ContainerResult.ContainerObject, Subset); + CHECK(Filtered.size() == 1); + CHECK(Filtered[0].BlockHash == AllBlockHashes[0]); + CHECK(!Filtered[0].ChunkRawHashes.empty()); + + // Empty include set returns empty result (exercises the no-match branch). + std::vector<ThinChunkBlockDescription> Empty = GetBlocksFromOplog(ContainerResult.ContainerObject, std::span<const IoHash>{}); + CHECK(Empty.empty()); +} + +// --------------------------------------------------------------------------- +// SaveOplog-focused tests +// --------------------------------------------------------------------------- + +TEST_CASE("saveoplog.cancellation") +{ + // IsCancelled() returns true from the start; SaveOplog must not throw. 
+ using namespace projectstore_testutils; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_cancel_save", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry(CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 2048}))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + CapturingJobContext Ctx; + Ctx.m_Cancel = true; + + CHECK_NOTHROW(RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_cancel_save", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/false, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + &Ctx, + /*ForceDisableBlocks=*/false)); +} + +// --------------------------------------------------------------------------- +// LoadOplog-focused tests +// --------------------------------------------------------------------------- + +TEST_CASE("loadoplog.missing_block_attachment_ignored") +{ + // Export creates a block file; deleting it then loading with IgnoreMissingAttachments=true + // must succeed and report the failure via "Failed to download block attachment". 
+ using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + GcManager Gc; + CidStore CidStore(Gc); + std::unique_ptr<ProjectStore> ProjectStoreDummy; + Ref<ProjectStore::Project> Project = MakeTestProject(CidStore, Gc, TempDir.Path(), ProjectStoreDummy); + + Ref<ProjectStore::Oplog> Oplog = Project->NewOplog("oplog_missing_block", {}); + REQUIRE(Oplog); + Oplog->AppendNewOplogEntry( + CreateBulkDataOplogPackage(Oid::NewOid(), CreateAttachments(std::initializer_list<size_t>{1024, 1024, 2048, 512}))); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore; + RunSaveOplog(CidStore, + *Project, + *Oplog, + NetworkPool, + WorkerPool, + ExportDir.Path(), + "oplog_missing_block", + 64u * 1024u, + 1000, + 32u * 1024u, + /*EmbedLooseFiles=*/false, + /*ForceUpload=*/false, + /*IgnoreMissingAttachments=*/false, + /*OptionalContext=*/nullptr, + /*ForceDisableBlocks=*/false, + &RemoteStore); + + RemoteProjectStore::GetKnownBlocksResult KnownBlocks = RemoteStore->GetKnownBlocks(); + REQUIRE(KnownBlocks.ErrorCode == 0); + REQUIRE(!KnownBlocks.Blocks.empty()); + + for (const ChunkBlockDescription& BlockDesc : KnownBlocks.Blocks) + { + std::string HexStr = BlockDesc.BlockHash.ToHexString(); + std::filesystem::path BlockPath = ExportDir.Path() / HexStr.substr(0, 3) / HexStr.substr(3, 2) / (HexStr.substr(5) + ".blob"); + REQUIRE(std::filesystem::exists(BlockPath)); + std::filesystem::remove(BlockPath); + } + + CapturingJobContext Ctx; + Ref<ProjectStore::Oplog> ImportOplog = Project->NewOplog("oplog_missing_block_import", {}); + CHECK_NOTHROW(LoadOplog(LoadOplogContext{.ChunkStore = CidStore, + .RemoteStore = *RemoteStore, + .Oplog = *ImportOplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + .IgnoreMissingAttachments = 
true, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .OptionalJobContext = &Ctx})); + CHECK(Ctx.HasMessage("Failed to download block attachment")); +} + +TEST_CASE("loadoplog.clean_oplog_with_populated_cache") +{ + // Second import with CleanOplog=true and a non-null cache exercises the OptionalCache->Flush() path. + using namespace projectstore_testutils; + using namespace std::literals; + + ScopedTemporaryDirectory TempDir; + ScopedTemporaryDirectory ExportDir; + + std::filesystem::path RootDir = TempDir.Path() / "root"; + std::filesystem::path EngineRootDir = TempDir.Path() / "engine"; + std::filesystem::path ProjectRootDir = TempDir.Path() / "game"; + std::filesystem::path ProjectFilePath = TempDir.Path() / "game" / "game.uproject"; + + // Export side. + GcManager ExportGc; + CidStore ExportCidStore(ExportGc); + CidStoreConfiguration ExportCidConfig = {.RootDirectory = TempDir.Path() / "export_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + ExportCidStore.Initialize(ExportCidConfig); + + std::filesystem::path ExportBasePath = TempDir.Path() / "export_projectstore"; + ProjectStore ExportProjectStore(ExportCidStore, ExportBasePath, ExportGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> ExportProject(ExportProjectStore.NewProject(ExportBasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); + + TestWorkerPools Pools; + WorkerThreadPool& NetworkPool = Pools.NetworkPool; + WorkerThreadPool& WorkerPool = Pools.WorkerPool; + + std::shared_ptr<RemoteProjectStore> RemoteStore = + SetupExportStore(ExportCidStore, *ExportProject, NetworkPool, WorkerPool, ExportDir.Path()); + + // Import side, starts empty. 
+ GcManager ImportGc; + CidStore ImportCidStore(ImportGc); + CidStoreConfiguration ImportCidConfig = {.RootDirectory = TempDir.Path() / "import_cas", + .TinyValueThreshold = 1024, + .HugeValueThreshold = 4096}; + ImportCidStore.Initialize(ImportCidConfig); + + std::filesystem::path ImportBasePath = TempDir.Path() / "import_projectstore"; + ProjectStore ImportProjectStore(ImportCidStore, ImportBasePath, ImportGc, ProjectStore::Configuration{}); + Ref<ProjectStore::Project> ImportProject(ImportProjectStore.NewProject(ImportBasePath / "proj1"sv, + "proj1"sv, + RootDir.string(), + EngineRootDir.string(), + ProjectRootDir.string(), + ProjectFilePath.string())); + + const Oid CacheBuildId = Oid::NewOid(); + BuildStorageCache::Statistics CacheStats; + std::unique_ptr<BuildStorageCache> Cache = CreateInMemoryBuildStorageCache(256u, CacheStats); + + { + Ref<ProjectStore::Oplog> Phase1Oplog = ImportProject->NewOplog("oplog_clean_cache_p1", {}); + LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase1Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = false, + .IgnoreMissingAttachments = false, + .CleanOplog = false, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .PopulateCache = true}); + } + + { + Ref<ProjectStore::Oplog> Phase2Oplog = ImportProject->NewOplog("oplog_clean_cache_p2", {}); + CHECK_NOTHROW(LoadOplog(LoadOplogContext{.ChunkStore = ImportCidStore, + .RemoteStore = *RemoteStore, + .OptionalCache = Cache.get(), + .CacheBuildId = CacheBuildId, + .Oplog = *Phase2Oplog, + .NetworkWorkerPool = NetworkPool, + .WorkerPool = WorkerPool, + .ForceDownload = true, + .IgnoreMissingAttachments = false, + .CleanOplog = true, + .PartialBlockRequestMode = EPartialBlockRequestMode::Mixed, + .PopulateCache = false})); + } +} + TEST_SUITE_END(); #endif // ZEN_WITH_TESTS diff --git 
a/src/zenserver-test/projectstore-tests.cpp b/src/zenserver-test/projectstore-tests.cpp index eb2e187d7..52ae937f5 100644 --- a/src/zenserver-test/projectstore-tests.cpp +++ b/src/zenserver-test/projectstore-tests.cpp @@ -576,6 +576,7 @@ TEST_CASE("project.remote") Writer << "maxblocksize"sv << 3072u; Writer << "maxchunkembedsize"sv << 1296u; Writer << "chunkfilesizelimit"sv << 5u * 1024u; + Writer << "maxchunksperblock"sv << 16u; Writer << "force"sv << false; Writer << "file"sv << BeginObject; { @@ -634,6 +635,7 @@ TEST_CASE("project.remote") { Writer << "maxblocksize"sv << 3072u; Writer << "maxchunkembedsize"sv << 1296u; + Writer << "maxchunksperblock"sv << 16u; Writer << "chunkfilesizelimit"sv << 5u * 1024u; Writer << "force"sv << false; Writer << "file"sv << BeginObject; @@ -693,6 +695,7 @@ TEST_CASE("project.remote") { Writer << "maxblocksize"sv << 3072u; Writer << "maxchunkembedsize"sv << 1296u; + Writer << "maxchunksperblock"sv << 16u; Writer << "chunkfilesizelimit"sv << 5u * 1024u; Writer << "force"sv << false; Writer << "file"sv << BeginObject; @@ -755,6 +758,7 @@ TEST_CASE("project.remote") { Writer << "maxblocksize"sv << 3072u; Writer << "maxchunkembedsize"sv << 1296u; + Writer << "maxchunksperblock"sv << 16u; Writer << "chunkfilesizelimit"sv << 5u * 1024u; Writer << "force"sv << false; Writer << "zen"sv << BeginObject; diff --git a/src/zenserver/storage/projectstore/httpprojectstore.cpp b/src/zenserver/storage/projectstore/httpprojectstore.cpp index 2fa10a292..38a121b37 100644 --- a/src/zenserver/storage/projectstore/httpprojectstore.cpp +++ b/src/zenserver/storage/projectstore/httpprojectstore.cpp @@ -636,18 +636,6 @@ namespace { return Result; } - std::pair<HttpResponseCode, std::string> ConvertResult(const RemoteProjectStore::Result& Result) - { - if (Result.ErrorCode == 0) - { - return {HttpResponseCode::OK, Result.Text}; - } - return {static_cast<HttpResponseCode>(Result.ErrorCode), - Result.Reason.empty() ? Result.Text - : Result.Text.empty() ? 
Result.Reason - : fmt::format("{}: {}", Result.Reason, Result.Text)}; - } - static uint64_t GetMaxMemoryBufferSize(size_t MaxBlockSize, bool BoostWorkerMemory) { return BoostWorkerMemory ? (MaxBlockSize + 16u * 1024u) : 1024u * 1024u; @@ -2671,44 +2659,38 @@ HttpProjectService::HandleOplogLoadRequest(HttpRouterRequest& Req) WorkerThreadPool& WorkerPool = GetLargeWorkerPool(EWorkloadType::Background); - RemoteProjectStore::LoadContainerResult ContainerResult = BuildContainer( - m_CidStore, - *Project, - *Oplog, - WorkerPool, - MaxBlockSize, - MaxChunkEmbedSize, - MaxChunksPerBlock, - ChunkFileSizeLimit, - /* BuildBlocks */ false, - /* IgnoreMissingAttachments */ false, - /* AllowChunking*/ false, - [](CompressedBuffer&&, ChunkBlockDescription&&) {}, - [](const IoHash&, TGetAttachmentBufferFunc&&) {}, - [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, - /* EmbedLooseFiles*/ false); - - if (ContainerResult.ErrorCode == 0) - { - return HttpReq.WriteResponse(HttpResponseCode::OK, ContainerResult.ContainerObject); - } - else - { - ZEN_DEBUG("Request {}: '{}' failed with {}. 
Reason: `{}`", - ToString(HttpReq.RequestVerb()), - HttpReq.QueryString(), - ContainerResult.ErrorCode, - ContainerResult.Reason); - - if (ContainerResult.Reason.empty()) + try + { + CbObject ContainerObject = BuildContainer( + m_CidStore, + *Project, + *Oplog, + WorkerPool, + MaxBlockSize, + MaxChunksPerBlock, + MaxChunkEmbedSize, + ChunkFileSizeLimit, + /* BuildBlocks */ false, + /* IgnoreMissingAttachments */ false, + /* AllowChunking*/ false, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, + [](const IoHash&, TGetAttachmentBufferFunc&&) {}, + [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, + /* EmbedLooseFiles*/ false); + return HttpReq.WriteResponse(HttpResponseCode::OK, ContainerObject); + } + catch (const HttpClientError& HttpEx) + { + if (HttpEx.GetInternalErrorCode() != HttpClientErrorCode::kOK) { - return HttpReq.WriteResponse(HttpResponseCode(ContainerResult.ErrorCode)); + return HttpReq.WriteResponse(HttpResponseCode::InternalServerError, HttpContentType::kText, HttpEx.what()); } else { - return HttpReq.WriteResponse(HttpResponseCode(ContainerResult.ErrorCode), HttpContentType::kText, ContainerResult.Reason); + return HttpReq.WriteResponse(HttpEx.GetHttpResponseCode(), HttpContentType::kText, HttpEx.what()); } } + // Let server request handler deal with other exceptions } void @@ -2872,32 +2854,70 @@ HttpProjectService::HandleRpcRequest(HttpRouterRequest& Req) WorkerThreadPool& NetworkWorkerPool = Workers->GetNetworkPool(); Context.ReportMessage(fmt::format("{}", Workers->GetWorkersInfo())); - RemoteProjectStore::Result Result = LoadOplog(LoadOplogContext{ - .ChunkStore = m_CidStore, - .RemoteStore = *RemoteStoreResult->Store, - .OptionalCache = RemoteStoreResult->OptionalCache ? RemoteStoreResult->OptionalCache->Cache.get() : nullptr, - .CacheBuildId = RemoteStoreResult->OptionalCache ? RemoteStoreResult->OptionalCache->BuildsId : Oid::Zero, - .OptionalCacheStats = RemoteStoreResult->OptionalCache ? 
&RemoteStoreResult->OptionalCache->Stats : nullptr, - .Oplog = *Oplog, - .NetworkWorkerPool = NetworkWorkerPool, - .WorkerPool = WorkerPool, - .ForceDownload = Force, - .IgnoreMissingAttachments = IgnoreMissingAttachments, - .CleanOplog = CleanOplog, - .PartialBlockRequestMode = PartialBlockRequestMode, - .PopulateCache = RemoteStoreResult->OptionalCache ? RemoteStoreResult->OptionalCache->Populate : false, - .StoreLatencySec = RemoteStoreResult->LatencySec, - .StoreMaxRangeCountPerRequest = RemoteStoreResult->MaxRangeCountPerRequest, - .CacheLatencySec = RemoteStoreResult->OptionalCache ? RemoteStoreResult->OptionalCache->LatencySec : -1.0, - .CacheMaxRangeCountPerRequest = - RemoteStoreResult->OptionalCache ? RemoteStoreResult->OptionalCache->MaxRangeCountPerRequest : 0, - .OptionalJobContext = &Context}); - auto Response = ConvertResult(Result); - ZEN_INFO("LoadOplog: Status: {} '{}'", ToString(Response.first), Response.second); - if (!IsHttpSuccessCode(Response.first)) + + try { - throw JobError(Response.second.empty() ? fmt::format("Status: {}", ToString(Response.first)) : Response.second, - (int)Response.first); + LoadOplog(LoadOplogContext{ + .ChunkStore = m_CidStore, + .RemoteStore = *RemoteStoreResult->Store, + .OptionalCache = RemoteStoreResult->OptionalCache ? RemoteStoreResult->OptionalCache->Cache.get() : nullptr, + .CacheBuildId = RemoteStoreResult->OptionalCache ? RemoteStoreResult->OptionalCache->BuildsId : Oid::Zero, + .OptionalCacheStats = RemoteStoreResult->OptionalCache ? &RemoteStoreResult->OptionalCache->Stats : nullptr, + .Oplog = *Oplog, + .NetworkWorkerPool = NetworkWorkerPool, + .WorkerPool = WorkerPool, + .ForceDownload = Force, + .IgnoreMissingAttachments = IgnoreMissingAttachments, + .CleanOplog = CleanOplog, + .PartialBlockRequestMode = PartialBlockRequestMode, + .PopulateCache = RemoteStoreResult->OptionalCache ? 
RemoteStoreResult->OptionalCache->Populate : false, + .StoreLatencySec = RemoteStoreResult->LatencySec, + .StoreMaxRangeCountPerRequest = RemoteStoreResult->MaxRangeCountPerRequest, + .CacheLatencySec = RemoteStoreResult->OptionalCache ? RemoteStoreResult->OptionalCache->LatencySec : -1.0, + .CacheMaxRangeCountPerRequest = + RemoteStoreResult->OptionalCache ? RemoteStoreResult->OptionalCache->MaxRangeCountPerRequest : 0, + .OptionalJobContext = &Context}); + } + catch (const HttpClientError& HttpEx) + { + if (HttpEx.GetInternalErrorCode() != HttpClientErrorCode::kOK) + { + throw JobError(fmt::format("Failed due to an http exception (Err: {}): {}", + static_cast<int>(HttpEx.GetInternalErrorCode()), + HttpEx.what()), + static_cast<int>(HttpEx.GetResponseClass())); + } + else + { + throw JobError(fmt::format("Failed due to an http exception (Status: {}): {}", + static_cast<int>(HttpEx.GetHttpResponseCode()), + HttpEx.what()), + static_cast<int>(HttpEx.GetHttpResponseCode())); + } + } + catch (const AssertException& AssertEx) + { + throw JobError(fmt::format("Failed due to an assert exception: {}", AssertEx.FullDescription()), + static_cast<int>(HttpResponseCode::InternalServerError)); + } + catch (const std::system_error& SysEx) + { + throw JobError(fmt::format("Failed due to a system error ({}): {}", SysEx.code().value(), SysEx.what()), + SysEx.code().value()); + } + catch (const std::exception& Ex) + { + throw JobError(fmt::format("Failed due to an exception: {}", Ex.what()), + static_cast<int>(HttpResponseCode::InternalServerError)); + } + + if (Context.IsCancelled()) + { + ZEN_INFO("LoadOplog: Operation cancelled"); + } + else + { + ZEN_INFO("LoadOplog: Complete"); } }); @@ -2961,26 +2981,63 @@ HttpProjectService::HandleRpcRequest(HttpRouterRequest& Req) WorkerThreadPool& WorkerPool = Workers->GetIOWorkerPool(); WorkerThreadPool& NetworkWorkerPool = Workers->GetNetworkPool(); - RemoteProjectStore::Result Result = SaveOplog(m_CidStore, - *ActualRemoteStore, - 
*Project, - *Oplog, - NetworkWorkerPool, - WorkerPool, - MaxBlockSize, - MaxChunksPerBlock, - MaxChunkEmbedSize, - ChunkFileSizeLimit, - EmbedLooseFile, - Force, - IgnoreMissingAttachments, - &Context); - auto Response = ConvertResult(Result); - ZEN_INFO("SaveOplog: Status: {} '{}'", ToString(Response.first), Response.second); - if (!IsHttpSuccessCode(Response.first)) + try + { + SaveOplog(m_CidStore, + *ActualRemoteStore, + *Project, + *Oplog, + NetworkWorkerPool, + WorkerPool, + MaxBlockSize, + MaxChunksPerBlock, + MaxChunkEmbedSize, + ChunkFileSizeLimit, + EmbedLooseFile, + Force, + IgnoreMissingAttachments, + &Context); + } + catch (const HttpClientError& HttpEx) + { + if (HttpEx.GetInternalErrorCode() != HttpClientErrorCode::kOK) + { + throw JobError(fmt::format("Failed due to an http exception (Err: {}): {}", + static_cast<int>(HttpEx.GetInternalErrorCode()), + HttpEx.what()), + static_cast<int>(HttpEx.GetResponseClass())); + } + else + { + throw JobError(fmt::format("Failed due to an http exception (Status: {}): {}", + static_cast<int>(HttpEx.GetHttpResponseCode()), + HttpEx.what()), + static_cast<int>(HttpEx.GetHttpResponseCode())); + } + } + catch (const AssertException& AssertEx) + { + throw JobError(fmt::format("Failed due to an assert exception: {}", AssertEx.FullDescription()), + static_cast<int>(HttpResponseCode::InternalServerError)); + } + catch (const std::system_error& SysEx) + { + throw JobError(fmt::format("Failed due to a system error ({}): {}", SysEx.code().value(), SysEx.what()), + SysEx.code().value()); + } + catch (const std::exception& Ex) + { + throw JobError(fmt::format("Failed due to an exception: {}", Ex.what()), + static_cast<int>(HttpResponseCode::InternalServerError)); + } + + if (Context.IsCancelled()) + { + ZEN_INFO("SaveOplog: Operation cancelled"); + } + else { - throw JobError(Response.second.empty() ? 
fmt::format("Status: {}", ToString(Response.first)) : Response.second, - (int)Response.first); + ZEN_INFO("SaveOplog: Complete"); } }); |