From da4826d560a66b8a5f09158a93c83caa12348c7b Mon Sep 17 00:00:00 2001
From: Dan Engelbrecht
Date: Fri, 20 Feb 2026 10:32:32 +0100
Subject: move partial chunk block analysis to chunkblock.h/cpp (#767)

---
 src/zenremotestore/chunking/chunkblock.cpp | 540 ++++++++++++++++++++++++++++-
 1 file changed, 535 insertions(+), 5 deletions(-)

(limited to 'src/zenremotestore/chunking/chunkblock.cpp')

diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp
index c4d8653f4..06cedae3f 100644
--- a/src/zenremotestore/chunking/chunkblock.cpp
+++ b/src/zenremotestore/chunking/chunkblock.cpp
@@ -10,18 +10,17 @@
 #include
+#include
 #include

 ZEN_THIRD_PARTY_INCLUDES_START
 #include
+#include
 ZEN_THIRD_PARTY_INCLUDES_END

 #if ZEN_WITH_TESTS
 # include
 # include
-
-# include
-# include
 #endif // ZEN_WITH_TESTS

 namespace zen {
@@ -455,6 +454,537 @@ FindReuseBlocks(OperationLogOutput& Output,
     return FilteredReuseBlockIndexes;
 }

+ChunkBlockAnalyser::ChunkBlockAnalyser(OperationLogOutput&                    LogOutput,
+                                       std::span<const ChunkBlockDescription> BlockDescriptions,
+                                       const Options&                         Options)
+: m_LogOutput(LogOutput)
+, m_BlockDescriptions(BlockDescriptions)
+, m_Options(Options)
+{
+}
+
+std::vector<ChunkBlockAnalyser::NeededBlock>
+ChunkBlockAnalyser::GetNeeded(const tsl::robin_map<IoHash, uint32_t>& ChunkHashToChunkIndex,
+                              std::function<bool(uint32_t)>&&         NeedsBlockChunk)
+{
+    ZEN_TRACE_CPU("ChunkBlockAnalyser::GetNeeded");
+
+    std::vector<NeededBlock> Result;
+
+    std::vector<bool> ChunkIsNeeded(ChunkHashToChunkIndex.size());
+    for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkHashToChunkIndex.size(); ChunkIndex++)
+    {
+        ChunkIsNeeded[ChunkIndex] = NeedsBlockChunk(ChunkIndex);
+    }
+
+    std::vector<uint64_t> BlockSlack(m_BlockDescriptions.size(), 0u);
+    for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++)
+    {
+        const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
+
+        uint64_t BlockUsedSize = 0;
+        uint64_t BlockSize     = 0;
+
+        for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++)
+        {
+            const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex];
+            if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
+            {
+                const uint32_t RemoteChunkIndex = It->second;
+                if (ChunkIsNeeded[RemoteChunkIndex])
+                {
+                    BlockUsedSize += BlockDescription.ChunkCompressedLengths[ChunkBlockIndex];
+                }
+            }
+            BlockSize += BlockDescription.ChunkCompressedLengths[ChunkBlockIndex];
+        }
+        BlockSlack[BlockIndex] = BlockSize - BlockUsedSize;
+    }
+
+    std::vector<uint32_t> BlockOrder(m_BlockDescriptions.size());
+    std::iota(BlockOrder.begin(), BlockOrder.end(), 0);
+
+    std::sort(BlockOrder.begin(), BlockOrder.end(), [&BlockSlack](uint32_t Lhs, uint32_t Rhs) {
+        return BlockSlack[Lhs] < BlockSlack[Rhs];
+    });
+
+    std::vector<bool> ChunkIsPickedUp(ChunkHashToChunkIndex.size(), false);
+
+    for (uint32_t BlockIndex : BlockOrder)
+    {
+        const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
+
+        std::vector<uint32_t> BlockChunkIndexNeeded;
+
+        for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++)
+        {
+            const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex];
+            if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
+            {
+                const uint32_t RemoteChunkIndex = It->second;
+                if (ChunkIsNeeded[RemoteChunkIndex])
+                {
+                    if (!ChunkIsPickedUp[RemoteChunkIndex])
+                    {
+                        ChunkIsPickedUp[RemoteChunkIndex] = true;
+                        BlockChunkIndexNeeded.push_back(ChunkBlockIndex);
+                    }
+                }
+            }
+            else
+            {
ZEN_DEBUG("Chunk {} not found in block {}", ChunkHash, BlockDescription.BlockHash); + } + } + + if (!BlockChunkIndexNeeded.empty()) + { + Result.push_back(NeededBlock{.BlockIndex = BlockIndex, .ChunkIndexes = std::move(BlockChunkIndexNeeded)}); + } + } + return Result; +} + +ChunkBlockAnalyser::BlockResult +ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span NeededBlocks, + std::span BlockPartialDownloadModes) +{ + ZEN_TRACE_CPU("ChunkBlockAnalyser::CalculatePartialBlockDownloads"); + + Stopwatch PartialAnalisysTimer; + + ChunkBlockAnalyser::BlockResult Result; + + uint64_t IdealDownloadTotalSize = 0; + uint64_t AllBlocksTotalBlocksSize = 0; + + for (const NeededBlock& NeededBlock : NeededBlocks) + { + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex]; + + std::span BlockChunkIndexNeeded(NeededBlock.ChunkIndexes); + if (!NeededBlock.ChunkIndexes.empty()) + { + bool WantsToDoPartialBlockDownload = NeededBlock.ChunkIndexes.size() < BlockDescription.ChunkRawHashes.size(); + bool CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) && + (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size()); + + EPartialBlockDownloadMode PartialBlockDownloadMode = BlockPartialDownloadModes[NeededBlock.BlockIndex]; + + const uint32_t ChunkStartOffsetInBlock = + gsl::narrow(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + + const uint64_t TotalBlockSize = std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), + BlockDescription.ChunkCompressedLengths.end(), + std::uint64_t(ChunkStartOffsetInBlock)); + + AllBlocksTotalBlocksSize += TotalBlockSize; + + if ((PartialBlockDownloadMode != EPartialBlockDownloadMode::Off) && WantsToDoPartialBlockDownload && CanDoPartialBlockDownload) + { + ZEN_TRACE_CPU("PartialBlockAnalysis"); + + uint64_t TotalWantedChunksSize = 0; + std::optional> MaybeBlockRanges = CalculateBlockRanges(NeededBlock.BlockIndex, + BlockDescription, + NeededBlock.ChunkIndexes, + PartialBlockDownloadMode, + ChunkStartOffsetInBlock, + TotalBlockSize, + TotalWantedChunksSize); + ZEN_ASSERT(TotalWantedChunksSize <= TotalBlockSize); + IdealDownloadTotalSize += TotalWantedChunksSize; + + if (MaybeBlockRanges.has_value()) + { + const std::vector& BlockRanges = MaybeBlockRanges.value(); + ZEN_ASSERT(!BlockRanges.empty()); + + uint64_t RequestedSize = + std::accumulate(BlockRanges.begin(), + BlockRanges.end(), + uint64_t(0), + [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; }); + + if ((PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact) && ((RequestedSize * 100) / TotalBlockSize) >= 200) + { + if (m_Options.IsVerbose) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Requesting {} chunks ({}) from block {} ({}) using full block request (extra bytes {})", + NeededBlock.ChunkIndexes.size(), + NiceBytes(RequestedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + NiceBytes(TotalBlockSize - TotalWantedChunksSize)); + } + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + } + else + { + Result.BlockRanges.insert(Result.BlockRanges.end(), BlockRanges.begin(), BlockRanges.end()); + + if (RequestedSize > TotalWantedChunksSize) + { + if (m_Options.IsVerbose) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Requesting {} chunks ({}) from block {} ({}) using {} requests (extra bytes {})", + NeededBlock.ChunkIndexes.size(), + NiceBytes(RequestedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + 
BlockRanges.size(), + NiceBytes(RequestedSize - TotalWantedChunksSize)); + } + } + } + } + else + { + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + } + } + else + { + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + IdealDownloadTotalSize += TotalBlockSize; + } + } + } + + if (!Result.BlockRanges.empty() && !m_Options.IsQuiet) + { + tsl::robin_set PartialBlockIndexes; + uint64_t PartialBlocksTotalSize = std::accumulate(Result.BlockRanges.begin(), + Result.BlockRanges.end(), + uint64_t(0u), + [&](uint64_t Current, const BlockRangeDescriptor& Range) { + PartialBlockIndexes.insert(Range.BlockIndex); + return Current + Range.RangeLength; + }); + + uint64_t FullBlocksTotalSize = + std::accumulate(Result.FullBlockIndexes.begin(), + Result.FullBlockIndexes.end(), + uint64_t(0u), + [&](uint64_t Current, uint32_t BlockIndex) { + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; + uint32_t CurrentOffset = + gsl::narrow(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + + return Current + std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), + BlockDescription.ChunkCompressedLengths.end(), + std::uint64_t(CurrentOffset)); + }); + + uint64_t PartialBlockRequestCount = Result.BlockRanges.size(); + uint64_t PartialBlockCount = PartialBlockIndexes.size(); + + uint64_t TotalExtraPartialBlocksRequestCount = PartialBlockRequestCount - PartialBlockCount; + uint64_t ActualPartialDownloadTotalSize = FullBlocksTotalSize + PartialBlocksTotalSize; + + uint64_t IdealSkippedSize = AllBlocksTotalBlocksSize - IdealDownloadTotalSize; + uint64_t ActualSkippedSize = AllBlocksTotalBlocksSize - ActualPartialDownloadTotalSize; + + double PercentOfIdealPartialSkippedSize = (ActualSkippedSize * 100.0) / IdealSkippedSize; + + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Analysis of partial block requests saves download of {} out of {}, {:.1f}% of possible {} using {} extra " + "requests. 
Completed in {}", + NiceBytes(ActualSkippedSize), + NiceBytes(AllBlocksTotalBlocksSize), + PercentOfIdealPartialSkippedSize, + NiceBytes(IdealSkippedSize), + TotalExtraPartialBlocksRequestCount, + NiceTimeSpanMs(PartialAnalisysTimer.GetElapsedTimeMs())); + } + + return Result; +} + +ChunkBlockAnalyser::BlockRangeDescriptor +ChunkBlockAnalyser::MergeBlockRanges(std::span Ranges) +{ + ZEN_ASSERT(Ranges.size() > 1); + const BlockRangeDescriptor& First = Ranges.front(); + const BlockRangeDescriptor& Last = Ranges.back(); + + return BlockRangeDescriptor{.BlockIndex = First.BlockIndex, + .RangeStart = First.RangeStart, + .RangeLength = Last.RangeStart + Last.RangeLength - First.RangeStart, + .ChunkBlockIndexStart = First.ChunkBlockIndexStart, + .ChunkBlockIndexCount = Last.ChunkBlockIndexStart + Last.ChunkBlockIndexCount - First.ChunkBlockIndexStart}; +} + +std::optional> +ChunkBlockAnalyser::MakeOptionalBlockRangeVector(uint64_t TotalBlockSize, const BlockRangeDescriptor& Range) +{ + if (Range.RangeLength == TotalBlockSize) + { + return {}; + } + else + { + return std::vector{Range}; + } +}; + +const ChunkBlockAnalyser::BlockRangeLimit* +ChunkBlockAnalyser::GetBlockRangeLimitForRange(std::span Limits, + uint64_t TotalBlockSize, + std::span Ranges) +{ + if (Ranges.size() > 1) + { + const std::uint64_t WantedSize = + std::accumulate(Ranges.begin(), Ranges.end(), uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) { + return Current + Range.RangeLength; + }); + + const double RangeRequestedPercent = (WantedSize * 100.0) / TotalBlockSize; + + for (const BlockRangeLimit& Limit : Limits) + { + if (RangeRequestedPercent >= Limit.SizePercent && Ranges.size() > Limit.MaxRangeCount) + { + return &Limit; + } + } + } + return nullptr; +}; + +std::vector +ChunkBlockAnalyser::CollapseBlockRanges(const uint64_t AlwaysAcceptableGap, std::span BlockRanges) +{ + ZEN_ASSERT(BlockRanges.size() > 1); + std::vector CollapsedBlockRanges; + + auto BlockRangesIt = BlockRanges.begin(); + CollapsedBlockRanges.push_back(*BlockRangesIt++); + for (; BlockRangesIt != BlockRanges.end(); BlockRangesIt++) + { + BlockRangeDescriptor& LastRange = CollapsedBlockRanges.back(); + + const uint64_t BothRangeSize = BlockRangesIt->RangeLength + LastRange.RangeLength; + + const uint64_t Gap = BlockRangesIt->RangeStart - (LastRange.RangeStart + LastRange.RangeLength); + if (Gap <= Max(BothRangeSize / 16, AlwaysAcceptableGap)) + { + LastRange.ChunkBlockIndexCount = + (BlockRangesIt->ChunkBlockIndexStart + BlockRangesIt->ChunkBlockIndexCount) - LastRange.ChunkBlockIndexStart; + LastRange.RangeLength = (BlockRangesIt->RangeStart + BlockRangesIt->RangeLength) - LastRange.RangeStart; + } + else + { + CollapsedBlockRanges.push_back(*BlockRangesIt); + } + } + + return CollapsedBlockRanges; +}; + +uint64_t +ChunkBlockAnalyser::CalculateNextGap(std::span BlockRanges) +{ + ZEN_ASSERT(BlockRanges.size() > 1); + uint64_t AcceptableGap = (uint64_t)-1; + for (size_t RangeIndex = 0; RangeIndex < BlockRanges.size() - 1; RangeIndex++) + { + const BlockRangeDescriptor& Range = BlockRanges[RangeIndex]; + const BlockRangeDescriptor& NextRange = BlockRanges[RangeIndex + 1]; + + const uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength); + AcceptableGap = Min(Gap, AcceptableGap); + } + AcceptableGap = RoundUp(AcceptableGap, 16u * 1024u); + return AcceptableGap; +}; + +std::optional> +ChunkBlockAnalyser::CalculateBlockRanges(uint32_t BlockIndex, + const ChunkBlockDescription& BlockDescription, + std::span 
BlockChunkIndexNeeded, + EPartialBlockDownloadMode PartialBlockDownloadMode, + const uint64_t ChunkStartOffsetInBlock, + const uint64_t TotalBlockSize, + uint64_t& OutTotalWantedChunksSize) +{ + ZEN_TRACE_CPU("CalculateBlockRanges"); + + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Off) + { + return {}; + } + + std::vector BlockRanges; + { + uint64_t CurrentOffset = ChunkStartOffsetInBlock; + uint32_t ChunkBlockIndex = 0; + uint32_t NeedBlockChunkIndexOffset = 0; + BlockRangeDescriptor NextRange{.BlockIndex = BlockIndex}; + while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) + { + const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + NextRange = {.BlockIndex = BlockIndex}; + } + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + } + else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength == 0) + { + NextRange.RangeStart = CurrentOffset; + NextRange.ChunkBlockIndexStart = ChunkBlockIndex; + } + NextRange.RangeLength += ChunkCompressedLength; + NextRange.ChunkBlockIndexCount++; + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + NeedBlockChunkIndexOffset++; + } + else + { + ZEN_ASSERT(false); + } + } + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + } + } + ZEN_ASSERT(!BlockRanges.empty()); + + OutTotalWantedChunksSize = + std::accumulate(BlockRanges.begin(), BlockRanges.end(), uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) { + return Current + Range.RangeLength; + }); + + double RangeWantedPercent = (OutTotalWantedChunksSize * 100.0) / TotalBlockSize; + + if (BlockRanges.size() == 1) + { + if (m_Options.IsVerbose) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Range request of {} ({:.2f}%) using single range from block {} ({}) as is", + NiceBytes(OutTotalWantedChunksSize), + RangeWantedPercent, + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize)); + } + return BlockRanges; + } + + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Exact) + { + if (m_Options.IsVerbose) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Range request of {} ({:.2f}%) using {} ranges from block {} ({})", + NiceBytes(OutTotalWantedChunksSize), + RangeWantedPercent, + BlockRanges.size(), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize)); + } + return BlockRanges; + } + + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::SingleRange) + { + const BlockRangeDescriptor MergedRange = MergeBlockRanges(BlockRanges); + if (m_Options.IsVerbose) + { + const double RangeRequestedPercent = (MergedRange.RangeLength * 100.0) / TotalBlockSize; + const double WastedPercent = ((MergedRange.RangeLength - OutTotalWantedChunksSize) * 100.0) / MergedRange.RangeLength; + + ZEN_OPERATION_LOG_INFO( + m_LogOutput, + "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) limited to single block range {} ({:.2f}%) wasting " + "{:.2f}% ({})", + NiceBytes(OutTotalWantedChunksSize), + RangeWantedPercent, + BlockRanges.size(), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + NiceBytes(MergedRange.RangeLength), + RangeRequestedPercent, + WastedPercent, + NiceBytes(MergedRange.RangeLength - OutTotalWantedChunksSize)); + } + return MakeOptionalBlockRangeVector(TotalBlockSize, MergedRange); + } + + 
if (RangeWantedPercent > FullBlockRangePercentLimit) + { + const BlockRangeDescriptor MergedRange = MergeBlockRanges(BlockRanges); + if (m_Options.IsVerbose) + { + const double RangeRequestedPercent = (MergedRange.RangeLength * 100.0) / TotalBlockSize; + const double WastedPercent = ((MergedRange.RangeLength - OutTotalWantedChunksSize) * 100.0) / MergedRange.RangeLength; + + ZEN_OPERATION_LOG_INFO( + m_LogOutput, + "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) exceeds {}%. Merged to single block range {} " + "({:.2f}%) wasting {:.2f}% ({})", + NiceBytes(OutTotalWantedChunksSize), + RangeWantedPercent, + BlockRanges.size(), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + FullBlockRangePercentLimit, + NiceBytes(MergedRange.RangeLength), + RangeRequestedPercent, + WastedPercent, + NiceBytes(MergedRange.RangeLength - OutTotalWantedChunksSize)); + } + return MakeOptionalBlockRangeVector(TotalBlockSize, MergedRange); + } + + std::vector CollapsedBlockRanges = CollapseBlockRanges(16u * 1024u, BlockRanges); + while (GetBlockRangeLimitForRange(ForceMergeLimits, TotalBlockSize, CollapsedBlockRanges)) + { + CollapsedBlockRanges = CollapseBlockRanges(CalculateNextGap(CollapsedBlockRanges), CollapsedBlockRanges); + } + + const std::uint64_t WantedCollapsedSize = + std::accumulate(CollapsedBlockRanges.begin(), + CollapsedBlockRanges.end(), + uint64_t(0), + [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; }); + + const double CollapsedRangeRequestedPercent = (WantedCollapsedSize * 100.0) / TotalBlockSize; + + if (m_Options.IsVerbose) + { + const double WastedPercent = ((WantedCollapsedSize - OutTotalWantedChunksSize) * 100.0) / WantedCollapsedSize; + + ZEN_OPERATION_LOG_INFO( + m_LogOutput, + "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) collapsed to {} {:.2f}% using {} ranges wasting {:.2f}% " + "({})", + NiceBytes(OutTotalWantedChunksSize), + RangeWantedPercent, + BlockRanges.size(), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + NiceBytes(WantedCollapsedSize), + CollapsedRangeRequestedPercent, + CollapsedBlockRanges.size(), + WastedPercent, + NiceBytes(WantedCollapsedSize - OutTotalWantedChunksSize)); + } + return CollapsedBlockRanges; +} + #if ZEN_WITH_TESTS namespace testutils { @@ -476,7 +1006,7 @@ namespace testutils { } // namespace testutils -TEST_CASE("project.store.block") +TEST_CASE("chunkblock.block") { using namespace std::literals; using namespace testutils; @@ -504,7 +1034,7 @@ TEST_CASE("project.store.block") HeaderSize)); } -TEST_CASE("project.store.reuseblocks") +TEST_CASE("chunkblock.reuseblocks") { using namespace std::literals; using namespace testutils; -- cgit v1.2.3 From eb3079e2ec2969829cbc5b6921575d53df351f0f Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 24 Feb 2026 16:10:36 +0100 Subject: use partial blocks for oplog import (#780) Feature: Add --allow-partial-block-requests to zen oplog-import Improvement: zen oplog-import now uses partial block requests to reduce download size Improvement: Use latency to Cloud Storage host and Zen Cache host when calculating partial block requests --- src/zenremotestore/chunking/chunkblock.cpp | 79 +++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 19 deletions(-) (limited to 'src/zenremotestore/chunking/chunkblock.cpp') diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp index 06cedae3f..d203e0292 100644 --- a/src/zenremotestore/chunking/chunkblock.cpp +++ 
b/src/zenremotestore/chunking/chunkblock.cpp @@ -597,7 +597,7 @@ ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span if (MaybeBlockRanges.has_value()) { - const std::vector& BlockRanges = MaybeBlockRanges.value(); + std::vector BlockRanges = MaybeBlockRanges.value(); ZEN_ASSERT(!BlockRanges.empty()); uint64_t RequestedSize = @@ -606,12 +606,54 @@ ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; }); - if ((PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact) && ((RequestedSize * 100) / TotalBlockSize) >= 200) + if (PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact && BlockRanges.size() > 1) + { + // TODO: Once we have support in our http client to request multiple ranges in one request this + // logic would need to change as the per-request overhead would go away + + const double LatencySec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed + ? m_Options.HostHighSpeedLatencySec + : m_Options.HostLatencySec; + if (LatencySec > 0) + { + const uint64_t BytesPerSec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed + ? m_Options.HostHighSpeedBytesPerSec + : m_Options.HostSpeedBytesPerSec; + + const double ExtraRequestTimeSec = (BlockRanges.size() - 1) * LatencySec; + const uint64_t ExtraRequestTimeBytes = uint64_t(ExtraRequestTimeSec * BytesPerSec); + + const uint64_t FullRangeSize = + BlockRanges.back().RangeStart + BlockRanges.back().RangeLength - BlockRanges.front().RangeStart; + + if (ExtraRequestTimeBytes + RequestedSize >= FullRangeSize) + { + BlockRanges = std::vector{MergeBlockRanges(BlockRanges)}; + + if (m_Options.IsVerbose) + { + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Merging {} chunks ({}) from block {} ({}) to single request (extra bytes {})", + NeededBlock.ChunkIndexes.size(), + NiceBytes(RequestedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + NiceBytes(BlockRanges.front().RangeLength - RequestedSize)); + } + + RequestedSize = BlockRanges.front().RangeLength; + } + } + } + + if ((PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact) && + ((TotalBlockSize - RequestedSize) < (512u * 1024u))) { if (m_Options.IsVerbose) { ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Requesting {} chunks ({}) from block {} ({}) using full block request (extra bytes {})", + "Requesting {} chunks ({}) from block {} ({}) using full block request due to small " + "total slack (extra bytes {})", NeededBlock.ChunkIndexes.size(), NiceBytes(RequestedSize), BlockDescription.BlockHash, @@ -624,19 +666,16 @@ ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span { Result.BlockRanges.insert(Result.BlockRanges.end(), BlockRanges.begin(), BlockRanges.end()); - if (RequestedSize > TotalWantedChunksSize) + if (m_Options.IsVerbose) { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Requesting {} chunks ({}) from block {} ({}) using {} requests (extra bytes {})", - NeededBlock.ChunkIndexes.size(), - NiceBytes(RequestedSize), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - BlockRanges.size(), - NiceBytes(RequestedSize - TotalWantedChunksSize)); - } + ZEN_OPERATION_LOG_INFO(m_LogOutput, + "Requesting {} chunks ({}) from block {} ({}) using {} requests (extra bytes {})", + NeededBlock.ChunkIndexes.size(), + NiceBytes(RequestedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + BlockRanges.size(), + NiceBytes(RequestedSize - 
TotalWantedChunksSize));
                 }
             }
         }
@@ -786,7 +825,7 @@ ChunkBlockAnalyser::CollapseBlockRanges(const uint64_t AlwaysAcceptableGap, std:
 };

 uint64_t
-ChunkBlockAnalyser::CalculateNextGap(std::span<const BlockRangeDescriptor> BlockRanges)
+ChunkBlockAnalyser::CalculateNextGap(const uint64_t AlwaysAcceptableGap, std::span<const BlockRangeDescriptor> BlockRanges)
 {
     ZEN_ASSERT(BlockRanges.size() > 1);
     uint64_t AcceptableGap = (uint64_t)-1;
@@ -798,7 +837,7 @@ ChunkBlockAnalyser::CalculateNextGap(std::span Block
         const uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength);
         AcceptableGap      = Min(Gap, AcceptableGap);
     }
-    AcceptableGap = RoundUp(AcceptableGap, 16u * 1024u);
+    AcceptableGap = RoundUp(AcceptableGap, AlwaysAcceptableGap);
     return AcceptableGap;
 };

@@ -949,10 +988,12 @@ ChunkBlockAnalyser::CalculateBlockRanges(uint32_t BlockIndex,
         return MakeOptionalBlockRangeVector(TotalBlockSize, MergedRange);
     }

-    std::vector<BlockRangeDescriptor> CollapsedBlockRanges = CollapseBlockRanges(16u * 1024u, BlockRanges);
+    const uint64_t AlwaysAcceptableGap = 4u * 1024u;
+
+    std::vector<BlockRangeDescriptor> CollapsedBlockRanges = CollapseBlockRanges(AlwaysAcceptableGap, BlockRanges);
     while (GetBlockRangeLimitForRange(ForceMergeLimits, TotalBlockSize, CollapsedBlockRanges))
     {
-        CollapsedBlockRanges = CollapseBlockRanges(CalculateNextGap(CollapsedBlockRanges), CollapsedBlockRanges);
+        CollapsedBlockRanges = CollapseBlockRanges(CalculateNextGap(AlwaysAcceptableGap, CollapsedBlockRanges), CollapsedBlockRanges);
     }

     const std::uint64_t WantedCollapsedSize =
-- cgit v1.2.3

From d604351cb5dc3032a7cb8c84d6ad5f1480325e5c Mon Sep 17 00:00:00 2001
From: Stefan Boberg
Date: Mon, 2 Mar 2026 09:37:14 +0100
Subject: Add test suites (#799)

Makes all test cases part of a test suite. Test suites are named after the
module and the name of the file containing the implementation of the test.

* This allows for better and more predictable filtering of which test cases
  to run, which should also reduce the time CI spends in tests since it can
  filter on the tests for that particular module.

Also improves `xmake test` behaviour:

* instead of an explicit list of projects, just enumerate the test projects
  that are available based on build system state
* also introduces logic to avoid running `xmake config` unnecessarily, which
  would invalidate the existing build and do lots of unnecessary work since
  dependencies were invalidated by the updated config
* also invokes the build only for the chosen test targets

As a bonus, also adds `xmake sln --open`, which allows opening the IDE after
generation of the solution/xmake project is done.
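Since the suite names are what CI filters on, here is a minimal sketch of how that filtering looks with doctest (the framework behind the TEST_SUITE/TEST_CASE macros in this file); the test binary name `zenserver-tests` is an illustrative assumption, not part of the patch:

    // Cases registered between the suite markers belong to that suite:
    TEST_SUITE_BEGIN("remotestore.chunkblock");
    TEST_CASE("chunkblock.block") { /* ... */ }
    TEST_SUITE_END();

    // A doctest binary can then restrict a run to one module's suites,
    // wildcards included:
    //   zenserver-tests --test-suite=remotestore.chunkblock
    //   zenserver-tests -ts='remotestore.*'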
--- src/zenremotestore/chunking/chunkblock.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/zenremotestore/chunking/chunkblock.cpp') diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp index d203e0292..3a4e6011d 100644 --- a/src/zenremotestore/chunking/chunkblock.cpp +++ b/src/zenremotestore/chunking/chunkblock.cpp @@ -1047,6 +1047,8 @@ namespace testutils { } // namespace testutils +TEST_SUITE_BEGIN("remotestore.chunkblock"); + TEST_CASE("chunkblock.block") { using namespace std::literals; @@ -1315,6 +1317,8 @@ TEST_CASE("chunkblock.reuseblocks") } } +TEST_SUITE_END(); + void chunkblock_forcelink() { -- cgit v1.2.3 From 463a0fde16b827c0ec44c9e88fe3c8c8098aa5ea Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 3 Mar 2026 20:49:01 +0100 Subject: use multi range requests (#800) - Improvement: `zen builds download` now uses multi-range requests for blocks to reduce download size - Improvement: `zen oplog-import` now uses partial block with multi-range requests for blocks to reduce download size - Improvement: Improved feedback in log/console during `zen oplog-import` - Improvement: `--allow-partial-block-requests` now defaults to `true` for `zen builds download` and `zen oplog-import` (was `mixed`) - Improvement: Improved range merging analysis when downloading partial blocks --- src/zenremotestore/chunking/chunkblock.cpp | 63 +++++++++++++++++------------- 1 file changed, 36 insertions(+), 27 deletions(-) (limited to 'src/zenremotestore/chunking/chunkblock.cpp') diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp index 3a4e6011d..9c3fe8a0b 100644 --- a/src/zenremotestore/chunking/chunkblock.cpp +++ b/src/zenremotestore/chunking/chunkblock.cpp @@ -608,40 +608,49 @@ ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span if (PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact && BlockRanges.size() > 1) { - // TODO: Once we have support in our http client to request multiple ranges in one request this - // logic would need to change as the per-request overhead would go away + const uint64_t MaxRangeCountPerRequest = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed + ? m_Options.HostHighSpeedMaxRangeCountPerRequest + : m_Options.HostMaxRangeCountPerRequest; - const double LatencySec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed - ? m_Options.HostHighSpeedLatencySec - : m_Options.HostLatencySec; - if (LatencySec > 0) + ZEN_ASSERT(MaxRangeCountPerRequest != 0); + + if (MaxRangeCountPerRequest != (uint64_t)-1) { - const uint64_t BytesPerSec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed - ? m_Options.HostHighSpeedBytesPerSec - : m_Options.HostSpeedBytesPerSec; + const uint64_t ExtraRequestCount = BlockRanges.size() / MaxRangeCountPerRequest; - const double ExtraRequestTimeSec = (BlockRanges.size() - 1) * LatencySec; - const uint64_t ExtraRequestTimeBytes = uint64_t(ExtraRequestTimeSec * BytesPerSec); + const double LatencySec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed + ? m_Options.HostHighSpeedLatencySec + : m_Options.HostLatencySec; + if (LatencySec > 0) + { + const uint64_t BytesPerSec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed + ? 
m_Options.HostHighSpeedBytesPerSec + : m_Options.HostSpeedBytesPerSec; - const uint64_t FullRangeSize = - BlockRanges.back().RangeStart + BlockRanges.back().RangeLength - BlockRanges.front().RangeStart; + const double ExtraRequestTimeSec = ExtraRequestCount * LatencySec; + const uint64_t ExtraRequestTimeBytes = uint64_t(ExtraRequestTimeSec * BytesPerSec); - if (ExtraRequestTimeBytes + RequestedSize >= FullRangeSize) - { - BlockRanges = std::vector{MergeBlockRanges(BlockRanges)}; + const uint64_t FullRangeSize = + BlockRanges.back().RangeStart + BlockRanges.back().RangeLength - BlockRanges.front().RangeStart; - if (m_Options.IsVerbose) + if (ExtraRequestTimeBytes + RequestedSize >= FullRangeSize) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Merging {} chunks ({}) from block {} ({}) to single request (extra bytes {})", - NeededBlock.ChunkIndexes.size(), - NiceBytes(RequestedSize), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - NiceBytes(BlockRanges.front().RangeLength - RequestedSize)); + BlockRanges = std::vector{MergeBlockRanges(BlockRanges)}; + + if (m_Options.IsVerbose) + { + ZEN_OPERATION_LOG_INFO( + m_LogOutput, + "Merging {} chunks ({}) from block {} ({}) to single request (extra bytes {})", + NeededBlock.ChunkIndexes.size(), + NiceBytes(RequestedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + NiceBytes(BlockRanges.front().RangeLength - RequestedSize)); + } + + RequestedSize = BlockRanges.front().RangeLength; } - - RequestedSize = BlockRanges.front().RangeLength; } } } @@ -730,7 +739,7 @@ ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span ZEN_OPERATION_LOG_INFO(m_LogOutput, "Analysis of partial block requests saves download of {} out of {}, {:.1f}% of possible {} using {} extra " - "requests. Completed in {}", + "ranges. 
Completed in {}", NiceBytes(ActualSkippedSize), NiceBytes(AllBlocksTotalBlocksSize), PercentOfIdealPartialSkippedSize, -- cgit v1.2.3 From b67dac7c093cc82b7e8f12f9eb57bfa34dfe26d8 Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Wed, 4 Mar 2026 08:35:32 +0100 Subject: unity build fixes (#802) Various fixes to make cpp files build in unity build mode as an aside using Unity build doesn't really seem to work on Linux, unsure why but it leads to link-time issues --- src/zenremotestore/chunking/chunkblock.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/zenremotestore/chunking/chunkblock.cpp') diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp index 9c3fe8a0b..f80bfc2ba 100644 --- a/src/zenremotestore/chunking/chunkblock.cpp +++ b/src/zenremotestore/chunking/chunkblock.cpp @@ -1037,7 +1037,7 @@ ChunkBlockAnalyser::CalculateBlockRanges(uint32_t BlockIndex, #if ZEN_WITH_TESTS -namespace testutils { +namespace chunkblock_testutils { static std::vector> CreateAttachments( const std::span& Sizes, OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, @@ -1054,14 +1054,14 @@ namespace testutils { return Result; } -} // namespace testutils +} // namespace chunkblock_testutils TEST_SUITE_BEGIN("remotestore.chunkblock"); TEST_CASE("chunkblock.block") { using namespace std::literals; - using namespace testutils; + using namespace chunkblock_testutils; std::vector AttachmentSizes({7633, 6825, 5738, 8031, 7225, 566, 3656, 6006, 24, 3466, 1093, 4269, 2257, 3685, 3489, 7194, 6151, 5482, 6217, 3511, 6738, 5061, 7537, 2759, 1916, 8210, 2235, 4024, 1582, 5251, @@ -1089,7 +1089,7 @@ TEST_CASE("chunkblock.block") TEST_CASE("chunkblock.reuseblocks") { using namespace std::literals; - using namespace testutils; + using namespace chunkblock_testutils; std::vector> BlockAttachmentSizes( {std::vector{7633, 6825, 5738, 8031, 7225, 566, 3656, 6006, 24, 3466, 1093, 4269, 2257, 3685, 3489, -- cgit v1.2.3 From 9f33eaa7b7f246d68ea539f32cd74ecf8dfd5790 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 9 Mar 2026 22:20:52 +0100 Subject: =?UTF-8?q?updated=20chunk=E2=80=93block=20analyser=20(#818)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * create oplogs as they are imported * Improved logic for partial block analisys * unit tests for ChunkBlockAnalyser --- src/zenremotestore/chunking/chunkblock.cpp | 1670 ++++++++++++++++++++-------- 1 file changed, 1220 insertions(+), 450 deletions(-) (limited to 'src/zenremotestore/chunking/chunkblock.cpp') diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp index f80bfc2ba..cca32c17d 100644 --- a/src/zenremotestore/chunking/chunkblock.cpp +++ b/src/zenremotestore/chunking/chunkblock.cpp @@ -7,14 +7,11 @@ #include #include #include - #include #include -#include ZEN_THIRD_PARTY_INCLUDES_START -#include #include ZEN_THIRD_PARTY_INCLUDES_END @@ -27,6 +24,184 @@ namespace zen { using namespace std::literals; +namespace chunkblock_impl { + + struct RangeDescriptor + { + uint64_t RangeStart = 0; + uint64_t RangeLength = 0; + uint32_t ChunkBlockIndexStart = 0; + uint32_t ChunkBlockIndexCount = 0; + }; + + void MergeCheapestRange(std::vector& InOutRanges) + { + ZEN_ASSERT(InOutRanges.size() > 1); + + size_t BestRangeIndexToCollapse = SIZE_MAX; + uint64_t BestGap = (uint64_t)-1; + + for (size_t RangeIndex = 0; RangeIndex < InOutRanges.size() - 1; RangeIndex++) + { + const RangeDescriptor& 
Range = InOutRanges[RangeIndex]; + const RangeDescriptor& NextRange = InOutRanges[RangeIndex + 1]; + uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength); + if (Gap < BestGap) + { + BestRangeIndexToCollapse = RangeIndex; + BestGap = Gap; + } + else if (Gap == BestGap) + { + const RangeDescriptor& BestRange = InOutRanges[BestRangeIndexToCollapse]; + const RangeDescriptor& BestNextRange = InOutRanges[BestRangeIndexToCollapse + 1]; + uint64_t BestMergedSize = (BestNextRange.RangeStart + BestNextRange.RangeLength) - BestRange.RangeStart; + uint64_t MergedSize = (NextRange.RangeStart + NextRange.RangeLength) - Range.RangeStart; + if (MergedSize < BestMergedSize) + { + BestRangeIndexToCollapse = RangeIndex; + } + } + } + + ZEN_ASSERT(BestRangeIndexToCollapse != SIZE_MAX); + ZEN_ASSERT(BestRangeIndexToCollapse < InOutRanges.size() - 1); + ZEN_ASSERT(BestGap != (uint64_t)-1); + + RangeDescriptor& BestRange = InOutRanges[BestRangeIndexToCollapse]; + const RangeDescriptor& BestNextRange = InOutRanges[BestRangeIndexToCollapse + 1]; + BestRange.RangeLength = BestNextRange.RangeStart - BestRange.RangeStart + BestNextRange.RangeLength; + BestRange.ChunkBlockIndexCount = + BestNextRange.ChunkBlockIndexStart - BestRange.ChunkBlockIndexStart + BestNextRange.ChunkBlockIndexCount; + InOutRanges.erase(InOutRanges.begin() + BestRangeIndexToCollapse + 1); + } + + std::vector GetBlockRanges(const ChunkBlockDescription& BlockDescription, + const uint64_t ChunkStartOffsetInBlock, + std::span BlockChunkIndexNeeded) + { + ZEN_TRACE_CPU("GetBlockRanges"); + std::vector BlockRanges; + { + uint64_t CurrentOffset = ChunkStartOffsetInBlock; + uint32_t ChunkBlockIndex = 0; + uint32_t NeedBlockChunkIndexOffset = 0; + RangeDescriptor NextRange; + while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) + { + const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + NextRange = {}; + } + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + } + else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength == 0) + { + NextRange.RangeStart = CurrentOffset; + NextRange.ChunkBlockIndexStart = ChunkBlockIndex; + } + NextRange.RangeLength += ChunkCompressedLength; + NextRange.ChunkBlockIndexCount++; + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + NeedBlockChunkIndexOffset++; + } + else + { + ZEN_ASSERT(false); + } + } + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + } + } + ZEN_ASSERT(!BlockRanges.empty()); + return BlockRanges; + } + + std::vector OptimizeRanges(uint64_t TotalBlockSize, + std::span ExactRanges, + double LatencySec, + uint64_t SpeedBytesPerSec, + uint64_t MaxRangeCountPerRequest, + uint64_t MaxRangesPerBlock) + { + ZEN_TRACE_CPU("OptimizeRanges"); + ZEN_ASSERT(MaxRangesPerBlock > 0); + std::vector Ranges(ExactRanges.begin(), ExactRanges.end()); + + while (Ranges.size() > MaxRangesPerBlock) + { + MergeCheapestRange(Ranges); + } + + while (true) + { + const std::uint64_t RangeTotalSize = + std::accumulate(Ranges.begin(), Ranges.end(), uint64_t(0u), [](uint64_t Current, const RangeDescriptor& Value) { + return Current + Value.RangeLength; + }); + + const size_t RangeCount = Ranges.size(); + const uint64_t RequestCount = + 
MaxRangeCountPerRequest == (uint64_t)-1 ? 1 : (RangeCount + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest; + uint64_t RequestTimeAsBytes = uint64_t(SpeedBytesPerSec * RequestCount * LatencySec); + + if (RangeCount == 1) + { + // Does fetching the full block add less time than the time it takes to complete a single request? + if (TotalBlockSize - RangeTotalSize < SpeedBytesPerSec * LatencySec) + { + const std::uint64_t InitialRangeTotalSize = + std::accumulate(ExactRanges.begin(), + ExactRanges.end(), + uint64_t(0u), + [](uint64_t Current, const RangeDescriptor& Value) { return Current + Value.RangeLength; }); + + ZEN_DEBUG( + "Latency round trip takes as long as receiving the extra redundant bytes - go full block, dropping {} of slack, " + "adding {} of bytes to fetch, for block of size {}", + NiceBytes(TotalBlockSize - RangeTotalSize), + NiceBytes(TotalBlockSize - InitialRangeTotalSize), + NiceBytes(TotalBlockSize)); + return {}; + } + else + { + return Ranges; + } + } + + if (RequestTimeAsBytes < (TotalBlockSize - RangeTotalSize)) + { + return Ranges; + } + + if (RangeCount == 2) + { + // Merge to single range + Ranges.front().RangeLength = Ranges.back().RangeStart - Ranges.front().RangeStart + Ranges.back().RangeLength; + Ranges.front().ChunkBlockIndexCount = + Ranges.back().ChunkBlockIndexStart - Ranges.front().ChunkBlockIndexStart + Ranges.back().ChunkBlockIndexCount; + Ranges.pop_back(); + } + else + { + MergeCheapestRange(Ranges); + } + } + } + +} // namespace chunkblock_impl + ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject) { @@ -555,484 +730,193 @@ ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span ChunkBlockAnalyser::BlockResult Result; - uint64_t IdealDownloadTotalSize = 0; - uint64_t AllBlocksTotalBlocksSize = 0; - - for (const NeededBlock& NeededBlock : NeededBlocks) { - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex]; - - std::span BlockChunkIndexNeeded(NeededBlock.ChunkIndexes); - if (!NeededBlock.ChunkIndexes.empty()) + uint64_t MinRequestCount = 0; + uint64_t RequestCount = 0; + uint64_t RangeCount = 0; + uint64_t IdealDownloadTotalSize = 0; + uint64_t ActualDownloadTotalSize = 0; + uint64_t FullDownloadTotalSize = 0; + for (const NeededBlock& NeededBlock : NeededBlocks) { - bool WantsToDoPartialBlockDownload = NeededBlock.ChunkIndexes.size() < BlockDescription.ChunkRawHashes.size(); - bool CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) && - (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size()); - - EPartialBlockDownloadMode PartialBlockDownloadMode = BlockPartialDownloadModes[NeededBlock.BlockIndex]; - - const uint32_t ChunkStartOffsetInBlock = + const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex]; + std::span BlockChunkIndexNeeded(NeededBlock.ChunkIndexes); + const uint32_t ChunkStartOffsetInBlock = gsl::narrow(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + uint64_t TotalBlockSize = std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), + BlockDescription.ChunkCompressedLengths.end(), + uint64_t(ChunkStartOffsetInBlock)); + uint64_t ExactRangesSize = 0; + uint64_t DownloadRangesSize = 0; + uint64_t FullDownloadSize = 0; + + bool CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) && + (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size()); - const uint64_t TotalBlockSize = 
std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), - BlockDescription.ChunkCompressedLengths.end(), - std::uint64_t(ChunkStartOffsetInBlock)); - - AllBlocksTotalBlocksSize += TotalBlockSize; - - if ((PartialBlockDownloadMode != EPartialBlockDownloadMode::Off) && WantsToDoPartialBlockDownload && CanDoPartialBlockDownload) + if (NeededBlock.ChunkIndexes.size() == BlockDescription.ChunkRawHashes.size() || !CanDoPartialBlockDownload) + { + // Full block + ExactRangesSize = TotalBlockSize; + DownloadRangesSize = TotalBlockSize; + FullDownloadSize = TotalBlockSize; + MinRequestCount++; + RequestCount++; + RangeCount++; + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + } + else if (NeededBlock.ChunkIndexes.empty()) + { + // Not needed + } + else { - ZEN_TRACE_CPU("PartialBlockAnalysis"); - - uint64_t TotalWantedChunksSize = 0; - std::optional> MaybeBlockRanges = CalculateBlockRanges(NeededBlock.BlockIndex, - BlockDescription, - NeededBlock.ChunkIndexes, - PartialBlockDownloadMode, - ChunkStartOffsetInBlock, - TotalBlockSize, - TotalWantedChunksSize); - ZEN_ASSERT(TotalWantedChunksSize <= TotalBlockSize); - IdealDownloadTotalSize += TotalWantedChunksSize; - - if (MaybeBlockRanges.has_value()) + FullDownloadSize = TotalBlockSize; + std::vector Ranges = + chunkblock_impl::GetBlockRanges(BlockDescription, ChunkStartOffsetInBlock, BlockChunkIndexNeeded); + ExactRangesSize = std::accumulate( + Ranges.begin(), + Ranges.end(), + uint64_t(0), + [](uint64_t Current, const chunkblock_impl::RangeDescriptor& Range) { return Current + Range.RangeLength; }); + + EPartialBlockDownloadMode PartialBlockDownloadMode = BlockPartialDownloadModes[NeededBlock.BlockIndex]; + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Off) { - std::vector BlockRanges = MaybeBlockRanges.value(); - ZEN_ASSERT(!BlockRanges.empty()); - - uint64_t RequestedSize = - std::accumulate(BlockRanges.begin(), - BlockRanges.end(), - uint64_t(0), - [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; }); + // Use full block + MinRequestCount++; + RangeCount++; + RequestCount++; + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + DownloadRangesSize = TotalBlockSize; + } + else + { + const bool IsHighSpeed = (PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed); + uint64_t MaxRangeCountPerRequest = + IsHighSpeed ? m_Options.HostHighSpeedMaxRangeCountPerRequest : m_Options.HostMaxRangeCountPerRequest; + ZEN_ASSERT(MaxRangeCountPerRequest != 0); - if (PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact && BlockRanges.size() > 1) + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Exact) { - const uint64_t MaxRangeCountPerRequest = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed - ? 
m_Options.HostHighSpeedMaxRangeCountPerRequest - : m_Options.HostMaxRangeCountPerRequest; - - ZEN_ASSERT(MaxRangeCountPerRequest != 0); - - if (MaxRangeCountPerRequest != (uint64_t)-1) + // Use exact ranges + for (const chunkblock_impl::RangeDescriptor& Range : Ranges) { - const uint64_t ExtraRequestCount = BlockRanges.size() / MaxRangeCountPerRequest; + Result.BlockRanges.push_back(BlockRangeDescriptor{.BlockIndex = NeededBlock.BlockIndex, + .RangeStart = Range.RangeStart, + .RangeLength = Range.RangeLength, + .ChunkBlockIndexStart = Range.ChunkBlockIndexStart, + .ChunkBlockIndexCount = Range.ChunkBlockIndexCount}); + } - const double LatencySec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed - ? m_Options.HostHighSpeedLatencySec - : m_Options.HostLatencySec; - if (LatencySec > 0) + MinRequestCount++; + RangeCount += Ranges.size(); + RequestCount += MaxRangeCountPerRequest == (uint64_t)-1 + ? 1 + : (Ranges.size() + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest; + DownloadRangesSize = ExactRangesSize; + } + else + { + if (PartialBlockDownloadMode == EPartialBlockDownloadMode::SingleRange) + { + // Use single range + if (Ranges.size() > 1) { - const uint64_t BytesPerSec = PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed - ? m_Options.HostHighSpeedBytesPerSec - : m_Options.HostSpeedBytesPerSec; - - const double ExtraRequestTimeSec = ExtraRequestCount * LatencySec; - const uint64_t ExtraRequestTimeBytes = uint64_t(ExtraRequestTimeSec * BytesPerSec); - - const uint64_t FullRangeSize = - BlockRanges.back().RangeStart + BlockRanges.back().RangeLength - BlockRanges.front().RangeStart; - - if (ExtraRequestTimeBytes + RequestedSize >= FullRangeSize) - { - BlockRanges = std::vector{MergeBlockRanges(BlockRanges)}; - - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO( - m_LogOutput, - "Merging {} chunks ({}) from block {} ({}) to single request (extra bytes {})", - NeededBlock.ChunkIndexes.size(), - NiceBytes(RequestedSize), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - NiceBytes(BlockRanges.front().RangeLength - RequestedSize)); - } - - RequestedSize = BlockRanges.front().RangeLength; - } + Ranges = {chunkblock_impl::RangeDescriptor{ + .RangeStart = Ranges.front().RangeStart, + .RangeLength = Ranges.back().RangeStart + Ranges.back().RangeLength - Ranges.front().RangeStart, + .ChunkBlockIndexStart = Ranges.front().ChunkBlockIndexStart, + .ChunkBlockIndexCount = Ranges.back().ChunkBlockIndexStart + Ranges.back().ChunkBlockIndexCount - + Ranges.front().ChunkBlockIndexStart}}; } + + // We still do the optimize pass to see if it is more effective to use a full block } - } - if ((PartialBlockDownloadMode != EPartialBlockDownloadMode::Exact) && - ((TotalBlockSize - RequestedSize) < (512u * 1024u))) - { - if (m_Options.IsVerbose) + double LatencySec = IsHighSpeed ? m_Options.HostHighSpeedLatencySec : m_Options.HostLatencySec; + uint64_t SpeedBytesPerSec = IsHighSpeed ? 
m_Options.HostHighSpeedBytesPerSec : m_Options.HostSpeedBytesPerSec; + if (LatencySec > 0.0 && SpeedBytesPerSec > 0u) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Requesting {} chunks ({}) from block {} ({}) using full block request due to small " - "total slack (extra bytes {})", - NeededBlock.ChunkIndexes.size(), - NiceBytes(RequestedSize), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - NiceBytes(TotalBlockSize - TotalWantedChunksSize)); + Ranges = chunkblock_impl::OptimizeRanges(TotalBlockSize, + Ranges, + LatencySec, + SpeedBytesPerSec, + MaxRangeCountPerRequest, + m_Options.MaxRangesPerBlock); } - Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); - } - else - { - Result.BlockRanges.insert(Result.BlockRanges.end(), BlockRanges.begin(), BlockRanges.end()); - if (m_Options.IsVerbose) + MinRequestCount++; + if (Ranges.empty()) { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Requesting {} chunks ({}) from block {} ({}) using {} requests (extra bytes {})", - NeededBlock.ChunkIndexes.size(), - NiceBytes(RequestedSize), - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize), - BlockRanges.size(), - NiceBytes(RequestedSize - TotalWantedChunksSize)); + Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); + RequestCount++; + RangeCount++; + DownloadRangesSize = TotalBlockSize; } + else + { + for (const chunkblock_impl::RangeDescriptor& Range : Ranges) + { + Result.BlockRanges.push_back(BlockRangeDescriptor{.BlockIndex = NeededBlock.BlockIndex, + .RangeStart = Range.RangeStart, + .RangeLength = Range.RangeLength, + .ChunkBlockIndexStart = Range.ChunkBlockIndexStart, + .ChunkBlockIndexCount = Range.ChunkBlockIndexCount}); + } + RangeCount += Ranges.size(); + RequestCount += MaxRangeCountPerRequest == (uint64_t)-1 + ? 1 + : (Ranges.size() + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest; + } + + DownloadRangesSize = Ranges.empty() + ? 
TotalBlockSize + : std::accumulate(Ranges.begin(), + Ranges.end(), + uint64_t(0), + [](uint64_t Current, const chunkblock_impl::RangeDescriptor& Range) { + return Current + Range.RangeLength; + }); } } - else - { - Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); - } - } - else - { - Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex); - IdealDownloadTotalSize += TotalBlockSize; } - } - } - - if (!Result.BlockRanges.empty() && !m_Options.IsQuiet) - { - tsl::robin_set PartialBlockIndexes; - uint64_t PartialBlocksTotalSize = std::accumulate(Result.BlockRanges.begin(), - Result.BlockRanges.end(), - uint64_t(0u), - [&](uint64_t Current, const BlockRangeDescriptor& Range) { - PartialBlockIndexes.insert(Range.BlockIndex); - return Current + Range.RangeLength; - }); - - uint64_t FullBlocksTotalSize = - std::accumulate(Result.FullBlockIndexes.begin(), - Result.FullBlockIndexes.end(), - uint64_t(0u), - [&](uint64_t Current, uint32_t BlockIndex) { - const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex]; - uint32_t CurrentOffset = - gsl::narrow(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); - - return Current + std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), - BlockDescription.ChunkCompressedLengths.end(), - std::uint64_t(CurrentOffset)); - }); - - uint64_t PartialBlockRequestCount = Result.BlockRanges.size(); - uint64_t PartialBlockCount = PartialBlockIndexes.size(); - - uint64_t TotalExtraPartialBlocksRequestCount = PartialBlockRequestCount - PartialBlockCount; - uint64_t ActualPartialDownloadTotalSize = FullBlocksTotalSize + PartialBlocksTotalSize; - - uint64_t IdealSkippedSize = AllBlocksTotalBlocksSize - IdealDownloadTotalSize; - uint64_t ActualSkippedSize = AllBlocksTotalBlocksSize - ActualPartialDownloadTotalSize; - - double PercentOfIdealPartialSkippedSize = (ActualSkippedSize * 100.0) / IdealSkippedSize; - - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Analysis of partial block requests saves download of {} out of {}, {:.1f}% of possible {} using {} extra " - "ranges. 
Completed in {}", - NiceBytes(ActualSkippedSize), - NiceBytes(AllBlocksTotalBlocksSize), - PercentOfIdealPartialSkippedSize, - NiceBytes(IdealSkippedSize), - TotalExtraPartialBlocksRequestCount, - NiceTimeSpanMs(PartialAnalisysTimer.GetElapsedTimeMs())); - } - - return Result; -} + IdealDownloadTotalSize += ExactRangesSize; + ActualDownloadTotalSize += DownloadRangesSize; + FullDownloadTotalSize += FullDownloadSize; -ChunkBlockAnalyser::BlockRangeDescriptor -ChunkBlockAnalyser::MergeBlockRanges(std::span Ranges) -{ - ZEN_ASSERT(Ranges.size() > 1); - const BlockRangeDescriptor& First = Ranges.front(); - const BlockRangeDescriptor& Last = Ranges.back(); - - return BlockRangeDescriptor{.BlockIndex = First.BlockIndex, - .RangeStart = First.RangeStart, - .RangeLength = Last.RangeStart + Last.RangeLength - First.RangeStart, - .ChunkBlockIndexStart = First.ChunkBlockIndexStart, - .ChunkBlockIndexCount = Last.ChunkBlockIndexStart + Last.ChunkBlockIndexCount - First.ChunkBlockIndexStart}; -} - -std::optional> -ChunkBlockAnalyser::MakeOptionalBlockRangeVector(uint64_t TotalBlockSize, const BlockRangeDescriptor& Range) -{ - if (Range.RangeLength == TotalBlockSize) - { - return {}; - } - else - { - return std::vector{Range}; - } -}; - -const ChunkBlockAnalyser::BlockRangeLimit* -ChunkBlockAnalyser::GetBlockRangeLimitForRange(std::span Limits, - uint64_t TotalBlockSize, - std::span Ranges) -{ - if (Ranges.size() > 1) - { - const std::uint64_t WantedSize = - std::accumulate(Ranges.begin(), Ranges.end(), uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) { - return Current + Range.RangeLength; - }); - - const double RangeRequestedPercent = (WantedSize * 100.0) / TotalBlockSize; - - for (const BlockRangeLimit& Limit : Limits) - { - if (RangeRequestedPercent >= Limit.SizePercent && Ranges.size() > Limit.MaxRangeCount) + if (ExactRangesSize < FullDownloadSize) { - return &Limit; + ZEN_DEBUG("Block {}: Full: {}, Ideal: {}, Actual: {}, Saves: {}", + NeededBlock.BlockIndex, + NiceBytes(FullDownloadSize), + NiceBytes(ExactRangesSize), + NiceBytes(DownloadRangesSize), + NiceBytes(FullDownloadSize - DownloadRangesSize)); } } - } - return nullptr; -}; - -std::vector -ChunkBlockAnalyser::CollapseBlockRanges(const uint64_t AlwaysAcceptableGap, std::span BlockRanges) -{ - ZEN_ASSERT(BlockRanges.size() > 1); - std::vector CollapsedBlockRanges; - - auto BlockRangesIt = BlockRanges.begin(); - CollapsedBlockRanges.push_back(*BlockRangesIt++); - for (; BlockRangesIt != BlockRanges.end(); BlockRangesIt++) - { - BlockRangeDescriptor& LastRange = CollapsedBlockRanges.back(); - - const uint64_t BothRangeSize = BlockRangesIt->RangeLength + LastRange.RangeLength; - - const uint64_t Gap = BlockRangesIt->RangeStart - (LastRange.RangeStart + LastRange.RangeLength); - if (Gap <= Max(BothRangeSize / 16, AlwaysAcceptableGap)) - { - LastRange.ChunkBlockIndexCount = - (BlockRangesIt->ChunkBlockIndexStart + BlockRangesIt->ChunkBlockIndexCount) - LastRange.ChunkBlockIndexStart; - LastRange.RangeLength = (BlockRangesIt->RangeStart + BlockRangesIt->RangeLength) - LastRange.RangeStart; - } - else - { - CollapsedBlockRanges.push_back(*BlockRangesIt); - } - } - - return CollapsedBlockRanges; -}; - -uint64_t -ChunkBlockAnalyser::CalculateNextGap(const uint64_t AlwaysAcceptableGap, std::span BlockRanges) -{ - ZEN_ASSERT(BlockRanges.size() > 1); - uint64_t AcceptableGap = (uint64_t)-1; - for (size_t RangeIndex = 0; RangeIndex < BlockRanges.size() - 1; RangeIndex++) - { - const BlockRangeDescriptor& Range = 
BlockRanges[RangeIndex]; - const BlockRangeDescriptor& NextRange = BlockRanges[RangeIndex + 1]; - - const uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength); - AcceptableGap = Min(Gap, AcceptableGap); - } - AcceptableGap = RoundUp(AcceptableGap, AlwaysAcceptableGap); - return AcceptableGap; -}; - -std::optional> -ChunkBlockAnalyser::CalculateBlockRanges(uint32_t BlockIndex, - const ChunkBlockDescription& BlockDescription, - std::span BlockChunkIndexNeeded, - EPartialBlockDownloadMode PartialBlockDownloadMode, - const uint64_t ChunkStartOffsetInBlock, - const uint64_t TotalBlockSize, - uint64_t& OutTotalWantedChunksSize) -{ - ZEN_TRACE_CPU("CalculateBlockRanges"); - - if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Off) - { - return {}; - } - - std::vector BlockRanges; - { - uint64_t CurrentOffset = ChunkStartOffsetInBlock; - uint32_t ChunkBlockIndex = 0; - uint32_t NeedBlockChunkIndexOffset = 0; - BlockRangeDescriptor NextRange{.BlockIndex = BlockIndex}; - while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) - { - const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; - if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) - { - if (NextRange.RangeLength > 0) - { - BlockRanges.push_back(NextRange); - NextRange = {.BlockIndex = BlockIndex}; - } - ChunkBlockIndex++; - CurrentOffset += ChunkCompressedLength; - } - else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) - { - if (NextRange.RangeLength == 0) - { - NextRange.RangeStart = CurrentOffset; - NextRange.ChunkBlockIndexStart = ChunkBlockIndex; - } - NextRange.RangeLength += ChunkCompressedLength; - NextRange.ChunkBlockIndexCount++; - ChunkBlockIndex++; - CurrentOffset += ChunkCompressedLength; - NeedBlockChunkIndexOffset++; - } - else - { - ZEN_ASSERT(false); - } - } - if (NextRange.RangeLength > 0) - { - BlockRanges.push_back(NextRange); - } - } - ZEN_ASSERT(!BlockRanges.empty()); - - OutTotalWantedChunksSize = - std::accumulate(BlockRanges.begin(), BlockRanges.end(), uint64_t(0), [](uint64_t Current, const BlockRangeDescriptor& Range) { - return Current + Range.RangeLength; - }); - - double RangeWantedPercent = (OutTotalWantedChunksSize * 100.0) / TotalBlockSize; - - if (BlockRanges.size() == 1) - { - if (m_Options.IsVerbose) - { - ZEN_OPERATION_LOG_INFO(m_LogOutput, - "Range request of {} ({:.2f}%) using single range from block {} ({}) as is", - NiceBytes(OutTotalWantedChunksSize), - RangeWantedPercent, - BlockDescription.BlockHash, - NiceBytes(TotalBlockSize)); - } - return BlockRanges; - } - - if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Exact) - { - if (m_Options.IsVerbose) + uint64_t Actual = FullDownloadTotalSize - ActualDownloadTotalSize; + uint64_t Ideal = FullDownloadTotalSize - IdealDownloadTotalSize; + if (Ideal < FullDownloadTotalSize && !m_Options.IsQuiet) { + const double AchievedPercent = Ideal == 0 ? 
+        const double AchievedPercent = Ideal == 0 ? 100.0 : (100.0 * Actual) / Ideal;
         ZEN_OPERATION_LOG_INFO(m_LogOutput,
-                               "Range request of {} ({:.2f}%) using {} ranges from block {} ({})",
-                               NiceBytes(OutTotalWantedChunksSize),
-                               RangeWantedPercent,
-                               BlockRanges.size(),
-                               BlockDescription.BlockHash,
-                               NiceBytes(TotalBlockSize));
-        }
-        return BlockRanges;
-    }
-
-    if (PartialBlockDownloadMode == EPartialBlockDownloadMode::SingleRange)
-    {
-        const BlockRangeDescriptor MergedRange = MergeBlockRanges(BlockRanges);
-        if (m_Options.IsVerbose)
-        {
-            const double RangeRequestedPercent = (MergedRange.RangeLength * 100.0) / TotalBlockSize;
-            const double WastedPercent = ((MergedRange.RangeLength - OutTotalWantedChunksSize) * 100.0) / MergedRange.RangeLength;
-
-            ZEN_OPERATION_LOG_INFO(
-                m_LogOutput,
-                "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) limited to single block range {} ({:.2f}%) wasting "
-                "{:.2f}% ({})",
-                NiceBytes(OutTotalWantedChunksSize),
-                RangeWantedPercent,
-                BlockRanges.size(),
-                BlockDescription.BlockHash,
-                NiceBytes(TotalBlockSize),
-                NiceBytes(MergedRange.RangeLength),
-                RangeRequestedPercent,
-                WastedPercent,
-                NiceBytes(MergedRange.RangeLength - OutTotalWantedChunksSize));
-        }
-        return MakeOptionalBlockRangeVector(TotalBlockSize, MergedRange);
-    }
-
-    if (RangeWantedPercent > FullBlockRangePercentLimit)
-    {
-        const BlockRangeDescriptor MergedRange = MergeBlockRanges(BlockRanges);
-        if (m_Options.IsVerbose)
-        {
-            const double RangeRequestedPercent = (MergedRange.RangeLength * 100.0) / TotalBlockSize;
-            const double WastedPercent = ((MergedRange.RangeLength - OutTotalWantedChunksSize) * 100.0) / MergedRange.RangeLength;
-
-            ZEN_OPERATION_LOG_INFO(
-                m_LogOutput,
-                "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) exceeds {}%. Merged to single block range {} "
-                "({:.2f}%) wasting {:.2f}% ({})",
-                NiceBytes(OutTotalWantedChunksSize),
-                RangeWantedPercent,
-                BlockRanges.size(),
-                BlockDescription.BlockHash,
-                NiceBytes(TotalBlockSize),
-                FullBlockRangePercentLimit,
-                NiceBytes(MergedRange.RangeLength),
-                RangeRequestedPercent,
-                WastedPercent,
-                NiceBytes(MergedRange.RangeLength - OutTotalWantedChunksSize));
+                               "Block Partial Analysis: Blocks: {}, Full: {}, Ideal: {}, Actual: {}. Skipping {} ({:.1f}%) out of "
+                               "possible {} using {} extra ranges "
+                               "via {} extra requests. Completed in {}",
+                               NeededBlocks.size(),
+                               NiceBytes(FullDownloadTotalSize),
+                               NiceBytes(IdealDownloadTotalSize),
+                               NiceBytes(ActualDownloadTotalSize),
+                               NiceBytes(FullDownloadTotalSize - ActualDownloadTotalSize),
+                               AchievedPercent,
+                               NiceBytes(Ideal),
+                               RangeCount - MinRequestCount,
+                               RequestCount - MinRequestCount,
+                               NiceTimeSpanMs(PartialAnalisysTimer.GetElapsedTimeMs()));
     }
-    }
-
-    const uint64_t AlwaysAcceptableGap = 4u * 1024u;
-
-    std::vector<BlockRangeDescriptor> CollapsedBlockRanges = CollapseBlockRanges(AlwaysAcceptableGap, BlockRanges);
-    while (GetBlockRangeLimitForRange(ForceMergeLimits, TotalBlockSize, CollapsedBlockRanges))
-    {
-        CollapsedBlockRanges = CollapseBlockRanges(CalculateNextGap(AlwaysAcceptableGap, CollapsedBlockRanges), CollapsedBlockRanges);
-    }
-
-    const std::uint64_t WantedCollapsedSize =
-        std::accumulate(CollapsedBlockRanges.begin(),
-                        CollapsedBlockRanges.end(),
-                        uint64_t(0),
-                        [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; });
-
-    const double CollapsedRangeRequestedPercent = (WantedCollapsedSize * 100.0) / TotalBlockSize;
-
-    if (m_Options.IsVerbose)
-    {
-        const double WastedPercent = ((WantedCollapsedSize - OutTotalWantedChunksSize) * 100.0) / WantedCollapsedSize;
-
-        ZEN_OPERATION_LOG_INFO(
-            m_LogOutput,
-            "Range request of {} ({:.2f}%) using {} ranges from block {} ({}) collapsed to {} {:.2f}% using {} ranges wasting {:.2f}% "
-            "({})",
-            NiceBytes(OutTotalWantedChunksSize),
-            RangeWantedPercent,
-            BlockRanges.size(),
-            BlockDescription.BlockHash,
-            NiceBytes(TotalBlockSize),
-            NiceBytes(WantedCollapsedSize),
-            CollapsedRangeRequestedPercent,
-            CollapsedBlockRanges.size(),
-            WastedPercent,
-            NiceBytes(WantedCollapsedSize - OutTotalWantedChunksSize));
-    }
-    return CollapsedBlockRanges;
+    return Result;
 }

 #if ZEN_WITH_TESTS
@@ -1326,6 +1210,892 @@ TEST_CASE("chunkblock.reuseblocks")
     }
 }

+namespace chunkblock_analyser_testutils {
+
+    // Build a ChunkBlockDescription without any real payload.
+    // Hashes are derived deterministically from (BlockSeed XOR ChunkIndex) so that the same
+    // seed produces the same hashes — useful for deduplication tests.
+    static ChunkBlockDescription MakeBlockDesc(uint64_t HeaderSize,
+                                               std::initializer_list<uint32_t> CompressedLengths,
+                                               uint32_t BlockSeed = 0)
+    {
+        ChunkBlockDescription Desc;
+        Desc.HeaderSize = HeaderSize;
+        uint32_t ChunkIndex = 0;
+        for (uint32_t Length : CompressedLengths)
+        {
+            uint64_t HashInput = uint64_t(BlockSeed ^ ChunkIndex);
+            Desc.ChunkRawHashes.push_back(IoHash::HashBuffer(MemoryView(&HashInput, sizeof(HashInput))));
+            Desc.ChunkRawLengths.push_back(Length);
+            Desc.ChunkCompressedLengths.push_back(Length);
+            ChunkIndex++;
+        }
+        return Desc;
+    }
+
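+    // An illustrative spot-check of the seed determinism described above, a
+    // sketch assuming IoHash provides operator== (which the robin_map usage
+    // below already requires):
+    TEST_CASE("chunkblock.testutils.deterministic_hashes")
+    {
+        auto A = MakeBlockDesc(50, {100, 100}, /*BlockSeed=*/7);
+        auto B = MakeBlockDesc(50, {100, 100}, /*BlockSeed=*/7);
+        // Same seed and chunk count yield identical per-chunk hashes.
+        CHECK(A.ChunkRawHashes[0] == B.ChunkRawHashes[0]);
+        CHECK(A.ChunkRawHashes[1] == B.ChunkRawHashes[1]);
+        // A different seed diverges, as the dedup tests below rely on.
+        auto C = MakeBlockDesc(50, {100, 100}, /*BlockSeed=*/8);
+        CHECK(!(A.ChunkRawHashes[0] == C.ChunkRawHashes[0]));
+    }
+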
+    // Build the robin_map needed by GetNeeded from a flat list of blocks.
+    // First occurrence of each hash wins; index is assigned sequentially across all blocks.
+    [[maybe_unused]] static tsl::robin_map<IoHash, uint32_t> MakeHashMap(const std::vector<ChunkBlockDescription>& Blocks)
+    {
+        tsl::robin_map<IoHash, uint32_t> Result;
+        uint32_t Index = 0;
+        for (const ChunkBlockDescription& Block : Blocks)
+        {
+            for (const IoHash& Hash : Block.ChunkRawHashes)
+            {
+                if (!Result.contains(Hash))
+                {
+                    Result.emplace(Hash, Index++);
+                }
+            }
+        }
+        return Result;
+    }
+
+} // namespace chunkblock_analyser_testutils
+
+TEST_CASE("chunkblock.mergecheapestrange.picks_smallest_gap")
+{
+    using RD = chunkblock_impl::RangeDescriptor;
+    // Gap between ranges 0-1 is 50, gap between 1-2 is 150 → pair 0-1 gets merged
+    std::vector<RD> Ranges = {
+        {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 150, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+    };
+    chunkblock_impl::MergeCheapestRange(Ranges);
+
+    REQUIRE_EQ(2u, Ranges.size());
+    CHECK_EQ(0u, Ranges[0].RangeStart);
+    CHECK_EQ(250u, Ranges[0].RangeLength); // 150+100
+    CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+    CHECK_EQ(2u, Ranges[0].ChunkBlockIndexCount);
+    CHECK_EQ(400u, Ranges[1].RangeStart);
+    CHECK_EQ(100u, Ranges[1].RangeLength);
+    CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart);
+    CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.mergecheapestrange.tiebreak_smaller_merged")
+{
+    using RD = chunkblock_impl::RangeDescriptor;
+    // Gap 0-1 == gap 1-2 == 100; merged size 0-1 (250) < merged size 1-2 (350) → pair 0-1 wins
+    std::vector<RD> Ranges = {
+        {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 200, .RangeLength = 50, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 350, .RangeLength = 200, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+    };
+    chunkblock_impl::MergeCheapestRange(Ranges);
+
+    REQUIRE_EQ(2u, Ranges.size());
+    // Ranges 0-1 merged: start=0, length = (200+50)-0 = 250
+    CHECK_EQ(0u, Ranges[0].RangeStart);
+    CHECK_EQ(250u, Ranges[0].RangeLength);
+    CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+    CHECK_EQ(2u, Ranges[0].ChunkBlockIndexCount);
+    // Remaining range unchanged (was index 2)
+    CHECK_EQ(350u, Ranges[1].RangeStart);
+    CHECK_EQ(200u, Ranges[1].RangeLength);
+    CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart);
+    CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.optimizeranges.preserves_ranges_low_latency")
+{
+    using RD = chunkblock_impl::RangeDescriptor;
+    // With MaxRangeCountPerRequest unlimited, RequestCount=1
+    // RequestTimeAsBytes = 100000 * 1 * 0.001 = 100 << slack=7000 → all ranges preserved
+    std::vector<RD> ExactRanges = {
+        {.RangeStart = 0, .RangeLength = 1000, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 2000, .RangeLength = 1000, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 4000, .RangeLength = 1000, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+    };
+    uint64_t TotalBlockSize = 10000;
+    double LatencySec = 0.001;
+    uint64_t SpeedBytesPerSec = 100000;
+    uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+    uint64_t MaxRangesPerBlock = 1024;
+
+    auto Result =
+        chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+    REQUIRE_EQ(3u, Result.size());
+}
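+
+// The OptimizeRanges expectations are easiest to read with the latency cost
+// expressed in bytes: how much the link could have transferred during the
+// per-request round trips. A sketch of that conversion; the helper name is
+// hypothetical, only the arithmetic is implied by the test comments
+// (e.g. 100000 * 1 * 0.001 = 100 above).
+[[maybe_unused]] static uint64_t RequestTimeAsBytesSketch(uint64_t SpeedBytesPerSec, double LatencySec, uint64_t RequestCount)
+{
+    // Latency per request, converted to an equivalent byte count at link speed.
+    return uint64_t(double(SpeedBytesPerSec) * LatencySec * double(RequestCount));
+}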
+
+TEST_CASE("chunkblock.optimizeranges.falls_back_to_full_block")
+{
+    using RD = chunkblock_impl::RangeDescriptor;
+    // 1 range already; slack=100 < SpeedBytesPerSec*LatencySec=200 → full block (empty result)
+    std::vector<RD> ExactRanges = {
+        {.RangeStart = 100, .RangeLength = 900, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 3},
+    };
+    uint64_t TotalBlockSize = 1000;
+    double LatencySec = 0.01;
+    uint64_t SpeedBytesPerSec = 20000;
+    uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+    uint64_t MaxRangesPerBlock = 1024;
+
+    auto Result =
+        chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+    CHECK(Result.empty());
+}
+
+TEST_CASE("chunkblock.optimizeranges.maxrangesperblock_clamp")
+{
+    using RD = chunkblock_impl::RangeDescriptor;
+    // 5 input ranges; MaxRangesPerBlock=2 clamps to ≤2 before the cost model runs
+    std::vector<RD> ExactRanges = {
+        {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 300, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 600, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 900, .RangeLength = 100, .ChunkBlockIndexStart = 3, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 1200, .RangeLength = 100, .ChunkBlockIndexStart = 4, .ChunkBlockIndexCount = 1},
+    };
+    uint64_t TotalBlockSize = 5000;
+    double LatencySec = 0.001;
+    uint64_t SpeedBytesPerSec = 100000;
+    uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+    uint64_t MaxRangesPerBlock = 2;
+
+    auto Result =
+        chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+    CHECK(Result.size() <= 2u);
+    CHECK(!Result.empty());
+}
+
+TEST_CASE("chunkblock.optimizeranges.low_maxrangecountperrequest_drives_merge")
+{
+    using RD = chunkblock_impl::RangeDescriptor;
+    // MaxRangeCountPerRequest=1 means RequestCount==RangeCount; high latency drives merging.
+    // With MaxRangeCountPerRequest=-1 the same 3 ranges would be preserved (verified below).
+    std::vector<RD> ExactRanges = {
+        {.RangeStart = 100, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 250, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+    };
+    uint64_t TotalBlockSize = 1000;
+    double LatencySec = 1.0;
+    uint64_t SpeedBytesPerSec = 500;
+    // With MaxRangeCountPerRequest=-1: RequestCount=1, RequestTimeAsBytes=500 < slack=700 → preserved
+    // With MaxRangeCountPerRequest=1: RequestCount=3, RequestTimeAsBytes=1500 > slack=700 → merged
+    uint64_t MaxRangesPerBlock = 1024;
+
+    auto Unlimited =
+        chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, (uint64_t)-1, MaxRangesPerBlock);
+    CHECK_EQ(3u, Unlimited.size());
+
+    auto Limited =
+        chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, uint64_t(1), MaxRangesPerBlock);
+    CHECK(Limited.size() < 3u);
+}
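+
+// A numeric sanity check tying the comments in the test above to the
+// hypothetical RequestTimeAsBytesSketch helper defined earlier; the slack is
+// simply the bytes of the block that the wanted ranges leave behind.
+TEST_CASE("chunkblock.optimizeranges.cost_model_arithmetic_sketch")
+{
+    // 3 ranges of 100 bytes wanted out of a 1000-byte block → slack = 700.
+    const uint64_t Slack = 1000u - 3u * 100u;
+    CHECK_EQ(700u, Slack);
+    // One request at 500 B/s with 1 s latency costs 500 "bytes": below slack, preserved.
+    CHECK_EQ(500u, RequestTimeAsBytesSketch(500, 1.0, 1));
+    // Three requests cost 1500 "bytes": above slack, merged.
+    CHECK_EQ(1500u, RequestTimeAsBytesSketch(500, 1.0, 3));
+}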
+
+TEST_CASE("chunkblock.optimizeranges.unlimited_rangecountperrequest_no_extra_cost")
+{
+    using RD = chunkblock_impl::RangeDescriptor;
+    // MaxRangeCountPerRequest=-1 → RequestCount always 1, even with many ranges and high latency
+    std::vector<RD> ExactRanges = {
+        {.RangeStart = 0, .RangeLength = 50, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 200, .RangeLength = 50, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 400, .RangeLength = 50, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 600, .RangeLength = 50, .ChunkBlockIndexStart = 3, .ChunkBlockIndexCount = 1},
+        {.RangeStart = 800, .RangeLength = 50, .ChunkBlockIndexStart = 4, .ChunkBlockIndexCount = 1},
+    };
+    uint64_t TotalBlockSize = 5000;
+    double LatencySec = 0.1;
+    uint64_t SpeedBytesPerSec = 10000; // RequestTimeAsBytes=1000 << slack=4750
+    uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+    uint64_t MaxRangesPerBlock = 1024;
+
+    auto Result =
+        chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+    CHECK_EQ(5u, Result.size());
+}
+
+TEST_CASE("chunkblock.optimizeranges.two_range_direct_merge_path")
+{
+    using RD = chunkblock_impl::RangeDescriptor;
+    // Exactly 2 ranges; cost model demands merge; exercises the RangeCount==2 direct-merge branch.
+    // After direct merge → 1 range with small slack → full block (empty)
+    std::vector<RD> ExactRanges = {
+        {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 2},
+        {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 2},
+    };
+    uint64_t TotalBlockSize = 600;
+    double LatencySec = 0.1;
+    uint64_t SpeedBytesPerSec = 5000; // RequestTimeAsBytes=500 > slack=400 on first iter
+    uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+    uint64_t MaxRangesPerBlock = 1024;
+
+    // Iteration 1: RangeCount=2, RequestCount=1, RequestTimeAsBytes=500 > slack=400 → direct merge
+    // After merge: 1 range [{0,500,0,4}], slack=100 < Speed*Lat=500 → full block
+    auto Result =
+        chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+    CHECK(Result.empty());
+}
+
+TEST_CASE("chunkblock.getneeded.all_chunks")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+    ChunkBlockAnalyser::Options Options;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    auto HashMap = MakeHashMap({Block});
+    auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return true; });
+
+    REQUIRE_EQ(1u, NeededBlocks.size());
+    CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+    REQUIRE_EQ(4u, NeededBlocks[0].ChunkIndexes.size());
+    CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+    CHECK_EQ(1u, NeededBlocks[0].ChunkIndexes[1]);
+    CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[2]);
+    CHECK_EQ(3u, NeededBlocks[0].ChunkIndexes[3]);
+}
+
+TEST_CASE("chunkblock.getneeded.no_chunks")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+    ChunkBlockAnalyser::Options Options;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    auto HashMap = MakeHashMap({Block});
+    auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return false; });
+
+    CHECK(NeededBlocks.empty());
+}
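+
+// The dedup cases below exercise a greedy assignment: blocks are visited in
+// order of ascending slack (bytes in the block that are not needed), so a
+// chunk shared between blocks lands in the tightest block. A condensed sketch
+// of that ordering step, assuming the file's existing <numeric>/<algorithm>
+// includes; the helper name is hypothetical:
+[[maybe_unused]] static std::vector<uint32_t> OrderBySlackSketch(const std::vector<uint64_t>& BlockSlack)
+{
+    std::vector<uint32_t> Order(BlockSlack.size());
+    std::iota(Order.begin(), Order.end(), 0u);
+    std::sort(Order.begin(), Order.end(), [&BlockSlack](uint32_t Lhs, uint32_t Rhs) { return BlockSlack[Lhs] < BlockSlack[Rhs]; });
+    return Order;
+}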
+
+TEST_CASE("chunkblock.getneeded.subset_within_block")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+    ChunkBlockAnalyser::Options Options;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    auto HashMap = MakeHashMap({Block});
+    // Indices 0 and 2 are needed; 1 and 3 are not
+    auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex == 0 || ChunkIndex == 2; });
+
+    REQUIRE_EQ(1u, NeededBlocks.size());
+    CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+    REQUIRE_EQ(2u, NeededBlocks[0].ChunkIndexes.size());
+    CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+    CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[1]);
+}
+
+TEST_CASE("chunkblock.getneeded.dedup_low_slack_wins")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    // Block 0: {H0, H1, SharedH, H3} — 3 of 4 needed (H3 not needed); slack = 100
+    // Block 1: {H4, H5, SharedH, H6} — only SharedH needed; slack = 300
+    // Block 0 has less slack → processed first → SharedH assigned to block 0
+    IoHash SharedH = IoHash::HashBuffer(MemoryView("shared_chunk_dedup", 18));
+    IoHash H0 = IoHash::HashBuffer(MemoryView("block0_chunk0", 13));
+    IoHash H1 = IoHash::HashBuffer(MemoryView("block0_chunk1", 13));
+    IoHash H3 = IoHash::HashBuffer(MemoryView("block0_chunk3", 13));
+    IoHash H4 = IoHash::HashBuffer(MemoryView("block1_chunk0", 13));
+    IoHash H5 = IoHash::HashBuffer(MemoryView("block1_chunk1", 13));
+    IoHash H6 = IoHash::HashBuffer(MemoryView("block1_chunk3", 13));
+
+    ChunkBlockDescription Block0;
+    Block0.HeaderSize = 50;
+    Block0.ChunkRawHashes = {H0, H1, SharedH, H3};
+    Block0.ChunkRawLengths = {100, 100, 100, 100};
+    Block0.ChunkCompressedLengths = {100, 100, 100, 100};
+
+    ChunkBlockDescription Block1;
+    Block1.HeaderSize = 50;
+    Block1.ChunkRawHashes = {H4, H5, SharedH, H6};
+    Block1.ChunkRawLengths = {100, 100, 100, 100};
+    Block1.ChunkCompressedLengths = {100, 100, 100, 100};
+
+    std::vector<ChunkBlockDescription> Blocks = {Block0, Block1};
+    ChunkBlockAnalyser::Options Options;
+    ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options);
+
+    // Map: H0→0, H1→1, SharedH→2, H3→3, H4→4, H5→5, H6→6
+    auto HashMap = MakeHashMap(Blocks);
+    // Need H0(0), H1(1), SharedH(2) from block 0; SharedH from block 1 (already index 2)
+    // H3(3) not needed; H4, H5, H6 not needed
+    auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex <= 2; });
+
+    // Block 0 slack=100 (H3 unused), block 1 slack=300 (H4, H5, H6 unused)
+    // Block 0 processed first; picks up H0, H1, SharedH
+    // Block 1 tries SharedH but it's already picked up → empty → not added
+    REQUIRE_EQ(1u, NeededBlocks.size());
+    CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+    REQUIRE_EQ(3u, NeededBlocks[0].ChunkIndexes.size());
+    CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+    CHECK_EQ(1u, NeededBlocks[0].ChunkIndexes[1]);
+    CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[2]);
+}
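+
+// Tying the ordering sketch to the case above: with the slack values worked
+// out in the comments (block 0 → 100, block 1 → 300), the hypothetical helper
+// visits block 0 first, which is why SharedH ends up assigned to it.
+TEST_CASE("chunkblock.getneeded.slack_order_sketch")
+{
+    auto Order = OrderBySlackSketch({100u, 300u});
+    REQUIRE_EQ(2u, Order.size());
+    CHECK_EQ(0u, Order[0]);
+    CHECK_EQ(1u, Order[1]);
+}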
+
+TEST_CASE("chunkblock.getneeded.dedup_no_double_pickup")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    // SharedH appears in both blocks; should appear in the result exactly once
+    IoHash SharedH = IoHash::HashBuffer(MemoryView("shared_chunk_nodup", 18));
+    IoHash H0 = IoHash::HashBuffer(MemoryView("unique_chunk_b0", 15));
+    IoHash H1 = IoHash::HashBuffer(MemoryView("unique_chunk_b1a", 16));
+    IoHash H2 = IoHash::HashBuffer(MemoryView("unique_chunk_b1b", 16));
+    IoHash H3 = IoHash::HashBuffer(MemoryView("unique_chunk_b1c", 16));
+
+    ChunkBlockDescription Block0;
+    Block0.HeaderSize = 50;
+    Block0.ChunkRawHashes = {SharedH, H0};
+    Block0.ChunkRawLengths = {100, 100};
+    Block0.ChunkCompressedLengths = {100, 100};
+
+    ChunkBlockDescription Block1;
+    Block1.HeaderSize = 50;
+    Block1.ChunkRawHashes = {H1, H2, H3, SharedH};
+    Block1.ChunkRawLengths = {100, 100, 100, 100};
+    Block1.ChunkCompressedLengths = {100, 100, 100, 100};
+
+    std::vector<ChunkBlockDescription> Blocks = {Block0, Block1};
+    ChunkBlockAnalyser::Options Options;
+    ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options);
+
+    // Map: SharedH→0, H0→1, H1→2, H2→3, H3→4
+    // Only SharedH (index 0) needed; no other chunks
+    auto HashMap = MakeHashMap(Blocks);
+    auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex == 0; });
+
+    // Block 0: SharedH needed, H0 not needed → slack=100
+    // Block 1: SharedH needed, H1/H2/H3 not needed → slack=300
+    // Block 0 processed first → picks up SharedH; Block 1 skips it
+
+    // Count total picked-up indexes across all NeededBlocks; since only
+    // SharedH is needed, every picked-up index is one occurrence of SharedH
+    // (block-local index 0 in Block0, index 3 in Block1).
+    uint32_t SharedOccurrences = 0;
+    for (const auto& NB : NeededBlocks)
+    {
+        SharedOccurrences += uint32_t(NB.ChunkIndexes.size());
+    }
+    CHECK_EQ(1u, SharedOccurrences);
+    REQUIRE_EQ(1u, NeededBlocks.size());
+    CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+}
+
+TEST_CASE("chunkblock.getneeded.skips_unrequested_chunks")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    // Block has 4 chunks but only 2 appear in the hash map → ChunkIndexes has exactly those 2
+    auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+    ChunkBlockAnalyser::Options Options;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    // Only put chunks at positions 0 and 2 in the map
+    tsl::robin_map<IoHash, uint32_t> HashMap;
+    HashMap.emplace(Block.ChunkRawHashes[0], 0u);
+    HashMap.emplace(Block.ChunkRawHashes[2], 1u);
+
+    auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return true; });
+
+    REQUIRE_EQ(1u, NeededBlocks.size());
+    CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+    REQUIRE_EQ(2u, NeededBlocks[0].ChunkIndexes.size());
+    CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+    CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[1]);
+}
+
+TEST_CASE("chunkblock.getneeded.two_blocks_both_contribute")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    // Block 0: all 4 needed (slack=0); block 1: 3 of 4 needed (slack=100)
+    // Both blocks contribute chunks → 2 NeededBlocks in result
+    auto Block0 = MakeBlockDesc(50, {100, 100, 100, 100}, /*BlockSeed=*/0);
+    auto Block1 = MakeBlockDesc(50, {100, 100, 100, 100}, /*BlockSeed=*/200);
+
+    std::vector<ChunkBlockDescription> Blocks = {Block0, Block1};
+    ChunkBlockAnalyser::Options Options;
+    ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options);
+
+    // HashMap: Block0 hashes → indices 0-3, Block1 hashes → indices 4-7
+    auto HashMap = MakeHashMap(Blocks);
+    // Need all Block0 chunks (0-3) and Block1 chunks 0-2 (indices 4-6); not chunk index 7 (Block1 chunk 3)
+    auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex <= 6; });
+
+    CHECK_EQ(2u, NeededBlocks.size());
+    // Block 0 has slack=0 (all 4 needed), Block 1 has slack=100 (1 not needed)
+    // Block 0 comes first in result
+    CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+    CHECK_EQ(4u, NeededBlocks[0].ChunkIndexes.size());
+    CHECK_EQ(1u, NeededBlocks[1].BlockIndex);
+    CHECK_EQ(3u, NeededBlocks[1].ChunkIndexes.size());
+}
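+
+// The calc tests below all derive their offsets the same way: chunk payload
+// starts after the None-encoder header plus the block's own header, and the
+// full block size is that offset plus every compressed chunk length. A small
+// sketch of that size computation (helper name hypothetical, arithmetic as
+// implied by the expectations):
+[[maybe_unused]] static uint64_t TotalBlockSizeSketch(uint64_t ChunkStartOffset, const std::vector<uint32_t>& CompressedLengths)
+{
+    return std::accumulate(CompressedLengths.begin(),
+                           CompressedLengths.end(),
+                           ChunkStartOffset,
+                           [](uint64_t Sum, uint32_t Length) { return Sum + Length; });
+}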
+
+TEST_CASE("chunkblock.calc.off_mode")
+{
+    using namespace chunkblock_analyser_testutils;
+    using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    // HeaderSize > 0, chunks size matches → CanDoPartialBlockDownload = true
+    // But mode Off forces full block regardless
+    auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+    ChunkBlockAnalyser::Options Options;
+    Options.IsQuiet = true;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+    std::vector<Mode> Modes = {Mode::Off};
+
+    auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+    REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+    CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+    CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.exact_mode")
+{
+    using namespace chunkblock_analyser_testutils;
+    using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+    ChunkBlockAnalyser::Options Options;
+    Options.IsQuiet = true;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    // Need chunks 0 and 2 → 2 non-contiguous ranges; Exact mode passes them straight through
+    std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+    std::vector<Mode> Modes = {Mode::Exact};
+
+    auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+    CHECK(Result.FullBlockIndexes.empty());
+    REQUIRE_EQ(2u, Result.BlockRanges.size());
+
+    CHECK_EQ(0u, Result.BlockRanges[0].BlockIndex);
+    CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+    CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+    CHECK_EQ(0u, Result.BlockRanges[0].ChunkBlockIndexStart);
+    CHECK_EQ(1u, Result.BlockRanges[0].ChunkBlockIndexCount);
+
+    CHECK_EQ(0u, Result.BlockRanges[1].BlockIndex);
+    CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); // 100+200 before chunk 2
+    CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+    CHECK_EQ(2u, Result.BlockRanges[1].ChunkBlockIndexStart);
+    CHECK_EQ(1u, Result.BlockRanges[1].ChunkBlockIndexCount);
+}
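+
+// Reading the Exact-mode expectations above: with compressed lengths
+// {100, 200, 300, 400}, a chunk's offset inside the payload is the prefix sum
+// of the lengths before it, so chunk 2 sits 300 bytes past ChunkStartOffset.
+// A standalone check of that prefix-sum reading:
+TEST_CASE("chunkblock.calc.exact_mode_offsets_sketch")
+{
+    const uint32_t Lengths[] = {100, 200, 300, 400};
+    std::vector<uint64_t> Offsets;
+    uint64_t Offset = 0;
+    for (uint32_t Length : Lengths)
+    {
+        Offsets.push_back(Offset);
+        Offset += Length;
+    }
+    CHECK_EQ(0u, Offsets[0]);   // chunk 0 opens the payload
+    CHECK_EQ(300u, Offsets[2]); // matches RangeStart - ChunkStartOffset above
+}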
+
+TEST_CASE("chunkblock.calc.singlerange_mode")
+{
+    using namespace chunkblock_analyser_testutils;
+    using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+    // Default HostLatencySec=-1 → OptimizeRanges not called after SingleRange collapse
+    ChunkBlockAnalyser::Options Options;
+    Options.IsQuiet = true;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    // Need chunks 0 and 2 → 2 ranges that get collapsed to 1
+    std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+    std::vector<Mode> Modes = {Mode::SingleRange};
+
+    auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+    CHECK(Result.FullBlockIndexes.empty());
+    REQUIRE_EQ(1u, Result.BlockRanges.size());
+    CHECK_EQ(0u, Result.BlockRanges[0].BlockIndex);
+    CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+    // Spans from chunk 0 start to chunk 2 end: 100+200+300=600
+    CHECK_EQ(600u, Result.BlockRanges[0].RangeLength);
+    CHECK_EQ(0u, Result.BlockRanges[0].ChunkBlockIndexStart);
+    // ChunkBlockIndexCount = (2+1) - 0 = 3
+    CHECK_EQ(3u, Result.BlockRanges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.calc.multirange_mode")
+{
+    using namespace chunkblock_analyser_testutils;
+    using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+    // Low latency: RequestTimeAsBytes=100 << slack → OptimizeRanges preserves ranges
+    ChunkBlockAnalyser::Options Options;
+    Options.IsQuiet = true;
+    Options.HostLatencySec = 0.001;
+    Options.HostSpeedBytesPerSec = 100000;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+    std::vector<Mode> Modes = {Mode::MultiRange};
+
+    auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+    CHECK(Result.FullBlockIndexes.empty());
+    REQUIRE_EQ(2u, Result.BlockRanges.size());
+    CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+    CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+    CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+    CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+}
+
+TEST_CASE("chunkblock.calc.multirangehighspeed_mode")
+{
+    using namespace chunkblock_analyser_testutils;
+    using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+    // Block slack ≈ 714 bytes (TotalBlockSize≈1114, RangeTotalSize=400 for chunks 0+2)
+    // RequestTimeAsBytes = 400000 * 1 * 0.001 = 400 < 714 → ranges preserved
+    ChunkBlockAnalyser::Options Options;
+    Options.IsQuiet = true;
+    Options.HostHighSpeedLatencySec = 0.001;
+    Options.HostHighSpeedBytesPerSec = 400000;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+    std::vector<Mode> Modes = {Mode::MultiRangeHighSpeed};
+
+    auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+    CHECK(Result.FullBlockIndexes.empty());
+    REQUIRE_EQ(2u, Result.BlockRanges.size());
+    CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+    CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+    CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+    CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+}
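+
+// The "slack ≈ 714" comment above follows from the real header sizes: the
+// payload is 1000 bytes, the block header is 50, and the remainder comes from
+// the None-encoder header. The inequality the test relies on holds for any
+// non-negative header size, which this sketch checks without pinning the
+// exact constant:
+TEST_CASE("chunkblock.calc.highspeed_slack_arithmetic_sketch")
+{
+    const uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + 50u;
+    const uint64_t TotalBlockSize = ChunkStartOffset + 100u + 200u + 300u + 400u;
+    const uint64_t Wanted = 100u + 300u; // chunks 0 and 2
+    // Slack stays above RequestTimeAsBytes = 400000 * 1 * 0.001 = 400.
+    CHECK(TotalBlockSize - Wanted > 400u);
+}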
+    CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+    CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.headersize_zero_forces_full_block")
+{
+    using namespace chunkblock_analyser_testutils;
+    using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    // HeaderSize=0 → CanDoPartialBlockDownload=false → full block even in Exact mode
+    auto Block = MakeBlockDesc(0, {100, 200, 300, 400});
+    ChunkBlockAnalyser::Options Options;
+    Options.IsQuiet = true;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+    std::vector<Mode> Modes = {Mode::Exact};
+
+    auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+    REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+    CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+    CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.low_maxrangecountperrequest")
+{
+    using namespace chunkblock_analyser_testutils;
+    using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    // 5 chunks of 100 bytes each; need chunks 0, 2, 4 → 3 non-contiguous ranges.
+    // With MaxRangeCountPerRequest=1 and high latency, the cost model merges aggressively → full block
+    auto Block = MakeBlockDesc(10, {100, 100, 100, 100, 100});
+    ChunkBlockAnalyser::Options Options;
+    Options.IsQuiet = true;
+    Options.HostLatencySec = 0.1;
+    Options.HostSpeedBytesPerSec = 1000;
+    Options.HostMaxRangeCountPerRequest = 1;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2, 4}}};
+    std::vector<Mode> Modes = {Mode::MultiRange};
+
+    auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+    // Cost model drives merging: 3 requests × 1000 × 0.1 = 300 > slack ≈ 210+headersize
+    // After merges this converges to a full block
+    REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+    CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+    CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.no_latency_skips_optimize")
+{
+    using namespace chunkblock_analyser_testutils;
+    using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+    // Default HostLatencySec=-1 → OptimizeRanges not called; raw GetBlockRanges result used
+    ChunkBlockAnalyser::Options Options;
+    Options.IsQuiet = true;
+    ChunkBlockAnalyser Analyser(*LogOutput, std::span(&Block, 1), Options);
+
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+    std::vector<Mode> Modes = {Mode::MultiRange};
+
+    auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+    // No optimize pass → exact ranges from GetBlockRanges
+    CHECK(Result.FullBlockIndexes.empty());
+    REQUIRE_EQ(2u, Result.BlockRanges.size());
+    CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+    CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+    CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+    CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+}
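+
+// Summary of the behaviour the mode tests above imply: Off always fetches the
+// whole block, Exact keeps per-chunk ranges as-is, SingleRange collapses them
+// to one covering span, and MultiRange/MultiRangeHighSpeed run the ranges
+// through the cost model with the normal- and high-speed host parameters. The
+// mode span is indexed by BlockIndex, one entry per block description, as
+// this small sketch shows:
+TEST_CASE("chunkblock.calc.mode_per_block_indexing_sketch")
+{
+    using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+    std::vector<Mode> Modes(3, Mode::Exact);
+    Modes[0] = Mode::Off; // block 0 opts out of partial download entirely
+    CHECK_EQ(3u, Modes.size());
+    CHECK(Modes[1] == Mode::Exact);
+}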
+
+TEST_CASE("chunkblock.calc.multiple_blocks_different_modes")
+{
+    using namespace chunkblock_analyser_testutils;
+    using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+    LoggerRef LogRef = Log();
+    std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+    // 3 blocks with different modes: Off, Exact, MultiRange
+    auto Block0 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/0);
+    auto Block1 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/10);
+    auto Block2 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/20);
+
+    ChunkBlockAnalyser::Options Options;
+    Options.IsQuiet = true;
+    Options.HostLatencySec = 0.001;
+    Options.HostSpeedBytesPerSec = 100000;
+
+    std::vector<ChunkBlockDescription> Blocks = {Block0, Block1, Block2};
+    ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options);
+
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + 50;
+
+    std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {
+        {.BlockIndex = 0, .ChunkIndexes = {0, 2}},
+        {.BlockIndex = 1, .ChunkIndexes = {0, 2}},
+        {.BlockIndex = 2, .ChunkIndexes = {0, 2}},
+    };
+    std::vector<Mode> Modes = {Mode::Off, Mode::Exact, Mode::MultiRange};
+
+    auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+    // Block 0: Off → FullBlockIndexes
+    REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+    CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+
+    // Block 1: Exact → 2 ranges; Block 2: MultiRange (low latency) → 2 ranges
+    // Total: 4 ranges
+    REQUIRE_EQ(4u, Result.BlockRanges.size());
+
+    // First 2 ranges belong to Block 1 (Exact)
+    CHECK_EQ(1u, Result.BlockRanges[0].BlockIndex);
+    CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+    CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+    CHECK_EQ(1u, Result.BlockRanges[1].BlockIndex);
+    CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+    CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+
+    // Last 2 ranges belong to Block 2 (MultiRange preserved)
+    CHECK_EQ(2u, Result.BlockRanges[2].BlockIndex);
+    CHECK_EQ(ChunkStartOffset, Result.BlockRanges[2].RangeStart);
+    CHECK_EQ(100u, Result.BlockRanges[2].RangeLength);
+    CHECK_EQ(2u, Result.BlockRanges[3].BlockIndex);
+    CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[3].RangeStart);
+    CHECK_EQ(300u, Result.BlockRanges[3].RangeLength);
+}
+
+TEST_CASE("chunkblock.getblockranges.first_chunk_only")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    std::vector<uint32_t> Needed = {0};
+    auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+    REQUIRE_EQ(1u, Ranges.size());
+    CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart);
+    CHECK_EQ(100u, Ranges[0].RangeLength);
+    CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+    CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.last_chunk_only")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    std::vector<uint32_t> Needed = {3};
+    auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+    REQUIRE_EQ(1u, Ranges.size());
+    CHECK_EQ(ChunkStartOffset + 600u, Ranges[0].RangeStart); // 100+200+300 before chunk 3
+    CHECK_EQ(400u, Ranges[0].RangeLength);
+    CHECK_EQ(3u, Ranges[0].ChunkBlockIndexStart);
+    CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+}
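+
+// GetBlockRanges, as the cases above and below show, emits one range per
+// maximal run of consecutive needed chunk indexes. A standalone sketch of
+// that grouping rule (helper name hypothetical; assumes the file's existing
+// includes), independent of the block structures:
+[[maybe_unused]] static std::vector<std::pair<uint32_t, uint32_t>> GroupRunsSketch(const std::vector<uint32_t>& Needed)
+{
+    std::vector<std::pair<uint32_t, uint32_t>> Runs; // {first index, count}
+    for (uint32_t Index : Needed)
+    {
+        if (!Runs.empty() && Runs.back().first + Runs.back().second == Index)
+        {
+            Runs.back().second++; // extends the current consecutive run
+        }
+        else
+        {
+            Runs.push_back({Index, 1u});
+        }
+    }
+    return Runs;
+}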
+
+TEST_CASE("chunkblock.getblockranges.middle_chunk_only")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    std::vector<uint32_t> Needed = {1};
+    auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+    REQUIRE_EQ(1u, Ranges.size());
+    CHECK_EQ(ChunkStartOffset + 100u, Ranges[0].RangeStart); // 100 before chunk 1
+    CHECK_EQ(200u, Ranges[0].RangeLength);
+    CHECK_EQ(1u, Ranges[0].ChunkBlockIndexStart);
+    CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.all_chunks")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    std::vector<uint32_t> Needed = {0, 1, 2, 3};
+    auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+    REQUIRE_EQ(1u, Ranges.size());
+    CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart);
+    CHECK_EQ(1000u, Ranges[0].RangeLength); // 100+200+300+400
+    CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+    CHECK_EQ(4u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.non_contiguous")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    // Chunks 0 and 2 needed, chunk 1 skipped → two separate ranges
+    auto Block = MakeBlockDesc(50, {100, 200, 300});
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    std::vector<uint32_t> Needed = {0, 2};
+    auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+    REQUIRE_EQ(2u, Ranges.size());
+
+    CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart);
+    CHECK_EQ(100u, Ranges[0].RangeLength);
+    CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+    CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+
+    CHECK_EQ(ChunkStartOffset + 300u, Ranges[1].RangeStart); // 100+200 before chunk 2
+    CHECK_EQ(300u, Ranges[1].RangeLength);
+    CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart);
+    CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.contiguous_run")
+{
+    using namespace chunkblock_analyser_testutils;
+
+    // Chunks 1, 2, 3 needed (consecutive) → one merged range
+    auto Block = MakeBlockDesc(50, {50, 100, 150, 200, 250});
+    uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+    std::vector<uint32_t> Needed = {1, 2, 3};
+    auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+    REQUIRE_EQ(1u, Ranges.size());
+    CHECK_EQ(ChunkStartOffset + 50u, Ranges[0].RangeStart); // 50 before chunk 1
+    CHECK_EQ(450u, Ranges[0].RangeLength);                  // 100+150+200
+    CHECK_EQ(1u, Ranges[0].ChunkBlockIndexStart);
+    CHECK_EQ(3u, Ranges[0].ChunkBlockIndexCount);
+}
+
 TEST_SUITE_END();

 void
-- 
cgit v1.2.3