aboutsummaryrefslogtreecommitdiff
path: root/src/zenremotestore/chunking/chunkblock.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/zenremotestore/chunking/chunkblock.cpp')
-rw-r--r--src/zenremotestore/chunking/chunkblock.cpp1378
1 files changed, 1366 insertions, 12 deletions
diff --git a/src/zenremotestore/chunking/chunkblock.cpp b/src/zenremotestore/chunking/chunkblock.cpp
index c4d8653f4..cca32c17d 100644
--- a/src/zenremotestore/chunking/chunkblock.cpp
+++ b/src/zenremotestore/chunking/chunkblock.cpp
@@ -7,27 +7,201 @@
#include <zencore/logging.h>
#include <zencore/timer.h>
#include <zencore/trace.h>
-
#include <zenremotestore/operationlogoutput.h>
-#include <vector>
+#include <numeric>
ZEN_THIRD_PARTY_INCLUDES_START
-#include <tsl/robin_map.h>
+#include <tsl/robin_set.h>
ZEN_THIRD_PARTY_INCLUDES_END
#if ZEN_WITH_TESTS
# include <zencore/testing.h>
# include <zencore/testutils.h>
-
-# include <unordered_map>
-# include <numeric>
#endif // ZEN_WITH_TESTS
namespace zen {
using namespace std::literals;
+namespace chunkblock_impl {
+
+ struct RangeDescriptor
+ {
+ uint64_t RangeStart = 0;
+ uint64_t RangeLength = 0;
+ uint32_t ChunkBlockIndexStart = 0;
+ uint32_t ChunkBlockIndexCount = 0;
+ };
+
+ void MergeCheapestRange(std::vector<RangeDescriptor>& InOutRanges)
+ {
+ ZEN_ASSERT(InOutRanges.size() > 1);
+
+ size_t BestRangeIndexToCollapse = SIZE_MAX;
+ uint64_t BestGap = (uint64_t)-1;
+
+ for (size_t RangeIndex = 0; RangeIndex < InOutRanges.size() - 1; RangeIndex++)
+ {
+ const RangeDescriptor& Range = InOutRanges[RangeIndex];
+ const RangeDescriptor& NextRange = InOutRanges[RangeIndex + 1];
+ uint64_t Gap = NextRange.RangeStart - (Range.RangeStart + Range.RangeLength);
+ if (Gap < BestGap)
+ {
+ BestRangeIndexToCollapse = RangeIndex;
+ BestGap = Gap;
+ }
+ else if (Gap == BestGap)
+ {
+ const RangeDescriptor& BestRange = InOutRanges[BestRangeIndexToCollapse];
+ const RangeDescriptor& BestNextRange = InOutRanges[BestRangeIndexToCollapse + 1];
+ uint64_t BestMergedSize = (BestNextRange.RangeStart + BestNextRange.RangeLength) - BestRange.RangeStart;
+ uint64_t MergedSize = (NextRange.RangeStart + NextRange.RangeLength) - Range.RangeStart;
+ if (MergedSize < BestMergedSize)
+ {
+ BestRangeIndexToCollapse = RangeIndex;
+ }
+ }
+ }
+
+ ZEN_ASSERT(BestRangeIndexToCollapse != SIZE_MAX);
+ ZEN_ASSERT(BestRangeIndexToCollapse < InOutRanges.size() - 1);
+ ZEN_ASSERT(BestGap != (uint64_t)-1);
+
+ RangeDescriptor& BestRange = InOutRanges[BestRangeIndexToCollapse];
+ const RangeDescriptor& BestNextRange = InOutRanges[BestRangeIndexToCollapse + 1];
+ BestRange.RangeLength = BestNextRange.RangeStart - BestRange.RangeStart + BestNextRange.RangeLength;
+ BestRange.ChunkBlockIndexCount =
+ BestNextRange.ChunkBlockIndexStart - BestRange.ChunkBlockIndexStart + BestNextRange.ChunkBlockIndexCount;
+ InOutRanges.erase(InOutRanges.begin() + BestRangeIndexToCollapse + 1);
+ }
+
+ std::vector<RangeDescriptor> GetBlockRanges(const ChunkBlockDescription& BlockDescription,
+ const uint64_t ChunkStartOffsetInBlock,
+ std::span<const uint32_t> BlockChunkIndexNeeded)
+ {
+ ZEN_TRACE_CPU("GetBlockRanges");
+ std::vector<RangeDescriptor> BlockRanges;
+ {
+ uint64_t CurrentOffset = ChunkStartOffsetInBlock;
+ uint32_t ChunkBlockIndex = 0;
+ uint32_t NeedBlockChunkIndexOffset = 0;
+ RangeDescriptor NextRange;
+ while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && ChunkBlockIndex < BlockDescription.ChunkRawHashes.size())
+ {
+ const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex];
+ if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset])
+ {
+ if (NextRange.RangeLength > 0)
+ {
+ BlockRanges.push_back(NextRange);
+ NextRange = {};
+ }
+ ChunkBlockIndex++;
+ CurrentOffset += ChunkCompressedLength;
+ }
+ else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset])
+ {
+ if (NextRange.RangeLength == 0)
+ {
+ NextRange.RangeStart = CurrentOffset;
+ NextRange.ChunkBlockIndexStart = ChunkBlockIndex;
+ }
+ NextRange.RangeLength += ChunkCompressedLength;
+ NextRange.ChunkBlockIndexCount++;
+ ChunkBlockIndex++;
+ CurrentOffset += ChunkCompressedLength;
+ NeedBlockChunkIndexOffset++;
+ }
+ else
+ {
+ ZEN_ASSERT(false);
+ }
+ }
+ if (NextRange.RangeLength > 0)
+ {
+ BlockRanges.push_back(NextRange);
+ }
+ }
+ ZEN_ASSERT(!BlockRanges.empty());
+ return BlockRanges;
+ }
+
+ std::vector<RangeDescriptor> OptimizeRanges(uint64_t TotalBlockSize,
+ std::span<const RangeDescriptor> ExactRanges,
+ double LatencySec,
+ uint64_t SpeedBytesPerSec,
+ uint64_t MaxRangeCountPerRequest,
+ uint64_t MaxRangesPerBlock)
+ {
+ ZEN_TRACE_CPU("OptimizeRanges");
+ ZEN_ASSERT(MaxRangesPerBlock > 0);
+ std::vector<RangeDescriptor> Ranges(ExactRanges.begin(), ExactRanges.end());
+
+ while (Ranges.size() > MaxRangesPerBlock)
+ {
+ MergeCheapestRange(Ranges);
+ }
+
+ while (true)
+ {
+ const std::uint64_t RangeTotalSize =
+ std::accumulate(Ranges.begin(), Ranges.end(), uint64_t(0u), [](uint64_t Current, const RangeDescriptor& Value) {
+ return Current + Value.RangeLength;
+ });
+
+ const size_t RangeCount = Ranges.size();
+ const uint64_t RequestCount =
+ MaxRangeCountPerRequest == (uint64_t)-1 ? 1 : (RangeCount + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest;
+ uint64_t RequestTimeAsBytes = uint64_t(SpeedBytesPerSec * RequestCount * LatencySec);
+
+ if (RangeCount == 1)
+ {
+ // Does fetching the full block add less time than the time it takes to complete a single request?
+ if (TotalBlockSize - RangeTotalSize < SpeedBytesPerSec * LatencySec)
+ {
+ const std::uint64_t InitialRangeTotalSize =
+ std::accumulate(ExactRanges.begin(),
+ ExactRanges.end(),
+ uint64_t(0u),
+ [](uint64_t Current, const RangeDescriptor& Value) { return Current + Value.RangeLength; });
+
+ ZEN_DEBUG(
+ "Latency round trip takes as long as receiving the extra redundant bytes - go full block, dropping {} of slack, "
+ "adding {} of bytes to fetch, for block of size {}",
+ NiceBytes(TotalBlockSize - RangeTotalSize),
+ NiceBytes(TotalBlockSize - InitialRangeTotalSize),
+ NiceBytes(TotalBlockSize));
+ return {};
+ }
+ else
+ {
+ return Ranges;
+ }
+ }
+
+ if (RequestTimeAsBytes < (TotalBlockSize - RangeTotalSize))
+ {
+ return Ranges;
+ }
+
+ if (RangeCount == 2)
+ {
+ // Merge to single range
+ Ranges.front().RangeLength = Ranges.back().RangeStart - Ranges.front().RangeStart + Ranges.back().RangeLength;
+ Ranges.front().ChunkBlockIndexCount =
+ Ranges.back().ChunkBlockIndexStart - Ranges.front().ChunkBlockIndexStart + Ranges.back().ChunkBlockIndexCount;
+ Ranges.pop_back();
+ }
+ else
+ {
+ MergeCheapestRange(Ranges);
+ }
+ }
+ }
+
+} // namespace chunkblock_impl
+
ChunkBlockDescription
ParseChunkBlockDescription(const CbObjectView& BlockObject)
{
@@ -455,9 +629,299 @@ FindReuseBlocks(OperationLogOutput& Output,
return FilteredReuseBlockIndexes;
}
+ChunkBlockAnalyser::ChunkBlockAnalyser(OperationLogOutput& LogOutput,
+ std::span<const ChunkBlockDescription> BlockDescriptions,
+ const Options& Options)
+: m_LogOutput(LogOutput)
+, m_BlockDescriptions(BlockDescriptions)
+, m_Options(Options)
+{
+}
+
+std::vector<ChunkBlockAnalyser::NeededBlock>
+ChunkBlockAnalyser::GetNeeded(const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex,
+ std::function<bool(uint32_t ChunkIndex)>&& NeedsBlockChunk)
+{
+ ZEN_TRACE_CPU("ChunkBlockAnalyser::GetNeeded");
+
+ std::vector<NeededBlock> Result;
+
+ std::vector<bool> ChunkIsNeeded(ChunkHashToChunkIndex.size());
+ for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkHashToChunkIndex.size(); ChunkIndex++)
+ {
+ ChunkIsNeeded[ChunkIndex] = NeedsBlockChunk(ChunkIndex);
+ }
+
+ std::vector<uint64_t> BlockSlack(m_BlockDescriptions.size(), 0u);
+ for (uint32_t BlockIndex = 0; BlockIndex < m_BlockDescriptions.size(); BlockIndex++)
+ {
+ const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
+
+ uint64_t BlockUsedSize = 0;
+ uint64_t BlockSize = 0;
+
+ for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++)
+ {
+ const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex];
+ if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
+ {
+ const uint32_t RemoteChunkIndex = It->second;
+ if (ChunkIsNeeded[RemoteChunkIndex])
+ {
+ BlockUsedSize += BlockDescription.ChunkCompressedLengths[ChunkBlockIndex];
+ }
+ }
+ BlockSize += BlockDescription.ChunkCompressedLengths[ChunkBlockIndex];
+ }
+ BlockSlack[BlockIndex] = BlockSize - BlockUsedSize;
+ }
+
+ std::vector<uint32_t> BlockOrder(m_BlockDescriptions.size());
+ std::iota(BlockOrder.begin(), BlockOrder.end(), 0);
+
+ std::sort(BlockOrder.begin(), BlockOrder.end(), [&BlockSlack](uint32_t Lhs, uint32_t Rhs) {
+ return BlockSlack[Lhs] < BlockSlack[Rhs];
+ });
+
+ std::vector<bool> ChunkIsPickedUp(ChunkHashToChunkIndex.size(), false);
+
+ for (uint32_t BlockIndex : BlockOrder)
+ {
+ const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[BlockIndex];
+
+ std::vector<uint32_t> BlockChunkIndexNeeded;
+
+ for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++)
+ {
+ const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex];
+ if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end())
+ {
+ const uint32_t RemoteChunkIndex = It->second;
+ if (ChunkIsNeeded[RemoteChunkIndex])
+ {
+ if (!ChunkIsPickedUp[RemoteChunkIndex])
+ {
+ ChunkIsPickedUp[RemoteChunkIndex] = true;
+ BlockChunkIndexNeeded.push_back(ChunkBlockIndex);
+ }
+ }
+ }
+ else
+ {
+ ZEN_DEBUG("Chunk {} not found in block {}", ChunkHash, BlockDescription.BlockHash);
+ }
+ }
+
+ if (!BlockChunkIndexNeeded.empty())
+ {
+ Result.push_back(NeededBlock{.BlockIndex = BlockIndex, .ChunkIndexes = std::move(BlockChunkIndexNeeded)});
+ }
+ }
+ return Result;
+}
+
+ChunkBlockAnalyser::BlockResult
+ChunkBlockAnalyser::CalculatePartialBlockDownloads(std::span<const NeededBlock> NeededBlocks,
+ std::span<const EPartialBlockDownloadMode> BlockPartialDownloadModes)
+{
+ ZEN_TRACE_CPU("ChunkBlockAnalyser::CalculatePartialBlockDownloads");
+
+ Stopwatch PartialAnalisysTimer;
+
+ ChunkBlockAnalyser::BlockResult Result;
+
+ {
+ uint64_t MinRequestCount = 0;
+ uint64_t RequestCount = 0;
+ uint64_t RangeCount = 0;
+ uint64_t IdealDownloadTotalSize = 0;
+ uint64_t ActualDownloadTotalSize = 0;
+ uint64_t FullDownloadTotalSize = 0;
+ for (const NeededBlock& NeededBlock : NeededBlocks)
+ {
+ const ChunkBlockDescription& BlockDescription = m_BlockDescriptions[NeededBlock.BlockIndex];
+ std::span<const uint32_t> BlockChunkIndexNeeded(NeededBlock.ChunkIndexes);
+ const uint32_t ChunkStartOffsetInBlock =
+ gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize);
+ uint64_t TotalBlockSize = std::accumulate(BlockDescription.ChunkCompressedLengths.begin(),
+ BlockDescription.ChunkCompressedLengths.end(),
+ uint64_t(ChunkStartOffsetInBlock));
+ uint64_t ExactRangesSize = 0;
+ uint64_t DownloadRangesSize = 0;
+ uint64_t FullDownloadSize = 0;
+
+ bool CanDoPartialBlockDownload = (BlockDescription.HeaderSize > 0) &&
+ (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size());
+
+ if (NeededBlock.ChunkIndexes.size() == BlockDescription.ChunkRawHashes.size() || !CanDoPartialBlockDownload)
+ {
+ // Full block
+ ExactRangesSize = TotalBlockSize;
+ DownloadRangesSize = TotalBlockSize;
+ FullDownloadSize = TotalBlockSize;
+ MinRequestCount++;
+ RequestCount++;
+ RangeCount++;
+ Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex);
+ }
+ else if (NeededBlock.ChunkIndexes.empty())
+ {
+ // Not needed
+ }
+ else
+ {
+ FullDownloadSize = TotalBlockSize;
+ std::vector<chunkblock_impl::RangeDescriptor> Ranges =
+ chunkblock_impl::GetBlockRanges(BlockDescription, ChunkStartOffsetInBlock, BlockChunkIndexNeeded);
+ ExactRangesSize = std::accumulate(
+ Ranges.begin(),
+ Ranges.end(),
+ uint64_t(0),
+ [](uint64_t Current, const chunkblock_impl::RangeDescriptor& Range) { return Current + Range.RangeLength; });
+
+ EPartialBlockDownloadMode PartialBlockDownloadMode = BlockPartialDownloadModes[NeededBlock.BlockIndex];
+ if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Off)
+ {
+ // Use full block
+ MinRequestCount++;
+ RangeCount++;
+ RequestCount++;
+ Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex);
+ DownloadRangesSize = TotalBlockSize;
+ }
+ else
+ {
+ const bool IsHighSpeed = (PartialBlockDownloadMode == EPartialBlockDownloadMode::MultiRangeHighSpeed);
+ uint64_t MaxRangeCountPerRequest =
+ IsHighSpeed ? m_Options.HostHighSpeedMaxRangeCountPerRequest : m_Options.HostMaxRangeCountPerRequest;
+ ZEN_ASSERT(MaxRangeCountPerRequest != 0);
+
+ if (PartialBlockDownloadMode == EPartialBlockDownloadMode::Exact)
+ {
+ // Use exact ranges
+ for (const chunkblock_impl::RangeDescriptor& Range : Ranges)
+ {
+ Result.BlockRanges.push_back(BlockRangeDescriptor{.BlockIndex = NeededBlock.BlockIndex,
+ .RangeStart = Range.RangeStart,
+ .RangeLength = Range.RangeLength,
+ .ChunkBlockIndexStart = Range.ChunkBlockIndexStart,
+ .ChunkBlockIndexCount = Range.ChunkBlockIndexCount});
+ }
+
+ MinRequestCount++;
+ RangeCount += Ranges.size();
+ RequestCount += MaxRangeCountPerRequest == (uint64_t)-1
+ ? 1
+ : (Ranges.size() + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest;
+ DownloadRangesSize = ExactRangesSize;
+ }
+ else
+ {
+ if (PartialBlockDownloadMode == EPartialBlockDownloadMode::SingleRange)
+ {
+ // Use single range
+ if (Ranges.size() > 1)
+ {
+ Ranges = {chunkblock_impl::RangeDescriptor{
+ .RangeStart = Ranges.front().RangeStart,
+ .RangeLength = Ranges.back().RangeStart + Ranges.back().RangeLength - Ranges.front().RangeStart,
+ .ChunkBlockIndexStart = Ranges.front().ChunkBlockIndexStart,
+ .ChunkBlockIndexCount = Ranges.back().ChunkBlockIndexStart + Ranges.back().ChunkBlockIndexCount -
+ Ranges.front().ChunkBlockIndexStart}};
+ }
+
+ // We still do the optimize pass to see if it is more effective to use a full block
+ }
+
+ double LatencySec = IsHighSpeed ? m_Options.HostHighSpeedLatencySec : m_Options.HostLatencySec;
+ uint64_t SpeedBytesPerSec = IsHighSpeed ? m_Options.HostHighSpeedBytesPerSec : m_Options.HostSpeedBytesPerSec;
+ if (LatencySec > 0.0 && SpeedBytesPerSec > 0u)
+ {
+ Ranges = chunkblock_impl::OptimizeRanges(TotalBlockSize,
+ Ranges,
+ LatencySec,
+ SpeedBytesPerSec,
+ MaxRangeCountPerRequest,
+ m_Options.MaxRangesPerBlock);
+ }
+
+ MinRequestCount++;
+ if (Ranges.empty())
+ {
+ Result.FullBlockIndexes.push_back(NeededBlock.BlockIndex);
+ RequestCount++;
+ RangeCount++;
+ DownloadRangesSize = TotalBlockSize;
+ }
+ else
+ {
+ for (const chunkblock_impl::RangeDescriptor& Range : Ranges)
+ {
+ Result.BlockRanges.push_back(BlockRangeDescriptor{.BlockIndex = NeededBlock.BlockIndex,
+ .RangeStart = Range.RangeStart,
+ .RangeLength = Range.RangeLength,
+ .ChunkBlockIndexStart = Range.ChunkBlockIndexStart,
+ .ChunkBlockIndexCount = Range.ChunkBlockIndexCount});
+ }
+ RangeCount += Ranges.size();
+ RequestCount += MaxRangeCountPerRequest == (uint64_t)-1
+ ? 1
+ : (Ranges.size() + MaxRangeCountPerRequest - 1) / MaxRangeCountPerRequest;
+ }
+
+ DownloadRangesSize = Ranges.empty()
+ ? TotalBlockSize
+ : std::accumulate(Ranges.begin(),
+ Ranges.end(),
+ uint64_t(0),
+ [](uint64_t Current, const chunkblock_impl::RangeDescriptor& Range) {
+ return Current + Range.RangeLength;
+ });
+ }
+ }
+ }
+ IdealDownloadTotalSize += ExactRangesSize;
+ ActualDownloadTotalSize += DownloadRangesSize;
+ FullDownloadTotalSize += FullDownloadSize;
+
+ if (ExactRangesSize < FullDownloadSize)
+ {
+ ZEN_DEBUG("Block {}: Full: {}, Ideal: {}, Actual: {}, Saves: {}",
+ NeededBlock.BlockIndex,
+ NiceBytes(FullDownloadSize),
+ NiceBytes(ExactRangesSize),
+ NiceBytes(DownloadRangesSize),
+ NiceBytes(FullDownloadSize - DownloadRangesSize));
+ }
+ }
+ uint64_t Actual = FullDownloadTotalSize - ActualDownloadTotalSize;
+ uint64_t Ideal = FullDownloadTotalSize - IdealDownloadTotalSize;
+ if (Ideal < FullDownloadTotalSize && !m_Options.IsQuiet)
+ {
+ const double AchievedPercent = Ideal == 0 ? 100.0 : (100.0 * Actual) / Ideal;
+ ZEN_OPERATION_LOG_INFO(m_LogOutput,
+ "Block Partial Analysis: Blocks: {}, Full: {}, Ideal: {}, Actual: {}. Skipping {} ({:.1f}%) out of "
+ "possible {} using {} extra ranges "
+ "via {} extra requests. Completed in {}",
+ NeededBlocks.size(),
+ NiceBytes(FullDownloadTotalSize),
+ NiceBytes(IdealDownloadTotalSize),
+ NiceBytes(ActualDownloadTotalSize),
+ NiceBytes(FullDownloadTotalSize - ActualDownloadTotalSize),
+ AchievedPercent,
+ NiceBytes(Ideal),
+ RangeCount - MinRequestCount,
+ RequestCount - MinRequestCount,
+ NiceTimeSpanMs(PartialAnalisysTimer.GetElapsedTimeMs()));
+ }
+ }
+
+ return Result;
+}
+
#if ZEN_WITH_TESTS
-namespace testutils {
+namespace chunkblock_testutils {
static std::vector<std::pair<Oid, CompressedBuffer>> CreateAttachments(
const std::span<const size_t>& Sizes,
OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast,
@@ -474,12 +938,14 @@ namespace testutils {
return Result;
}
-} // namespace testutils
+} // namespace chunkblock_testutils
+
+TEST_SUITE_BEGIN("remotestore.chunkblock");
-TEST_CASE("project.store.block")
+TEST_CASE("chunkblock.block")
{
using namespace std::literals;
- using namespace testutils;
+ using namespace chunkblock_testutils;
std::vector<std::size_t> AttachmentSizes({7633, 6825, 5738, 8031, 7225, 566, 3656, 6006, 24, 3466, 1093, 4269, 2257, 3685, 3489,
7194, 6151, 5482, 6217, 3511, 6738, 5061, 7537, 2759, 1916, 8210, 2235, 4024, 1582, 5251,
@@ -504,10 +970,10 @@ TEST_CASE("project.store.block")
HeaderSize));
}
-TEST_CASE("project.store.reuseblocks")
+TEST_CASE("chunkblock.reuseblocks")
{
using namespace std::literals;
- using namespace testutils;
+ using namespace chunkblock_testutils;
std::vector<std::vector<std::size_t>> BlockAttachmentSizes(
{std::vector<std::size_t>{7633, 6825, 5738, 8031, 7225, 566, 3656, 6006, 24, 3466, 1093, 4269, 2257, 3685, 3489,
@@ -744,6 +1210,894 @@ TEST_CASE("project.store.reuseblocks")
}
}
+namespace chunkblock_analyser_testutils {
+
+ // Build a ChunkBlockDescription without any real payload.
+ // Hashes are derived deterministically from (BlockSeed XOR ChunkIndex) so that the same
+ // seed produces the same hashes — useful for deduplication tests.
+ static ChunkBlockDescription MakeBlockDesc(uint64_t HeaderSize,
+ std::initializer_list<uint32_t> CompressedLengths,
+ uint32_t BlockSeed = 0)
+ {
+ ChunkBlockDescription Desc;
+ Desc.HeaderSize = HeaderSize;
+ uint32_t ChunkIndex = 0;
+ for (uint32_t Length : CompressedLengths)
+ {
+ uint64_t HashInput = uint64_t(BlockSeed ^ ChunkIndex);
+ Desc.ChunkRawHashes.push_back(IoHash::HashBuffer(MemoryView(&HashInput, sizeof(HashInput))));
+ Desc.ChunkRawLengths.push_back(Length);
+ Desc.ChunkCompressedLengths.push_back(Length);
+ ChunkIndex++;
+ }
+ return Desc;
+ }
+
+ // Build the robin_map<IoHash, uint32_t> needed by GetNeeded from a flat list of blocks.
+ // First occurrence of each hash wins; index is assigned sequentially across all blocks.
+ [[maybe_unused]] static tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> MakeHashMap(const std::vector<ChunkBlockDescription>& Blocks)
+ {
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> Result;
+ uint32_t Index = 0;
+ for (const ChunkBlockDescription& Block : Blocks)
+ {
+ for (const IoHash& Hash : Block.ChunkRawHashes)
+ {
+ if (!Result.contains(Hash))
+ {
+ Result.emplace(Hash, Index++);
+ }
+ }
+ }
+ return Result;
+ }
+
+} // namespace chunkblock_analyser_testutils
+
+TEST_CASE("chunkblock.mergecheapestrange.picks_smallest_gap")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // Gap between ranges 0-1 is 50, gap between 1-2 is 150 → pair 0-1 gets merged
+ std::vector<RD> Ranges = {
+ {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 150, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ };
+ chunkblock_impl::MergeCheapestRange(Ranges);
+
+ REQUIRE_EQ(2u, Ranges.size());
+ CHECK_EQ(0u, Ranges[0].RangeStart);
+ CHECK_EQ(250u, Ranges[0].RangeLength); // 150+100
+ CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(2u, Ranges[0].ChunkBlockIndexCount);
+ CHECK_EQ(400u, Ranges[1].RangeStart);
+ CHECK_EQ(100u, Ranges[1].RangeLength);
+ CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.mergecheapestrange.tiebreak_smaller_merged")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // Gap 0-1 == gap 1-2 == 100; merged size 0-1 (250) < merged size 1-2 (350) → pair 0-1 wins
+ std::vector<RD> Ranges = {
+ {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 200, .RangeLength = 50, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 350, .RangeLength = 200, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ };
+ chunkblock_impl::MergeCheapestRange(Ranges);
+
+ REQUIRE_EQ(2u, Ranges.size());
+ // Pair 0-1 merged: start=0, length = (200+50)-0 = 250
+ CHECK_EQ(0u, Ranges[0].RangeStart);
+ CHECK_EQ(250u, Ranges[0].RangeLength);
+ CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(2u, Ranges[0].ChunkBlockIndexCount);
+ // Pair 1 unchanged (was index 2)
+ CHECK_EQ(350u, Ranges[1].RangeStart);
+ CHECK_EQ(200u, Ranges[1].RangeLength);
+ CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.optimizeranges.preserves_ranges_low_latency")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // With MaxRangeCountPerRequest unlimited, RequestCount=1
+ // RequestTimeAsBytes = 100000 * 1 * 0.001 = 100 << slack=7000 → all ranges preserved
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 0, .RangeLength = 1000, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 2000, .RangeLength = 1000, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 4000, .RangeLength = 1000, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ };
+ uint64_t TotalBlockSize = 10000;
+ double LatencySec = 0.001;
+ uint64_t SpeedBytesPerSec = 100000;
+ uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+ uint64_t MaxRangesPerBlock = 1024;
+
+ auto Result =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+ REQUIRE_EQ(3u, Result.size());
+}
+
+TEST_CASE("chunkblock.optimizeranges.falls_back_to_full_block")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // 1 range already; slack=100 < SpeedBytesPerSec*LatencySec=200 → full block (empty result)
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 100, .RangeLength = 900, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 3},
+ };
+ uint64_t TotalBlockSize = 1000;
+ double LatencySec = 0.01;
+ uint64_t SpeedBytesPerSec = 20000;
+ uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+ uint64_t MaxRangesPerBlock = 1024;
+
+ auto Result =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+ CHECK(Result.empty());
+}
+
+TEST_CASE("chunkblock.optimizeranges.maxrangesperblock_clamp")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // 5 input ranges; MaxRangesPerBlock=2 clamps to ≤2 before the cost model runs
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 300, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 600, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 900, .RangeLength = 100, .ChunkBlockIndexStart = 3, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 1200, .RangeLength = 100, .ChunkBlockIndexStart = 4, .ChunkBlockIndexCount = 1},
+ };
+ uint64_t TotalBlockSize = 5000;
+ double LatencySec = 0.001;
+ uint64_t SpeedBytesPerSec = 100000;
+ uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+ uint64_t MaxRangesPerBlock = 2;
+
+ auto Result =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+ CHECK(Result.size() <= 2u);
+ CHECK(!Result.empty());
+}
+
+TEST_CASE("chunkblock.optimizeranges.low_maxrangecountperrequest_drives_merge")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // MaxRangeCountPerRequest=1 means RequestCount==RangeCount; high latency drives merging
+ // With MaxRangeCountPerRequest=-1 the same 3 ranges would be preserved (verified by comment below)
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 100, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 250, .RangeLength = 100, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ };
+ uint64_t TotalBlockSize = 1000;
+ double LatencySec = 1.0;
+ uint64_t SpeedBytesPerSec = 500;
+ // With MaxRangeCountPerRequest=-1: RequestCount=1, RequestTimeAsBytes=500 < slack=700 → preserved
+ // With MaxRangeCountPerRequest=1: RequestCount=3, RequestTimeAsBytes=1500 > slack=700 → merged
+ uint64_t MaxRangesPerBlock = 1024;
+
+ auto Unlimited =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, (uint64_t)-1, MaxRangesPerBlock);
+ CHECK_EQ(3u, Unlimited.size());
+
+ auto Limited =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, uint64_t(1), MaxRangesPerBlock);
+ CHECK(Limited.size() < 3u);
+}
+
+TEST_CASE("chunkblock.optimizeranges.unlimited_rangecountperrequest_no_extra_cost")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // MaxRangeCountPerRequest=-1 → RequestCount always 1, even with many ranges and high latency
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 0, .RangeLength = 50, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 200, .RangeLength = 50, .ChunkBlockIndexStart = 1, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 400, .RangeLength = 50, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 600, .RangeLength = 50, .ChunkBlockIndexStart = 3, .ChunkBlockIndexCount = 1},
+ {.RangeStart = 800, .RangeLength = 50, .ChunkBlockIndexStart = 4, .ChunkBlockIndexCount = 1},
+ };
+ uint64_t TotalBlockSize = 5000;
+ double LatencySec = 0.1;
+ uint64_t SpeedBytesPerSec = 10000; // RequestTimeAsBytes=1000 << slack=4750
+ uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+ uint64_t MaxRangesPerBlock = 1024;
+
+ auto Result =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+ CHECK_EQ(5u, Result.size());
+}
+
+TEST_CASE("chunkblock.optimizeranges.two_range_direct_merge_path")
+{
+ using RD = chunkblock_impl::RangeDescriptor;
+ // Exactly 2 ranges; cost model demands merge; exercises the RangeCount==2 direct-merge branch
+ // After direct merge → 1 range with small slack → full block (empty)
+ std::vector<RD> ExactRanges = {
+ {.RangeStart = 0, .RangeLength = 100, .ChunkBlockIndexStart = 0, .ChunkBlockIndexCount = 2},
+ {.RangeStart = 400, .RangeLength = 100, .ChunkBlockIndexStart = 2, .ChunkBlockIndexCount = 2},
+ };
+ uint64_t TotalBlockSize = 600;
+ double LatencySec = 0.1;
+ uint64_t SpeedBytesPerSec = 5000; // RequestTimeAsBytes=500 > slack=400 on first iter
+ uint64_t MaxRangeCountPerReq = (uint64_t)-1;
+ uint64_t MaxRangesPerBlock = 1024;
+
+ // Iteration 1: RangeCount=2, RequestCount=1, RequestTimeAsBytes=500 > slack=400 → direct merge
+ // After merge: 1 range [{0,500,0,4}], slack=100 < Speed*Lat=500 → full block
+ auto Result =
+ chunkblock_impl::OptimizeRanges(TotalBlockSize, ExactRanges, LatencySec, SpeedBytesPerSec, MaxRangeCountPerReq, MaxRangesPerBlock);
+
+ CHECK(Result.empty());
+}
+
+TEST_CASE("chunkblock.getneeded.all_chunks")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ auto HashMap = MakeHashMap({Block});
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return true; });
+
+ REQUIRE_EQ(1u, NeededBlocks.size());
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+ REQUIRE_EQ(4u, NeededBlocks[0].ChunkIndexes.size());
+ CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+ CHECK_EQ(1u, NeededBlocks[0].ChunkIndexes[1]);
+ CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[2]);
+ CHECK_EQ(3u, NeededBlocks[0].ChunkIndexes[3]);
+}
+
+TEST_CASE("chunkblock.getneeded.no_chunks")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ auto HashMap = MakeHashMap({Block});
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return false; });
+
+ CHECK(NeededBlocks.empty());
+}
+
+TEST_CASE("chunkblock.getneeded.subset_within_block")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ auto HashMap = MakeHashMap({Block});
+ // Indices 0 and 2 are needed; 1 and 3 are not
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex == 0 || ChunkIndex == 2; });
+
+ REQUIRE_EQ(1u, NeededBlocks.size());
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+ REQUIRE_EQ(2u, NeededBlocks[0].ChunkIndexes.size());
+ CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+ CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[1]);
+}
+
+TEST_CASE("chunkblock.getneeded.dedup_low_slack_wins")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ // Block 0: {H0, H1, SharedH, H3} — 3 of 4 needed (H3 not needed); slack = 100
+ // Block 1: {H4, H5, SharedH, H6} — only SharedH needed; slack = 300
+ // Block 0 has less slack → processed first → SharedH assigned to block 0
+ IoHash SharedH = IoHash::HashBuffer(MemoryView("shared_chunk_dedup", 18));
+ IoHash H0 = IoHash::HashBuffer(MemoryView("block0_chunk0", 13));
+ IoHash H1 = IoHash::HashBuffer(MemoryView("block0_chunk1", 13));
+ IoHash H3 = IoHash::HashBuffer(MemoryView("block0_chunk3", 13));
+ IoHash H4 = IoHash::HashBuffer(MemoryView("block1_chunk0", 13));
+ IoHash H5 = IoHash::HashBuffer(MemoryView("block1_chunk1", 13));
+ IoHash H6 = IoHash::HashBuffer(MemoryView("block1_chunk3", 13));
+
+ ChunkBlockDescription Block0;
+ Block0.HeaderSize = 50;
+ Block0.ChunkRawHashes = {H0, H1, SharedH, H3};
+ Block0.ChunkRawLengths = {100, 100, 100, 100};
+ Block0.ChunkCompressedLengths = {100, 100, 100, 100};
+
+ ChunkBlockDescription Block1;
+ Block1.HeaderSize = 50;
+ Block1.ChunkRawHashes = {H4, H5, SharedH, H6};
+ Block1.ChunkRawLengths = {100, 100, 100, 100};
+ Block1.ChunkCompressedLengths = {100, 100, 100, 100};
+
+ std::vector<ChunkBlockDescription> Blocks = {Block0, Block1};
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options);
+
+ // Map: H0→0, H1→1, SharedH→2, H3→3, H4→4, H5→5, H6→6
+ auto HashMap = MakeHashMap(Blocks);
+ // Need H0(0), H1(1), SharedH(2) from block 0; SharedH from block 1 (already index 2)
+ // H3(3) not needed; H4,H5,H6 not needed
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex <= 2; });
+
+ // Block 0 slack=100 (H3 unused), block 1 slack=300 (H4,H5,H6 unused)
+ // Block 0 processed first; picks up H0, H1, SharedH
+ // Block 1 tries SharedH but it's already picked up → empty → not added
+ REQUIRE_EQ(1u, NeededBlocks.size());
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+ REQUIRE_EQ(3u, NeededBlocks[0].ChunkIndexes.size());
+ CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+ CHECK_EQ(1u, NeededBlocks[0].ChunkIndexes[1]);
+ CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[2]);
+}
+
+TEST_CASE("chunkblock.getneeded.dedup_no_double_pickup")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ // SharedH appears in both blocks; should appear in the result exactly once
+ IoHash SharedH = IoHash::HashBuffer(MemoryView("shared_chunk_nodup", 18));
+ IoHash H0 = IoHash::HashBuffer(MemoryView("unique_chunk_b0", 15));
+ IoHash H1 = IoHash::HashBuffer(MemoryView("unique_chunk_b1a", 16));
+ IoHash H2 = IoHash::HashBuffer(MemoryView("unique_chunk_b1b", 16));
+ IoHash H3 = IoHash::HashBuffer(MemoryView("unique_chunk_b1c", 16));
+
+ ChunkBlockDescription Block0;
+ Block0.HeaderSize = 50;
+ Block0.ChunkRawHashes = {SharedH, H0};
+ Block0.ChunkRawLengths = {100, 100};
+ Block0.ChunkCompressedLengths = {100, 100};
+
+ ChunkBlockDescription Block1;
+ Block1.HeaderSize = 50;
+ Block1.ChunkRawHashes = {H1, H2, H3, SharedH};
+ Block1.ChunkRawLengths = {100, 100, 100, 100};
+ Block1.ChunkCompressedLengths = {100, 100, 100, 100};
+
+ std::vector<ChunkBlockDescription> Blocks = {Block0, Block1};
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options);
+
+ // Map: SharedH→0, H0→1, H1→2, H2→3, H3→4
+ // Only SharedH (index 0) needed; no other chunks
+ auto HashMap = MakeHashMap(Blocks);
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex == 0; });
+
+ // Block 0: SharedH needed, H0 not needed → slack=100
+ // Block 1: SharedH needed, H1/H2/H3 not needed → slack=300
+ // Block 0 processed first → picks up SharedH; Block 1 skips it
+
+ // Count total occurrences of SharedH across all NeededBlocks
+ uint32_t SharedOccurrences = 0;
+ for (const auto& NB : NeededBlocks)
+ {
+ for (uint32_t Idx : NB.ChunkIndexes)
+ {
+ // SharedH is at block-local index 0 in Block0 and index 3 in Block1
+ (void)Idx;
+ SharedOccurrences++;
+ }
+ }
+ CHECK_EQ(1u, SharedOccurrences);
+ REQUIRE_EQ(1u, NeededBlocks.size());
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+}
+
+TEST_CASE("chunkblock.getneeded.skips_unrequested_chunks")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ // Block has 4 chunks but only 2 appear in the hash map → ChunkIndexes has exactly those 2
+ auto Block = MakeBlockDesc(50, {100, 100, 100, 100});
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ // Only put chunks at positions 0 and 2 in the map
+ tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> HashMap;
+ HashMap.emplace(Block.ChunkRawHashes[0], 0u);
+ HashMap.emplace(Block.ChunkRawHashes[2], 1u);
+
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t) { return true; });
+
+ REQUIRE_EQ(1u, NeededBlocks.size());
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+ REQUIRE_EQ(2u, NeededBlocks[0].ChunkIndexes.size());
+ CHECK_EQ(0u, NeededBlocks[0].ChunkIndexes[0]);
+ CHECK_EQ(2u, NeededBlocks[0].ChunkIndexes[1]);
+}
+
+TEST_CASE("chunkblock.getneeded.two_blocks_both_contribute")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ // Block 0: all 4 needed (slack=0); block 1: 3 of 4 needed (slack=100)
+ // Both blocks contribute chunks → 2 NeededBlocks in result
+ auto Block0 = MakeBlockDesc(50, {100, 100, 100, 100}, /*BlockSeed=*/0);
+ auto Block1 = MakeBlockDesc(50, {100, 100, 100, 100}, /*BlockSeed=*/200);
+
+ std::vector<ChunkBlockDescription> Blocks = {Block0, Block1};
+ ChunkBlockAnalyser::Options Options;
+ ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options);
+
+ // HashMap: Block0 hashes → indices 0-3, Block1 hashes → indices 4-7
+ auto HashMap = MakeHashMap(Blocks);
+ // Need all Block0 chunks (0-3) and Block1 chunks 0-2 (indices 4-6); not chunk index 7 (Block1 chunk 3)
+ auto NeededBlocks = Analyser.GetNeeded(HashMap, [](uint32_t ChunkIndex) { return ChunkIndex <= 6; });
+
+ CHECK_EQ(2u, NeededBlocks.size());
+ // Block 0 has slack=0 (all 4 needed), Block 1 has slack=100 (1 not needed)
+ // Block 0 comes first in result
+ CHECK_EQ(0u, NeededBlocks[0].BlockIndex);
+ CHECK_EQ(4u, NeededBlocks[0].ChunkIndexes.size());
+ CHECK_EQ(1u, NeededBlocks[1].BlockIndex);
+ CHECK_EQ(3u, NeededBlocks[1].ChunkIndexes.size());
+}
+
+TEST_CASE("chunkblock.calc.off_mode")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ // HeaderSize > 0, chunks size matches → CanDoPartialBlockDownload = true
+ // But mode Off forces full block regardless
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::Off};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+ CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+ CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.exact_mode")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ // Need chunks 0 and 2 → 2 non-contiguous ranges; Exact mode passes them straight through
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::Exact};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ CHECK(Result.FullBlockIndexes.empty());
+ REQUIRE_EQ(2u, Result.BlockRanges.size());
+
+ CHECK_EQ(0u, Result.BlockRanges[0].BlockIndex);
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(0u, Result.BlockRanges[0].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Result.BlockRanges[0].ChunkBlockIndexCount);
+
+ CHECK_EQ(0u, Result.BlockRanges[1].BlockIndex);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart); // 100+200 before chunk 2
+ CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+ CHECK_EQ(2u, Result.BlockRanges[1].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Result.BlockRanges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.calc.singlerange_mode")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ // Default HostLatencySec=-1 → OptimizeRanges not called after SingleRange collapse
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ // Need chunks 0 and 2 → 2 ranges that get collapsed to 1
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::SingleRange};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ CHECK(Result.FullBlockIndexes.empty());
+ REQUIRE_EQ(1u, Result.BlockRanges.size());
+ CHECK_EQ(0u, Result.BlockRanges[0].BlockIndex);
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ // Spans from chunk 0 start to chunk 2 end: 100+200+300=600
+ CHECK_EQ(600u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(0u, Result.BlockRanges[0].ChunkBlockIndexStart);
+ // ChunkBlockIndexCount = (2+1) - 0 = 3
+ CHECK_EQ(3u, Result.BlockRanges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.calc.multirange_mode")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ // Low latency: RequestTimeAsBytes=100 << slack → OptimizeRanges preserves ranges
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ Options.HostLatencySec = 0.001;
+ Options.HostSpeedBytesPerSec = 100000;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::MultiRange};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ CHECK(Result.FullBlockIndexes.empty());
+ REQUIRE_EQ(2u, Result.BlockRanges.size());
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+ CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+}
+
+TEST_CASE("chunkblock.calc.multirangehighspeed_mode")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ // Block slack ≈ 714 bytes (TotalBlockSize≈1114, RangeTotalSize=400 for chunks 0+2)
+ // RequestTimeAsBytes = 400000 * 1 * 0.001 = 400 < 714 → ranges preserved
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ Options.HostHighSpeedLatencySec = 0.001;
+ Options.HostHighSpeedBytesPerSec = 400000;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::MultiRangeHighSpeed};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ CHECK(Result.FullBlockIndexes.empty());
+ REQUIRE_EQ(2u, Result.BlockRanges.size());
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+ CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+}
+
+TEST_CASE("chunkblock.calc.all_chunks_needed_full_block")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ Options.HostLatencySec = 0.001;
+ Options.HostSpeedBytesPerSec = 100000;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ // All 4 chunks needed → short-circuit to full block regardless of mode
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 1, 2, 3}}};
+ std::vector<Mode> Modes = {Mode::Exact};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+ CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+ CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.headersize_zero_forces_full_block")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ // HeaderSize=0 → CanDoPartialBlockDownload=false → full block even in Exact mode
+ auto Block = MakeBlockDesc(0, {100, 200, 300, 400});
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::Exact};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+ CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+ CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.low_maxrangecountperrequest")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ // 5 chunks of 100 bytes each; need chunks 0, 2, 4 → 3 non-contiguous ranges
+ // With MaxRangeCountPerRequest=1 and high latency, cost model merges aggressively → full block
+ auto Block = MakeBlockDesc(10, {100, 100, 100, 100, 100});
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ Options.HostLatencySec = 0.1;
+ Options.HostSpeedBytesPerSec = 1000;
+ Options.HostMaxRangeCountPerRequest = 1;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2, 4}}};
+ std::vector<Mode> Modes = {Mode::MultiRange};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ // Cost model drives merging: 3 requests × 1000 × 0.1 = 300 > slack ≈ 210+headersize
+ // After merges converges to full block
+ REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+ CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+ CHECK(Result.BlockRanges.empty());
+}
+
+TEST_CASE("chunkblock.calc.no_latency_skips_optimize")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ // Default HostLatencySec=-1 → OptimizeRanges not called; raw GetBlockRanges result used
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ ChunkBlockAnalyser Analyser(*LogOutput, std::span<const ChunkBlockDescription>(&Block, 1), Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {{.BlockIndex = 0, .ChunkIndexes = {0, 2}}};
+ std::vector<Mode> Modes = {Mode::MultiRange};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ // No optimize pass → exact ranges from GetBlockRanges
+ CHECK(Result.FullBlockIndexes.empty());
+ REQUIRE_EQ(2u, Result.BlockRanges.size());
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+ CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+}
+
+TEST_CASE("chunkblock.calc.multiple_blocks_different_modes")
+{
+ using namespace chunkblock_analyser_testutils;
+ using Mode = ChunkBlockAnalyser::EPartialBlockDownloadMode;
+
+ LoggerRef LogRef = Log();
+ std::unique_ptr<OperationLogOutput> LogOutput(CreateStandardLogOutput(LogRef));
+
+ // 3 blocks with different modes: Off, Exact, MultiRange
+ auto Block0 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/0);
+ auto Block1 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/10);
+ auto Block2 = MakeBlockDesc(50, {100, 200, 300, 400}, /*BlockSeed=*/20);
+
+ ChunkBlockAnalyser::Options Options;
+ Options.IsQuiet = true;
+ Options.HostLatencySec = 0.001;
+ Options.HostSpeedBytesPerSec = 100000;
+
+ std::vector<ChunkBlockDescription> Blocks = {Block0, Block1, Block2};
+ ChunkBlockAnalyser Analyser(*LogOutput, Blocks, Options);
+
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + 50;
+
+ std::vector<ChunkBlockAnalyser::NeededBlock> NeededBlocks = {
+ {.BlockIndex = 0, .ChunkIndexes = {0, 2}},
+ {.BlockIndex = 1, .ChunkIndexes = {0, 2}},
+ {.BlockIndex = 2, .ChunkIndexes = {0, 2}},
+ };
+ std::vector<Mode> Modes = {Mode::Off, Mode::Exact, Mode::MultiRange};
+
+ auto Result = Analyser.CalculatePartialBlockDownloads(NeededBlocks, Modes);
+
+ // Block 0: Off → FullBlockIndexes
+ REQUIRE_EQ(1u, Result.FullBlockIndexes.size());
+ CHECK_EQ(0u, Result.FullBlockIndexes[0]);
+
+ // Block 1: Exact → 2 ranges; Block 2: MultiRange (low latency) → 2 ranges
+ // Total: 4 ranges
+ REQUIRE_EQ(4u, Result.BlockRanges.size());
+
+ // First 2 ranges belong to Block 1 (Exact)
+ CHECK_EQ(1u, Result.BlockRanges[0].BlockIndex);
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[0].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[0].RangeLength);
+ CHECK_EQ(1u, Result.BlockRanges[1].BlockIndex);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[1].RangeStart);
+ CHECK_EQ(300u, Result.BlockRanges[1].RangeLength);
+
+ // Last 2 ranges belong to Block 2 (MultiRange preserved)
+ CHECK_EQ(2u, Result.BlockRanges[2].BlockIndex);
+ CHECK_EQ(ChunkStartOffset, Result.BlockRanges[2].RangeStart);
+ CHECK_EQ(100u, Result.BlockRanges[2].RangeLength);
+ CHECK_EQ(2u, Result.BlockRanges[3].BlockIndex);
+ CHECK_EQ(ChunkStartOffset + 300u, Result.BlockRanges[3].RangeStart);
+ CHECK_EQ(300u, Result.BlockRanges[3].RangeLength);
+}
+
+TEST_CASE("chunkblock.getblockranges.first_chunk_only")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {0};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(1u, Ranges.size());
+ CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart);
+ CHECK_EQ(100u, Ranges[0].RangeLength);
+ CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.last_chunk_only")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {3};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(1u, Ranges.size());
+ CHECK_EQ(ChunkStartOffset + 600u, Ranges[0].RangeStart); // 100+200+300 before chunk 3
+ CHECK_EQ(400u, Ranges[0].RangeLength);
+ CHECK_EQ(3u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.middle_chunk_only")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {1};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(1u, Ranges.size());
+ CHECK_EQ(ChunkStartOffset + 100u, Ranges[0].RangeStart); // 100 before chunk 1
+ CHECK_EQ(200u, Ranges[0].RangeLength);
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.all_chunks")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ auto Block = MakeBlockDesc(50, {100, 200, 300, 400});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {0, 1, 2, 3};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(1u, Ranges.size());
+ CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart);
+ CHECK_EQ(1000u, Ranges[0].RangeLength); // 100+200+300+400
+ CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(4u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.non_contiguous")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ // Chunks 0 and 2 needed, chunk 1 skipped → two separate ranges
+ auto Block = MakeBlockDesc(50, {100, 200, 300});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {0, 2};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(2u, Ranges.size());
+
+ CHECK_EQ(ChunkStartOffset, Ranges[0].RangeStart);
+ CHECK_EQ(100u, Ranges[0].RangeLength);
+ CHECK_EQ(0u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexCount);
+
+ CHECK_EQ(ChunkStartOffset + 300u, Ranges[1].RangeStart); // 100+200 before chunk 2
+ CHECK_EQ(300u, Ranges[1].RangeLength);
+ CHECK_EQ(2u, Ranges[1].ChunkBlockIndexStart);
+ CHECK_EQ(1u, Ranges[1].ChunkBlockIndexCount);
+}
+
+TEST_CASE("chunkblock.getblockranges.contiguous_run")
+{
+ using namespace chunkblock_analyser_testutils;
+
+ // Chunks 1, 2, 3 needed (consecutive) → one merged range
+ auto Block = MakeBlockDesc(50, {50, 100, 150, 200, 250});
+ uint64_t ChunkStartOffset = CompressedBuffer::GetHeaderSizeForNoneEncoder() + Block.HeaderSize;
+
+ std::vector<uint32_t> Needed = {1, 2, 3};
+ auto Ranges = chunkblock_impl::GetBlockRanges(Block, ChunkStartOffset, Needed);
+
+ REQUIRE_EQ(1u, Ranges.size());
+ CHECK_EQ(ChunkStartOffset + 50u, Ranges[0].RangeStart); // 50 before chunk 1
+ CHECK_EQ(450u, Ranges[0].RangeLength); // 100+150+200
+ CHECK_EQ(1u, Ranges[0].ChunkBlockIndexStart);
+ CHECK_EQ(3u, Ranges[0].ChunkBlockIndexCount);
+}
+
+TEST_SUITE_END();
+
void
chunkblock_forcelink()
{